Blame - src/charset.c - android_external_vim

blob: 4560a96d1e78a1c7df187eb7ed8c8b5dc86aa311 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	#include "vim.h"
				11
				12	#ifdef FEAT_LINEBREAK
				13	static int win_chartabsize __ARGS((win_T wp, char_u p, colnr_T col));
				14	#endif
				15
				16	#ifdef FEAT_MBYTE
				17	static int win_nolbr_chartabsize __ARGS((win_T wp, char_u s, colnr_T col, int *headp));
				18	#endif
				19
				20	static int nr2hex __ARGS((int c));
				21
				22	static int chartab_initialized = FALSE;
				23
				/*
				 * b_chartab[] is an array of 32 bytes, each bit representing one of the
				 * characters 0-255: the byte index is "c >> 3", the bit within that byte is
				 * "c & 7".  Each macro expands to a single parenthesized expression.
				 */
				#define SET_CHARTAB(buf, c)   ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
				#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
				#define GET_CHARTAB(buf, c)   ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
				29
				30	/*
				31	* Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
				32	* characters for current buffer.
				33	*
				34	* Depends on the option settings 'iskeyword', 'isident', 'isfname',
				35	* 'isprint' and 'encoding'.
				36	*
				37	* The index in chartab[] depends on 'encoding':
				38	* - For non-multi-byte index with the byte (same as the character).
				39	* - For DBCS index with the first byte.
				40	* - For UTF-8 index with the character (when first byte is up to 0x80 it is
				41	* the same as the character, if the first byte is 0x80 and above it depends
				42	* on further bytes).
				43	*
				44	* The contents of chartab[]:
				45	* - The lower two bits, masked by CT_CELL_MASK, give the number of display
				46	* cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
				47	* - CT_PRINT_CHAR bit is set when the character is printable (no need to
				48	* translate the character before displaying it). Note that only DBCS
				49	* characters can have 2 display cells and still be printable.
				50	* - CT_FNAME_CHAR bit is set when the character can be in a file name.
				51	* - CT_ID_CHAR bit is set when the character can be in an identifier.
				52	*
				53	* Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
				54	* error, OK otherwise.
				55	*/
				56	int
				57	init_chartab()
				58	{
				59	return buf_init_chartab(curbuf, TRUE);
				60	}
				61
				62	int
				63	buf_init_chartab(buf, global)
				64	buf_T *buf;
				65	int global; /* FALSE: only set buf->b_chartab[] */
				66	{
				67	int c;
				68	int c2;
				69	char_u *p;
				70	int i;
				71	int tilde;
				72	int do_isalpha;
				73
				74	if (global)
				75	{
				76	/*
				77	* Set the default size for printable characters:
				78	* From <Space> to '~' is 1 (printable), others are 2 (not printable).
				79	* This also inits all 'isident' and 'isfname' flags to FALSE.
				80	*
				81	* EBCDIC: all chars below ' ' are not printable, all others are
				82	* printable.
				83	*/
				84	c = 0;
				85	while (c < ' ')
				86	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				87	#ifdef EBCDIC
				88	while (c < 255)
				89	#else
				90	while (c <= '~')
				91	#endif
				92	chartab[c++] = 1 + CT_PRINT_CHAR;
				93	#ifdef FEAT_FKMAP
				94	if (p_altkeymap)
				95	{
				96	while (c < YE)
				97	chartab[c++] = 1 + CT_PRINT_CHAR;
				98	}
				99	#endif
				100	while (c < 256)
				101	{
				102	#ifdef FEAT_MBYTE
				103	/* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
				104	if (enc_utf8 && c >= 0xa0)
				105	chartab[c++] = CT_PRINT_CHAR + 1;
				106	/* euc-jp characters starting with 0x8e are single width */
				107	else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
				108	chartab[c++] = CT_PRINT_CHAR + 1;
				109	/* other double-byte chars can be printable AND double-width */
				110	else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
				111	chartab[c++] = CT_PRINT_CHAR + 2;
				112	else
				113	#endif
				114	/* the rest is unprintable by default */
				115	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				116	}
				117
				118	#ifdef FEAT_MBYTE
				119	/* Assume that every multi-byte char is a filename character. */
				120	for (c = 1; c < 256; ++c)
				121	if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
				122	\|\| (enc_dbcs == DBCS_JPNU && c == 0x8e)
				123	\|\| (enc_utf8 && c >= 0xa0))
				124	chartab[c] \|= CT_FNAME_CHAR;
				125	#endif
				126	}
				127
				128	/*
				129	* Init word char flags all to FALSE
				130	*/
				131	vim_memset(buf->b_chartab, 0, (size_t)32);
				132	#ifdef FEAT_MBYTE
				133	for (c = 0; c < 256; ++c)
				134	{
				135	/* double-byte characters are probably word characters */
				136	if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
				137	SET_CHARTAB(buf, c);
				138	}
				139	#endif
				140
				141	#ifdef FEAT_LISP
				142	/*
				143	* In lisp mode the '-' character is included in keywords.
				144	*/
				145	if (buf->b_p_lisp)
				146	SET_CHARTAB(buf, '-');
				147	#endif
				148
				149	/* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
				150	* options Each option is a list of characters, character numbers or
				151	* ranges, separated by commas, e.g.: "200-210,x,#-178,-"
				152	*/
				153	for (i = global ? 0 : 3; i <= 3; ++i)
				154	{
				155	if (i == 0)
				156	p = p_isi; /* first round: 'isident' */
				157	else if (i == 1)
				158	p = p_isp; /* second round: 'isprint' */
				159	else if (i == 2)
				160	p = p_isf; /* third round: 'isfname' */
				161	else /* i == 3 */
				162	p = buf->b_p_isk; /* fourth round: 'iskeyword' */
				163
				164	while (*p)
				165	{
				166	tilde = FALSE;
				167	do_isalpha = FALSE;
				168	if (*p == '^' && p[1] != NUL)
				169	{
				170	tilde = TRUE;
				171	++p;
				172	}
				173	if (VIM_ISDIGIT(*p))
				174	c = getdigits(&p);
				175	else
				176	c = *p++;
				177	c2 = -1;
				178	if (*p == '-' && p[1] != NUL)
				179	{
				180	++p;
				181	if (VIM_ISDIGIT(*p))
				182	c2 = getdigits(&p);
				183	else
				184	c2 = *p++;
				185	}
				186	if (c <= 0 \|\| (c2 < c && c2 != -1) \|\| c2 >= 256
				187	\|\| !(p == NUL \|\| p == ','))
				188	return FAIL;
				189
				190	if (c2 == -1) /* not a range */
				191	{
				192	/*
				193	* A single '@' (not "@-@"):
				194	* Decide on letters being ID/printable/keyword chars with
				195	* standard function isalpha(). This takes care of locale for
				196	* single-byte characters).
				197	*/
				198	if (c == '@')
				199	{
				200	do_isalpha = TRUE;
				201	c = 1;
				202	c2 = 255;
				203	}
				204	else
				205	c2 = c;
				206	}
				207	while (c <= c2)
				208	{
				209	if (!do_isalpha \|\| isalpha(c)
				210	#ifdef FEAT_FKMAP
				211	\|\| (p_altkeymap && (F_isalpha(c) \|\| F_isdigit(c)))
				212	#endif
				213	)
				214	{
				215	if (i == 0) /* (re)set ID flag */
				216	{
				217	if (tilde)
				218	chartab[c] &= ~CT_ID_CHAR;
				219	else
				220	chartab[c] \|= CT_ID_CHAR;
				221	}
				222	else if (i == 1) /* (re)set printable */
				223	{
				224	if ((c < ' '
				225	#ifndef EBCDIC
				226	\|\| c > '~'
				227	#endif
				228	#ifdef FEAT_FKMAP
				229	\|\| (p_altkeymap
				230	&& (F_isalpha(c) \|\| F_isdigit(c)))
				231	#endif
				232	)
				233	#ifdef FEAT_MBYTE
				234	/* For double-byte we keep the cell width, so
				235	* that we can detect it from the first byte. */
				236	&& !(enc_dbcs && MB_BYTE2LEN(c) == 2)
				237	#endif
				238	)
				239	{
				240	if (tilde)
				241	{
				242	chartab[c] = (chartab[c] & ~CT_CELL_MASK)
				243	+ ((dy_flags & DY_UHEX) ? 4 : 2);
				244	chartab[c] &= ~CT_PRINT_CHAR;
				245	}
				246	else
				247	{
				248	chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
				249	chartab[c] \|= CT_PRINT_CHAR;
				250	}
				251	}
				252	}
				253	else if (i == 2) /* (re)set fname flag */
				254	{
				255	if (tilde)
				256	chartab[c] &= ~CT_FNAME_CHAR;
				257	else
				258	chartab[c] \|= CT_FNAME_CHAR;
				259	}
				260	else /* i == 3 / / (re)set keyword flag */
				261	{
				262	if (tilde)
				263	RESET_CHARTAB(buf, c);
				264	else
				265	SET_CHARTAB(buf, c);
				266	}
				267	}
				268	++c;
				269	}
				270	p = skip_to_option_part(p);
				271	}
				272	}
				273	chartab_initialized = TRUE;
				274	return OK;
				275	}
				276
				277	/*
				278	* Translate any special characters in buf[bufsize] in-place.
				279	* The result is a string with only printable characters, but if there is not
				280	* enough room, not all characters will be translated.
				281	*/
				282	void
				283	trans_characters(buf, bufsize)
				284	char_u *buf;
				285	int bufsize;
				286	{
				287	int len; /* length of string needing translation */
				288	int room; /* room in buffer after string */
				289	char_u trs; / translated character */
				290	int trs_len; /* length of trs[] */
				291
				292	len = (int)STRLEN(buf);
				293	room = bufsize - len;
				294	while (*buf != 0)
				295	{
				296	# ifdef FEAT_MBYTE
				297	/* Assume a multi-byte character doesn't need translation. */
				298	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
				299	len -= trs_len;
				300	else
				301	# endif
				302	{
				303	trs = transchar_byte(*buf);
				304	trs_len = (int)STRLEN(trs);
				305	if (trs_len > 1)
				306	{
				307	room -= trs_len - 1;
				308	if (room <= 0)
				309	return;
				310	mch_memmove(buf + trs_len, buf + 1, (size_t)len);
				311	}
				312	mch_memmove(buf, trs, (size_t)trs_len);
				313	--len;
				314	}
				315	buf += trs_len;
				316	}
				317	}
				318
				#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
				/*
				 * Translate a string into allocated memory, replacing special chars with
				 * printable chars.  Returns NULL when out of memory.
				 */
				char_u *
				transstr(s)
				    char_u	*s;
				{
				    char_u	*res;
				    char_u	*p;
				#ifdef FEAT_MBYTE
				    int		l, len, c;
				    char_u	hexbuf[11];
				#endif

				#ifdef FEAT_MBYTE
				    if (has_mbyte)
				    {
				        /* Compute the length of the result, taking account of unprintable
				         * multi-byte characters. */
				        len = 0;
				        p = s;
				        while (*p != NUL)
				        {
				            if ((l = (*mb_ptr2len_check)(p)) > 1)
				            {
				                c = (*mb_ptr2char)(p);
				                p += l;
				                if (vim_isprintc(c))
				                    len += l;
				                else
				                {
				                    /* Not printable: it will be written as "<xxxx>". */
				                    transchar_hex(hexbuf, c);
				                    len += (int)STRLEN(hexbuf);
				                }
				            }
				            else
				            {
				                l = byte2cells(*p++);
				                if (l > 0)
				                    len += l;
				                else
				                    len += 4;	/* illegal byte sequence */
				            }
				        }
				        res = alloc((unsigned)(len + 1));
				    }
				    else
				#endif
				        res = alloc((unsigned)(vim_strsize(s) + 1));
				    if (res != NULL)
				    {
				        /* Second pass: build the result by appending to the empty string. */
				        *res = NUL;
				        p = s;
				        while (*p != NUL)
				        {
				#ifdef FEAT_MBYTE
				            if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				            {
				                c = (*mb_ptr2char)(p);
				                if (vim_isprintc(c))
				                    STRNCAT(res, p, l);	/* append printable multi-byte char */
				                else
				                    transchar_hex(res + STRLEN(res), c);
				                p += l;
				            }
				            else
				#endif
				                STRCAT(res, transchar_byte(*p++));
				        }
				    }
				    return res;
				}
				#endif
				394
				#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
				/*
				 * Convert the string "str[len]" to do ignore-case comparing.  Uses the
				 * current locale.  Returns an allocated string (NULL for out-of-memory).
				 */
				char_u *
				str_foldcase(str, len)
				    char_u	*str;
				    int		len;
				{
				    garray_T	ga;
				    int		i;

				    /* Access to the growarray data as a byte / as a pointer to a byte. */
				#define GA_CHAR(i)	((char_u *)ga.ga_data)[i]
				#define GA_PTR(i)	((char_u *)ga.ga_data + (i))

				    /* Copy "str" into allocated memory, unmodified. */
				    ga_init2(&ga, 1, 10);
				    if (ga_grow(&ga, len + 1) == FAIL)
				        return NULL;
				    mch_memmove(ga.ga_data, str, (size_t)len);
				    GA_CHAR(len) = NUL;
				    ga.ga_len = len;
				    ga.ga_room -= len;

				    /* Make each character lower case. */
				    i = 0;
				    while (GA_CHAR(i) != NUL)
				    {
				#ifdef FEAT_MBYTE
				        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(GA_CHAR(i)) > 1))
				        {
				            if (enc_utf8)
				            {
				                int	c, lc;

				                c = utf_ptr2char(GA_PTR(i));
				                lc = utf_tolower(c);
				                if (c != lc)
				                {
				                    int	    ol = utf_char2len(c);
				                    int	    nl = utf_char2len(lc);

				                    /* If the byte length changes need to shift the following
				                     * characters forward or backward. */
				                    if (ol != nl)
				                    {
				                        if (nl > ol)
				                            if (ga_grow(&ga, nl - ol) == FAIL)
				                            {
				                                /* out of memory, keep old char */
				                                lc = c;
				                                nl = ol;
				                            }
				                        if (ol != nl)
				                        {
				                            /* Move the tail, including the NUL. */
				                            mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
				                                        STRLEN(GA_PTR(i) + ol) + 1);
				                            ga.ga_len += nl - ol;
				                            ga.ga_room -= nl - ol;
				                        }
				                    }
				                    (void)utf_char2bytes(lc, GA_PTR(i));
				                }
				            }
				            /* skip to next multi-byte char */
				            i += (*mb_ptr2len_check)(GA_PTR(i));
				        }
				        else
				#endif
				        {
				            GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
				            ++i;
				        }
				    }

				    return (char_u *)ga.ga_data;
				}
				#endif
				474
				475	/*
				476	* Catch 22: chartab[] can't be initialized before the options are
				477	* initialized, and initializing options may cause transchar() to be called!
				478	* When chartab_initialized == FALSE don't use chartab[].
				479	* Does NOT work for multi-byte characters, c must be <= 255.
				480	* Also doesn't work for the first byte of a multi-byte, "c" must be a
				481	* character!
				482	*/
				483	static char_u transchar_buf[7];
				484
				485	char_u *
				486	transchar(c)
				487	int c;
				488	{
				489	int i;
				490
				491	i = 0;
				492	if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
				493	{
				494	transchar_buf[0] = '~';
				495	transchar_buf[1] = '@';
				496	i = 2;
				497	c = K_SECOND(c);
				498	}
				499
				500	if ((!chartab_initialized && (
				501	#ifdef EBCDIC
				502	(c >= 64 && c < 255)
				503	#else
				504	(c >= ' ' && c <= '~')
				505	#endif
				506	#ifdef FEAT_FKMAP
				507	\|\| F_ischar(c)
				508	#endif
				509	)) \|\| (c < 256 && vim_isprintc_strict(c)))
				510	{
				511	/* printable character */
				512	transchar_buf[i] = c;
				513	transchar_buf[i + 1] = NUL;
				514	}
				515	else
				516	transchar_nonprint(transchar_buf + i, c);
				517	return transchar_buf;
				518	}
				519
				#if defined(FEAT_MBYTE) || defined(PROTO)
				/*
				 * Like transchar(), but called with a byte instead of a character.  Checks
				 * for an illegal UTF-8 byte.
				 * Returns a pointer to the same static buffer that transchar() uses.
				 */
				char_u *
				transchar_byte(c)
				    int		c;
				{
				    if (enc_utf8 && c >= 0x80)
				    {
				        /* In UTF-8 a byte of 0x80 and above is never shown as-is. */
				        transchar_nonprint(transchar_buf, c);
				        return transchar_buf;
				    }
				    return transchar(c);
				}
				#endif
				537
				538	/*
				539	* Convert non-printable character to two or more printable characters in
				540	* "buf[]". "buf" needs to be able to hold five bytes.
				541	* Does NOT work for multi-byte characters, c must be <= 255.
				542	*/
				543	void
				544	transchar_nonprint(buf, c)
				545	char_u *buf;
				546	int c;
				547	{
				548	if (c == NL)
				549	c = NUL; /* we use newline in place of a NUL */
				550	else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
				551	c = NL; /* we use CR in place of NL in this case */
				552
				553	if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
				554	transchar_hex(buf, c);
				555
				556	#ifdef EBCDIC
				557	/* For EBCDIC only the characters 0-63 and 255 are not printable */
				558	else if (CtrlChar(c) != 0 \|\| c == DEL)
				559	#else
				560	else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
				561	#endif
				562	{
				563	buf[0] = '^';
				564	#ifdef EBCDIC
				565	if (c == DEL)
				566	buf[1] = '?'; /* DEL displayed as ^? */
				567	else
				568	buf[1] = CtrlChar(c);
				569	#else
				570	buf[1] = c ^ 0x40; /* DEL displayed as ^? */
				571	#endif
				572
				573	buf[2] = NUL;
				574	}
				575	#ifdef FEAT_MBYTE
				576	else if (enc_utf8 && c >= 0x80)
				577	{
				578	transchar_hex(buf, c);
				579	}
				580	#endif
				581	#ifndef EBCDIC
				582	else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
				583	{
				584	buf[0] = '\|';
				585	buf[1] = c - 0x80;
				586	buf[2] = NUL;
				587	}
				588	#else
				589	else if (c < 64)
				590	{
				591	buf[0] = '~';
				592	buf[1] = MetaChar(c);
				593	buf[2] = NUL;
				594	}
				595	#endif
				596	else /* 0x80 - 0x9f and 0xff */
				597	{
				598	/*
				599	* TODO: EBCDIC I don't know what to do with this chars, so I display
				600	* them as '~?' for now
				601	*/
				602	buf[0] = '~';
				603	#ifdef EBCDIC
				604	buf[1] = '?'; /* 0xff displayed as ~? */
				605	#else
				606	buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
				607	#endif
				608	buf[2] = NUL;
				609	}
				610	}
				611
				612	void
				613	transchar_hex(buf, c)
				614	char_u *buf;
				615	int c;
				616	{
				617	int i = 0;
				618
				619	buf[0] = '<';
				620	#ifdef FEAT_MBYTE
				621	if (c > 255)
				622	{
				623	buf[++i] = nr2hex((unsigned)c >> 12);
				624	buf[++i] = nr2hex((unsigned)c >> 8);
				625	}
				626	#endif
				627	buf[++i] = nr2hex((unsigned)c >> 4);
				628	buf[++i] = nr2hex(c);
				629	buf[++i] = '>';
				630	buf[++i] = NUL;
				631	}
				632
				/*
				 * Convert the lower 4 bits of byte "c" to its hex character.
				 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
				 * function key 1.
				 */
				static int
				nr2hex(c)
				    int		c;
				{
				    int		nibble = c & 0xf;	/* higher bits are ignored */

				    if (nibble <= 9)
				        return nibble + '0';
				    return nibble - 10 + 'a';
				}
				646
				647	/*
				648	* Return number of display cells occupied by byte "b".
				649	* Caller must make sure 0 <= b <= 255.
				650	* For multi-byte mode "b" must be the first byte of a character.
				651	* A TAB is counted as two cells: "^I".
				652	* For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
				653	* cells depends on further bytes.
				654	*/
				655	int
				656	byte2cells(b)
				657	int b;
				658	{
				659	#ifdef FEAT_MBYTE
				660	if (enc_utf8 && b >= 0x80)
				661	return 0;
				662	#endif
				663	return (chartab[b] & CT_CELL_MASK);
				664	}
				665
				666	/*
				667	* Return number of display cells occupied by character "c".
				668	* "c" can be a special key (negative number) in which case 3 or 4 is returned.
				669	* A TAB is counted as two cells: "^I" or four: "<09>".
				670	*/
				671	int
				672	char2cells(c)
				673	int c;
				674	{
				675	if (IS_SPECIAL(c))
				676	return char2cells(K_SECOND(c)) + 2;
				677	#ifdef FEAT_MBYTE
				678	if (c >= 0x80)
				679	{
				680	/* UTF-8: above 0x80 need to check the value */
				681	if (enc_utf8)
				682	return utf_char2cells(c);
				683	/* DBCS: double-byte means double-width, except for euc-jp with first
				684	* byte 0x8e */
				685	if (enc_dbcs != 0 && c >= 0x100)
				686	{
				687	if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
				688	return 1;
				689	return 2;
				690	}
				691	}
				692	#endif
				693	return (chartab[c & 0xff] & CT_CELL_MASK);
				694	}
				695
				696	/*
				697	* Return number of display cells occupied by character at "*p".
				698	* A TAB is counted as two cells: "^I" or four: "<09>".
				699	*/
				700	int
				701	ptr2cells(p)
				702	char_u *p;
				703	{
				704	#ifdef FEAT_MBYTE
				705	/* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
				706	if (enc_utf8 && *p >= 0x80)
				707	return utf_ptr2cells(p);
				708	/* For DBCS we can tell the cell count from the first byte. */
				709	#endif
				710	return (chartab[*p] & CT_CELL_MASK);
				711	}
				712
				713	/*
				714	* Return the number of characters string "s" will take on the screen,
				715	* counting TABs as two characters: "^I".
				716	*/
				717	int
				718	vim_strsize(s)
				719	char_u *s;
				720	{
				721	return vim_strnsize(s, (int)MAXCOL);
				722	}
				723
				724	/*
				725	* Return the number of characters string "s[len]" will take on the screen,
				726	* counting TABs as two characters: "^I".
				727	*/
				728	int
				729	vim_strnsize(s, len)
				730	char_u *s;
				731	int len;
				732	{
				733	int size = 0;
				734
				735	while (*s != NUL && --len >= 0)
				736	{
				737	#ifdef FEAT_MBYTE
				738	if (has_mbyte)
				739	{
				740	int l = (*mb_ptr2len_check)(s);
				741
				742	size += ptr2cells(s);
				743	s += l;
				744	len -= l - 1;
				745	}
				746	else
				747	#endif
				748	size += byte2cells(*s++);
				749	}
				750	return size;
				751	}
				752
				/*
				 * Return the number of characters 'c' will take on the screen, taking
				 * into account the size of a tab.
				 * Use a define to make it fast, this is used very often!!!
				 * Also see getvcol() below.
				 *
				 * NOTE: the macro expands to an if/else in which both branches "return"
				 * from the calling function; it must be the last statement of its block.
				 * A TAB gets the distance to the next tab stop, unless 'list' is set for
				 * the window and lcs_tab1 is zero; everything else gets ptr2cells().
				 */

				#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
				    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
				    { \
				        int ts; \
				        ts = (buf)->b_p_ts; \
				        return (int)(ts - ((col) % ts)); \
				    } \
				    else \
				        return ptr2cells(p);
				769
				#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
				        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
				/*
				 * Return the number of screen cells the character at "p" takes when it is
				 * displayed at column "col" in the current window; a TAB is expanded with
				 * the 'tabstop' value of the current buffer.
				 */
				int
				chartabsize(p, col)
				    char_u	*p;
				    colnr_T	col;
				{
				    /* The macro returns from this function in both of its branches. */
				    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
				}
				#endif
				780
				#ifdef FEAT_LINEBREAK
				/*
				 * Size on the screen of the character at "p" when displayed at column
				 * "col" in window "wp"; a TAB is expanded with the 'tabstop' value of the
				 * buffer in that window.
				 */
				static int
				win_chartabsize(wp, p, col)
				    win_T	*wp;
				    char_u	*p;
				    colnr_T	col;
				{
				    /* Both branches of the macro return from this function. */
				    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
				}
				#endif
				791
				792	/*
				793	* return the number of characters the string 's' will take on the screen,
				794	* taking into account the size of a tab
				795	*/
				796	int
				797	linetabsize(s)
				798	char_u *s;
				799	{
				800	colnr_T col = 0;
				801
				802	while (*s != NUL)
				803	col += lbr_chartabsize_adv(&s, col);
				804	return (int)col;
				805	}
				806
				807	/*
				808	* Like linetabsize(), but for a given window instead of the current one.
				809	*/
				810	int
				811	win_linetabsize(wp, p, len)
				812	win_T *wp;
				813	char_u *p;
				814	colnr_T len;
				815	{
				816	colnr_T col = 0;
				817	char_u *s;
				818
Bram Moolenaar	b5bf5b8	2004-12-24 14:35:23 +0000	[diff] [blame^]	819	for (s = p; *s != NUL && (len == MAXCOL \|\| s < p + len); mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	820	col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	821	return (int)col;
				822	}
				823
				824	/*
				825	* return TRUE if 'c' is a normal identifier character
				826	* letters and characters from 'isident' option.
				827	*/
				828	int
				829	vim_isIDc(c)
				830	int c;
				831	{
				832	return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
				833	}
				834
				835	/*
				836	* return TRUE if 'c' is a keyword character: Letters and characters from
				837	* 'iskeyword' option for current buffer.
				838	* For multi-byte characters mb_get_class() is used (builtin rules).
				839	*/
				840	int
				841	vim_iswordc(c)
				842	int c;
				843	{
				844	#ifdef FEAT_MBYTE
				845	if (c >= 0x100)
				846	{
				847	if (enc_dbcs != 0)
				848	return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
				849	if (enc_utf8)
				850	return utf_class(c) >= 2;
				851	}
				852	#endif
				853	return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
				854	}
				855
				856	/*
				857	* Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
				858	*/
				859	int
				860	vim_iswordp(p)
				861	char_u *p;
				862	{
				863	#ifdef FEAT_MBYTE
				864	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				865	return mb_get_class(p) >= 2;
				866	#endif
				867	return GET_CHARTAB(curbuf, *p) != 0;
				868	}
				869
				#if defined(FEAT_SYN_HL) || defined(PROTO)
				/*
				 * Like vim_iswordp(), but use the keyword flags of buffer "buf" instead of
				 * those of the current buffer.
				 */
				int
				vim_iswordc_buf(p, buf)
				    char_u	*p;
				    buf_T	*buf;
				{
				# ifdef FEAT_MBYTE
				    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				        return mb_get_class(p) >= 2;
				# endif
				    return (GET_CHARTAB(buf, *p) != 0);
				}
				#endif
				883
				884	/*
				885	* return TRUE if 'c' is a valid file-name character
				886	* Assume characters above 0x100 are valid (multi-byte).
				887	*/
				888	int
				889	vim_isfilec(c)
				890	int c;
				891	{
				892	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
				893	}
				894
				895	/*
				896	* return TRUE if 'c' is a printable character
				897	* Assume characters above 0x100 are printable (multi-byte), except for
				898	* Unicode.
				899	*/
				900	int
				901	vim_isprintc(c)
				902	int c;
				903	{
				904	#ifdef FEAT_MBYTE
				905	if (enc_utf8 && c >= 0x100)
				906	return utf_printable(c);
				907	#endif
				908	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				909	}
				910
				911	/*
				912	* Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
				913	* byte of a double-byte character.
				914	*/
				915	int
				916	vim_isprintc_strict(c)
				917	int c;
				918	{
				919	#ifdef FEAT_MBYTE
				920	if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
				921	return FALSE;
				922	if (enc_utf8 && c >= 0x100)
				923	return utf_printable(c);
				924	#endif
				925	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				926	}
				927
				928	/*
				929	* like chartabsize(), but also check for line breaks on the screen
				930	*/
				931	int
				932	lbr_chartabsize(s, col)
				933	unsigned char *s;
				934	colnr_T col;
				935	{
				936	#ifdef FEAT_LINEBREAK
				937	if (!curwin->w_p_lbr && *p_sbr == NUL)
				938	{
				939	#endif
				940	#ifdef FEAT_MBYTE
				941	if (curwin->w_p_wrap)
				942	return win_nolbr_chartabsize(curwin, s, col, NULL);
				943	#endif
				944	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
				945	#ifdef FEAT_LINEBREAK
				946	}
				947	return win_lbr_chartabsize(curwin, s, col, NULL);
				948	#endif
				949	}
				950
				951	/*
				952	* Call lbr_chartabsize() and advance the pointer.
				953	*/
				954	int
				955	lbr_chartabsize_adv(s, col)
				956	char_u **s;
				957	colnr_T col;
				958	{
				959	int retval;
				960
				961	retval = lbr_chartabsize(*s, col);
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	962	mb_ptr_adv(*s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	963	return retval;
				964	}
				965
				966	/*
				967	* This function is used very often, keep it fast!!!!
				968	*
				969	* If "headp" not NULL, set *headp to the size of what we for 'showbreak'
				970	* string at start of line. Warning: *headp is only set if it's a non-zero
				971	* value, init to 0 before calling.
				972	*/
				973	/ARGSUSED/
				974	int
				975	win_lbr_chartabsize(wp, s, col, headp)
				976	win_T *wp;
				977	char_u *s;
				978	colnr_T col;
				979	int *headp;
				980	{
				981	#ifdef FEAT_LINEBREAK
				982	int c;
				983	int size;
				984	colnr_T col2;
				985	colnr_T colmax;
				986	int added;
				987	# ifdef FEAT_MBYTE
				988	int mb_added = 0;
				989	# else
				990	# define mb_added 0
				991	# endif
				992	int numberextra;
				993	char_u *ps;
				994	int tab_corr = (*s == TAB);
				995
				996	/*
				997	* No 'linebreak' and 'showbreak': return quickly.
				998	*/
				999	if (!wp->w_p_lbr && *p_sbr == NUL)
				1000	#endif
				1001	{
				1002	#ifdef FEAT_MBYTE
				1003	if (wp->w_p_wrap)
				1004	return win_nolbr_chartabsize(wp, s, col, headp);
				1005	#endif
				1006	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
				1007	}
				1008
				1009	#ifdef FEAT_LINEBREAK
				1010	/*
				1011	* First get normal size, without 'linebreak'
				1012	*/
				1013	size = win_chartabsize(wp, s, col);
				1014	c = *s;
				1015
				1016	/*
				1017	* If 'linebreak' set check at a blank before a non-blank if the line
				1018	* needs a break here
				1019	*/
				1020	if (wp->w_p_lbr
				1021	&& vim_isbreak(c)
				1022	&& !vim_isbreak(s[1])
				1023	&& !wp->w_p_list
				1024	&& wp->w_p_wrap
				1025	# ifdef FEAT_VERTSPLIT
				1026	&& wp->w_width != 0
				1027	# endif
				1028	)
				1029	{
				1030	/*
				1031	* Count all characters from first non-blank after a blank up to next
				1032	* non-blank after a blank.
				1033	*/
				1034	numberextra = win_col_off(wp);
				1035	col2 = col;
				1036	colmax = W_WIDTH(wp) - numberextra;
				1037	if (col >= colmax)
				1038	colmax += (((col - colmax)
				1039	/ (colmax + win_col_off2(wp))) + 1)
				1040	* (colmax + win_col_off2(wp));
				1041	for (;;)
				1042	{
				1043	ps = s;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1044	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1045	c = *s;
				1046	if (!(c != NUL
				1047	&& (vim_isbreak(c)
				1048	\|\| (!vim_isbreak(c)
				1049	&& (col2 == col \|\| !vim_isbreak(*ps))))))
				1050	break;
				1051
				1052	col2 += win_chartabsize(wp, s, col2);
				1053	if (col2 >= colmax) /* doesn't fit */
				1054	{
				1055	size = colmax - col;
				1056	tab_corr = FALSE;
				1057	break;
				1058	}
				1059	}
				1060	}
				1061	# ifdef FEAT_MBYTE
				1062	else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				1063	&& wp->w_p_wrap && in_win_border(wp, col))
				1064	{
				1065	++size; /* Count the ">" in the last column. */
				1066	mb_added = 1;
				1067	}
				1068	# endif
				1069
				1070	/*
				1071	* May have to add something for 'showbreak' string at start of line
				1072	* Set *headp to the size of what we add.
				1073	*/
				1074	added = 0;
				1075	if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
				1076	{
				1077	numberextra = win_col_off(wp);
				1078	col += numberextra + mb_added;
				1079	if (col >= (colnr_T)W_WIDTH(wp))
				1080	{
				1081	col -= W_WIDTH(wp);
				1082	numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
				1083	if (numberextra > 0)
				1084	col = col % numberextra;
				1085	}
				1086	if (col == 0 \|\| col + size > (colnr_T)W_WIDTH(wp))
				1087	{
				1088	added = vim_strsize(p_sbr);
				1089	if (tab_corr)
				1090	size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
				1091	else
				1092	size += added;
				1093	if (col != 0)
				1094	added = 0;
				1095	}
				1096	}
				1097	if (headp != NULL)
				1098	*headp = added + mb_added;
				1099	return size;
				1100	#endif
				1101	}
				1102
				#if defined(FEAT_MBYTE) || defined(PROTO)
				/*
				 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
				 * 'wrap' is on.  This means we need to check for a double-byte character that
				 * doesn't fit at the end of the screen line.
				 *
				 * Returns the number of cells taken by the character at "s" when it starts
				 * at virtual column "col" of window "wp".
				 * When a two-cell character lands in the last window column, 3 is returned
				 * and, if "headp" is not NULL, "*headp" is set to 1.  In all other cases
				 * "*headp" is not written.
				 */
				static int
				win_nolbr_chartabsize(wp, s, col, headp)
				    win_T *wp;
				    char_u *s;
				    colnr_T col;
				    int *headp;
				{
				    int n;

				    /* A TAB fills up to the next tab stop, unless 'list' shows it specially. */
				    if (*s == TAB && (!wp->w_p_list || lcs_tab1))
				    {
				        n = wp->w_buffer->b_p_ts;
				        return (int)(n - (col % n));
				    }
				    n = ptr2cells(s);
				    /* Add one cell for a double-width character in the last column of the
				     * window, displayed with a ">". */
				    if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
				    {
				        if (headp != NULL)
				            *headp = 1;
				        return 3;
				    }
				    return n;
				}

				/*
				 * Return TRUE if virtual column "vcol" is in the rightmost column of window
				 * "wp".
				 * Returns FALSE when the window has no width (FEAT_VERTSPLIT) or when
				 * wrapped lines have no room for text at all.
				 */
				int
				in_win_border(wp, vcol)
				    win_T *wp;
				    colnr_T vcol;
				{
				    colnr_T width1; /* width of first line (after line number) */
				    colnr_T width2; /* width of further lines */

				#ifdef FEAT_VERTSPLIT
				    if (wp->w_width == 0) /* there is no border */
				        return FALSE;
				#endif
				    width1 = W_WIDTH(wp) - win_col_off(wp);
				    if (vcol < width1 - 1)
				        return FALSE;
				    if (vcol == width1 - 1)
				        return TRUE;
				    width2 = width1 + win_col_off2(wp);
				    /* Guard the modulo below: a zero divisor is undefined behaviour. */
				    if (width2 <= 0)
				        return FALSE;
				    return ((vcol - width1) % width2 == width2 - 1);
				}
				#endif /* FEAT_MBYTE */
				1160
				1161	/*
				1162	* Get virtual column number of pos.
				1163	* start: on the first position of this character (TAB, ctrl)
				1164	* cursor: where the cursor is on this character (first char, except for TAB)
				1165	* end: on the last position of this character (TAB, ctrl)
				1166	*
				1167	* This is used very often, keep it fast!
				1168	*/
				1169	void
				1170	getvcol(wp, pos, start, cursor, end)
				1171	win_T *wp;
				1172	pos_T *pos;
				1173	colnr_T *start;
				1174	colnr_T *cursor;
				1175	colnr_T *end;
				1176	{
				1177	colnr_T vcol;
				1178	char_u ptr; / points to current char */
				1179	char_u posptr; / points to char at pos->col */
				1180	int incr;
				1181	int head;
				1182	int ts = wp->w_buffer->b_p_ts;
				1183	int c;
				1184
				1185	vcol = 0;
				1186	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1187	posptr = ptr + pos->col;
				1188
				1189	/*
				1190	* This function is used very often, do some speed optimizations.
				1191	* When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
				1192	* Also use this when 'list' is set but tabs take their normal size.
				1193	*/
				1194	if ((!wp->w_p_list \|\| lcs_tab1 != NUL)
				1195	#ifdef FEAT_LINEBREAK
				1196	&& !wp->w_p_lbr && *p_sbr == NUL
				1197	#endif
				1198	)
				1199	{
				1200	#ifndef FEAT_MBYTE
				1201	head = 0;
				1202	#endif
				1203	for (;;)
				1204	{
				1205	#ifdef FEAT_MBYTE
				1206	head = 0;
				1207	#endif
				1208	c = *ptr;
				1209	/* make sure we don't go past the end of the line */
				1210	if (c == NUL)
				1211	{
				1212	incr = 1; /* NUL at end of line only takes one column */
				1213	break;
				1214	}
				1215	/* A tab gets expanded, depending on the current column */
				1216	if (c == TAB)
				1217	incr = ts - (vcol % ts);
				1218	else
				1219	{
				1220	#ifdef FEAT_MBYTE
				1221	if (has_mbyte)
				1222	{
				1223	/* For utf-8, if the byte is >= 0x80, need to look at
				1224	* further bytes to find the cell width. */
				1225	if (enc_utf8 && c >= 0x80)
				1226	incr = utf_ptr2cells(ptr);
				1227	else
				1228	incr = CHARSIZE(c);
				1229
				1230	/* If a double-cell char doesn't fit at the end of a line
				1231	* it wraps to the next line, it's like this char is three
				1232	* cells wide. */
				1233	if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
				1234	{
				1235	++incr;
				1236	head = 1;
				1237	}
				1238	}
				1239	else
				1240	#endif
				1241	incr = CHARSIZE(c);
				1242	}
				1243
				1244	if (ptr >= posptr) /* character at pos->col */
				1245	break;
				1246
				1247	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1248	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1249	}
				1250	}
				1251	else
				1252	{
				1253	for (;;)
				1254	{
				1255	/* A tab gets expanded, depending on the current column */
				1256	head = 0;
				1257	incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
				1258	/* make sure we don't go past the end of the line */
				1259	if (*ptr == NUL)
				1260	{
				1261	incr = 1; /* NUL at end of line only takes one column */
				1262	break;
				1263	}
				1264
				1265	if (ptr >= posptr) /* character at pos->col */
				1266	break;
				1267
				1268	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1269	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1270	}
				1271	}
				1272	if (start != NULL)
				1273	*start = vcol + head;
				1274	if (end != NULL)
				1275	*end = vcol + incr - 1;
				1276	if (cursor != NULL)
				1277	{
				1278	if (*ptr == TAB
				1279	&& (State & NORMAL)
				1280	&& !wp->w_p_list
				1281	&& !virtual_active()
				1282	#ifdef FEAT_VISUAL
				1283	&& !(VIsual_active
				1284	&& (p_sel == 'e' \|\| ltoreq(pos, VIsual)))
				1285	#endif
				1286	)
				1287	cursor = vcol + incr - 1; / cursor at end */
				1288	else
				1289	cursor = vcol + head; / cursor at start */
				1290	}
				1291	}
				1292
				1293	/*
				1294	* Get virtual cursor column in the current window, pretending 'list' is off.
				1295	*/
				1296	colnr_T
				1297	getvcol_nolist(posp)
				1298	pos_T *posp;
				1299	{
				1300	int list_save = curwin->w_p_list;
				1301	colnr_T vcol;
				1302
				1303	curwin->w_p_list = FALSE;
				1304	getvcol(curwin, posp, NULL, &vcol, NULL);
				1305	curwin->w_p_list = list_save;
				1306	return vcol;
				1307	}
				1308
				#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
				/*
				 * Get virtual column in virtual mode.
				 * When virtual_active() is true, "start" and "cursor" both receive the start
				 * column of the character plus the (adjusted) "coladd" of "pos", and "end"
				 * receives that value plus the extra cells of a wide character.
				 * Otherwise this is the same as getvcol().
				 * Each of "start", "cursor" and "end" may be NULL.
				 */
				void
				getvvcol(wp, pos, start, cursor, end)
				    win_T *wp;
				    pos_T *pos;
				    colnr_T *start;
				    colnr_T *cursor;
				    colnr_T *end;
				{
				    colnr_T col;
				    colnr_T coladd;
				    colnr_T endadd;
				# ifdef FEAT_MBYTE
				    char_u *ptr;
				# endif

				    if (virtual_active())
				    {
				        /* For virtual mode, only want one value */
				        getvcol(wp, pos, &col, NULL, NULL);

				        coladd = pos->coladd;
				        endadd = 0;
				# ifdef FEAT_MBYTE
				        /* Cannot put the cursor on part of a wide character. */
				        ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				        if (pos->col < STRLEN(ptr))
				        {
				            int c = (*mb_ptr2char)(ptr + pos->col);

				            if (c != TAB && vim_isprintc(c))
				            {
				                /* The cells after the first one are not separate cursor
				                 * positions: take them out of "coladd". */
				                endadd = char2cells(c) - 1;
				                if (coladd >= endadd)
				                    coladd -= endadd;
				                else
				                    coladd = 0;
				            }
				        }
				# endif
				        col += coladd;
				        if (start != NULL)
				            *start = col;
				        if (cursor != NULL)
				            *cursor = col;
				        if (end != NULL)
				            *end = col + endadd;
				    }
				    else
				        getvcol(wp, pos, start, cursor, end);
				}
				#endif
				1364
				#if defined(FEAT_VISUAL) || defined(PROTO)
				/*
				 * Get the leftmost and rightmost virtual column of pos1 and pos2.
				 * Used for Visual block mode.
				 * The results are stored in "*left" and "*right"; both must be non-NULL.
				 */
				void
				getvcols(wp, pos1, pos2, left, right)
				    win_T *wp;
				    pos_T *pos1, *pos2;
				    colnr_T *left, *right;
				{
				    colnr_T from1, from2, to1, to2;

				    /* Make from1/to1 belong to the earlier position, from2/to2 to the
				     * later one. */
				    if (ltp(*pos1, *pos2))
				    {
				        getvvcol(wp, pos1, &from1, NULL, &to1);
				        getvvcol(wp, pos2, &from2, NULL, &to2);
				    }
				    else
				    {
				        getvvcol(wp, pos2, &from1, NULL, &to1);
				        getvvcol(wp, pos1, &from2, NULL, &to2);
				    }
				    if (from2 < from1)
				        *left = from2;
				    else
				        *left = from1;
				    if (to2 > to1)
				    {
				        /* With 'selection' starting with 'e' the character at the later
				         * position is left out, as long as that stays right of "to1". */
				        if (*p_sel == 'e' && from2 - 1 >= to1)
				            *right = from2 - 1;
				        else
				            *right = to2;
				    }
				    else
				        *right = to1;
				}
				#endif
				1403
				1404	/*
				1405	* skipwhite: skip over ' ' and '\t'.
				1406	*/
				1407	char_u *
				1408	skipwhite(p)
				1409	char_u *p;
				1410	{
				1411	while (vim_iswhite(p)) / skip to next non-white */
				1412	++p;
				1413	return p;
				1414	}
				1415
				1416	/*
				1417	* skipdigits: skip over digits;
				1418	*/
				1419	char_u *
				1420	skipdigits(p)
				1421	char_u *p;
				1422	{
				1423	while (VIM_ISDIGIT(p)) / skip to next non-digit */
				1424	++p;
				1425	return p;
				1426	}
				1427
				/*
				 * Variant of isdigit() that can handle characters > 0x100.
				 * We don't use isdigit() here, because on some systems it also considers
				 * superscript 1 to be a digit.
				 * Use the VIM_ISDIGIT() macro for simple arguments.
				 * Returns non-zero only for '0' through '9'.
				 */
				int
				vim_isdigit(c)
				    int c;
				{
				    return (c >= '0' && c <= '9');
				}
				1440
				/*
				 * Variant of isxdigit() that can handle characters > 0x100.
				 * We don't use isxdigit() here, because on some systems it also considers
				 * superscript 1 to be a digit.
				 * Returns non-zero only for '0'-'9', 'a'-'f' and 'A'-'F'.
				 */
				int
				vim_isxdigit(c)
				    int c;
				{
				    return (c >= '0' && c <= '9')
				        || (c >= 'a' && c <= 'f')
				        || (c >= 'A' && c <= 'F');
				}
				1454
				1455	/*
				1456	* skiptowhite: skip over text until ' ' or '\t' or NUL.
				1457	*/
				1458	char_u *
				1459	skiptowhite(p)
				1460	char_u *p;
				1461	{
				1462	while (p != ' ' && p != '\t' && *p != NUL)
				1463	++p;
				1464	return p;
				1465	}
				1466
				#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
				        || defined(PROTO)
				/*
				 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
				 * A backslash or CTRL-V makes the character after it part of the skipped
				 * text, unless that character is the terminating NUL.
				 */
				char_u *
				skiptowhite_esc(p)
				    char_u *p;
				{
				    while (*p != ' ' && *p != '\t' && *p != NUL)
				    {
				        if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
				            ++p; /* step over the escape, the next ++p takes the char */
				        ++p;
				    }
				    return p;
				}
				#endif
				1485
				1486	/*
				1487	* Getdigits: Get a number from a string and skip over it.
				1488	* Note: the argument is a pointer to a char_u pointer!
				1489	*/
				1490	long
				1491	getdigits(pp)
				1492	char_u **pp;
				1493	{
				1494	char_u *p;
				1495	long retval;
				1496
				1497	p = *pp;
				1498	retval = atol((char *)p);
				1499	if (p == '-') / skip negative sign */
				1500	++p;
				1501	p = skipdigits(p); /* skip to next non-digit */
				1502	*pp = p;
				1503	return retval;
				1504	}
				1505
				1506	/*
				1507	* Return TRUE if "lbuf" is empty or only contains blanks.
				1508	*/
				1509	int
				1510	vim_isblankline(lbuf)
				1511	char_u *lbuf;
				1512	{
				1513	char_u *p;
				1514
				1515	p = skipwhite(lbuf);
				1516	return (p == NUL \|\| p == '\r' \|\| *p == '\n');
				1517	}
				1518
				1519	/*
				1520	* Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	1521	* hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1522	* If "hexp" is not NULL, returns a flag to indicate the type of the number:
				1523	* 0 decimal
				1524	* '0' octal
				1525	* 'X' hex
				1526	* 'x' hex
				1527	* If "len" is not NULL, the length of the number in characters is returned.
				1528	* If "nptr" is not NULL, the signed result is returned in it.
				1529	* If "unptr" is not NULL, the unsigned result is returned in it.
				1530	*/
				1531	void
				1532	vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
				1533	char_u *start;
				1534	int hexp; / return: type of number 0 = decimal, 'x'
				1535	or 'X' is hex, '0' = octal */
				1536	int len; / return: detected length of number */
				1537	int dooct; /* recognize octal number */
				1538	int dohex; /* recognize hex number */
				1539	long nptr; / return: signed result */
				1540	unsigned long unptr; / return: unsigned result */
				1541	{
				1542	char_u *ptr = start;
				1543	int hex = 0; /* default is decimal */
				1544	int negative = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1545	unsigned long un = 0;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1546	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1547
				1548	if (ptr[0] == '-')
				1549	{
				1550	negative = TRUE;
				1551	++ptr;
				1552	}
				1553
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1554	/* Recognize hex and octal. */
				1555	if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1556	{
				1557	hex = ptr[1];
				1558	if (dohex && (hex == 'X' \|\| hex == 'x') && vim_isxdigit(ptr[2]))
				1559	ptr += 2; /* hexadecimal */
				1560	else
				1561	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1562	hex = 0; /* default is decimal */
				1563	if (dooct)
				1564	{
				1565	/* Don't interpret "0", "08" or "0129" as octal. */
				1566	for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
				1567	{
				1568	if (ptr[n] > '7')
				1569	{
				1570	hex = 0; /* can't be octal */
				1571	break;
				1572	}
				1573	if (ptr[n] > '0')
				1574	hex = '0'; /* assume octal */
				1575	}
				1576	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1577	}
				1578	}
				1579
				1580	/*
				1581	* Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
				1582	*/
				1583	if (hex)
				1584	{
				1585	if (hex == '0')
				1586	{
				1587	/* octal */
				1588	while ('0' <= ptr && ptr <= '7')
				1589	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1590	un = 8 * un + (unsigned long)(*ptr - '0');
				1591	++ptr;
				1592	}
				1593	}
				1594	else
				1595	{
				1596	/* hex */
				1597	while (vim_isxdigit(*ptr))
				1598	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1599	un = 16 * un + (unsigned long)hex2nr(*ptr);
				1600	++ptr;
				1601	}
				1602	}
				1603	}
				1604	else
				1605	{
				1606	/* decimal */
				1607	while (VIM_ISDIGIT(*ptr))
				1608	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1609	un = 10 * un + (unsigned long)(*ptr - '0');
				1610	++ptr;
				1611	}
				1612	}
				1613
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1614	if (hexp != NULL)
				1615	*hexp = hex;
				1616	if (len != NULL)
				1617	*len = (int)(ptr - start);
				1618	if (nptr != NULL)
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	1619	{
				1620	if (negative) /* account for leading '-' for decimal numbers */
				1621	*nptr = -(long)un;
				1622	else
				1623	*nptr = (long)un;
				1624	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1625	if (unptr != NULL)
				1626	*unptr = un;
				1627	}
				1628
				/*
				 * Return the value of a single hex character.
				 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
				 * No validation is done: any other argument yields "c - '0'".
				 */
				int
				hex2nr(c)
				    int c;
				{
				    if (c >= 'a' && c <= 'f')
				        return c - 'a' + 10;
				    if (c >= 'A' && c <= 'F')
				        return c - 'A' + 10;
				    return c - '0';
				}
				1643
				#if defined(FEAT_TERMRESPONSE) \
				        || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
				/*
				 * Convert two hex characters to a byte.
				 * Return -1 if one of the characters is not hex.
				 * "p[0]" is the high nibble, "p[1]" the low nibble.
				 */
				int
				hexhex2nr(p)
				    char_u *p;
				{
				    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
				        return -1;
				    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
				}
				#endif
				1659
				1660	/*
				1661	* Return TRUE if "str" starts with a backslash that should be removed.
				1662	* For MS-DOS, WIN32 and OS/2 this is only done when the character after the
				1663	* backslash is not a normal file name character.
				1664	* '$' is a valid file name character, we don't remove the backslash before
				1665	* it. This means it is not possible to use an environment variable after a
				1666	* backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
				1667	* Although "\ name" is valid, the backslash in "Program\ files" must be
				1668	* removed. Assume a file name doesn't start with a space.
				1669	* For multi-byte names, never remove a backslash before a non-ascii
				1670	* character, assume that all multi-byte characters are valid file name
				1671	* characters.
				1672	*/
				1673	int
				1674	rem_backslash(str)
				1675	char_u *str;
				1676	{
				1677	#ifdef BACKSLASH_IN_FILENAME
				1678	return (str[0] == '\\'
				1679	# ifdef FEAT_MBYTE
				1680	&& str[1] < 0x80
				1681	# endif
				1682	&& (str[1] == ' '
				1683	\|\| (str[1] != NUL
				1684	&& str[1] != '*'
				1685	&& str[1] != '?'
				1686	&& !vim_isfilec(str[1]))));
				1687	#else
				1688	return (str[0] == '\\' && str[1] != NUL);
				1689	#endif
				1690	}
				1691
				1692	/*
				1693	* Halve the number of backslashes in a file name argument.
				1694	* For MS-DOS we only do this if the character after the backslash
				1695	* is not a normal file character.
				1696	*/
				1697	void
				1698	backslash_halve(p)
				1699	char_u *p;
				1700	{
				1701	for ( ; *p; ++p)
				1702	if (rem_backslash(p))
				1703	STRCPY(p, p + 1);
				1704	}
				1705
				1706	/*
				1707	* backslash_halve() plus save the result in allocated memory.
				1708	*/
				1709	char_u *
				1710	backslash_halve_save(p)
				1711	char_u *p;
				1712	{
				1713	char_u *res;
				1714
				1715	res = vim_strsave(p);
				1716	if (res == NULL)
				1717	return p;
				1718	backslash_halve(res);
				1719	return res;
				1720	}
				1721
				#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
				/*
				 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
				 * The first 64 entries have been added to map control characters defined in
				 * ascii.h
				 * Indexed by the EBCDIC byte; entries are written in octal.
				 */
				static char_u ebcdic2ascii_tab[256] =
				{
				    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
				    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
				    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
				    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
				    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
				    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
				    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
				    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
				    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
				    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
				    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
				    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
				    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
				    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
				    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
				    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
				    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
				    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
				    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
				    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
				    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
				    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
				    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
				    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
				    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
				    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
				    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
				    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
				    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
				    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
				};

				/*
				 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
				 * wanting 7-bit ASCII characters out the other end.
				 * The first "len" bytes of "buffer" are replaced in place through
				 * ebcdic2ascii_tab[].
				 */
				void
				ebcdic2ascii(buffer, len)
				    char_u *buffer;
				    int len;
				{
				    int i;

				    for (i = 0; i < len; i++)
				        buffer[i] = ebcdic2ascii_tab[buffer[i]];
				}
				#endif