Blame - src/charset.c - android_external_vim

blob: 25680f677d769a57fd14f9303adb00499a943466 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	#include "vim.h"
				11
				12	#ifdef FEAT_LINEBREAK
				13	static int win_chartabsize __ARGS((win_T wp, char_u p, colnr_T col));
				14	#endif
				15
				16	#ifdef FEAT_MBYTE
				17	static int win_nolbr_chartabsize __ARGS((win_T wp, char_u s, colnr_T col, int *headp));
				18	#endif
				19
				20	static int nr2hex __ARGS((int c));
				21
				22	static int chartab_initialized = FALSE;
				23
				24	/* b_chartab[] is an array of 32 bytes, each bit representing one of the
				25	* characters 0-255. */
				26	#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] \|= (1 << ((c) & 0x7))
				27	#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
				28	#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
				29
				30	/*
				31	* Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
				32	* characters for current buffer.
				33	*
				34	* Depends on the option settings 'iskeyword', 'isident', 'isfname',
				35	* 'isprint' and 'encoding'.
				36	*
				37	* The index in chartab[] depends on 'encoding':
				38	* - For non-multi-byte index with the byte (same as the character).
				39	* - For DBCS index with the first byte.
				40	* - For UTF-8 index with the character (when first byte is up to 0x80 it is
				41	* the same as the character, if the first byte is 0x80 and above it depends
				42	* on further bytes).
				43	*
				44	* The contents of chartab[]:
				45	* - The lower two bits, masked by CT_CELL_MASK, give the number of display
				46	* cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
				47	* - CT_PRINT_CHAR bit is set when the character is printable (no need to
				48	* translate the character before displaying it). Note that only DBCS
				49	* characters can have 2 display cells and still be printable.
				50	* - CT_FNAME_CHAR bit is set when the character can be in a file name.
				51	* - CT_ID_CHAR bit is set when the character can be in an identifier.
				52	*
				53	* Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
				54	* error, OK otherwise.
				55	*/
				56	int
				57	init_chartab()
				58	{
				59	return buf_init_chartab(curbuf, TRUE);
				60	}
				61
				62	int
				63	buf_init_chartab(buf, global)
				64	buf_T *buf;
				65	int global; /* FALSE: only set buf->b_chartab[] */
				66	{
				67	int c;
				68	int c2;
				69	char_u *p;
				70	int i;
				71	int tilde;
				72	int do_isalpha;
				73
				74	if (global)
				75	{
				76	/*
				77	* Set the default size for printable characters:
				78	* From <Space> to '~' is 1 (printable), others are 2 (not printable).
				79	* This also inits all 'isident' and 'isfname' flags to FALSE.
				80	*
				81	* EBCDIC: all chars below ' ' are not printable, all others are
				82	* printable.
				83	*/
				84	c = 0;
				85	while (c < ' ')
				86	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				87	#ifdef EBCDIC
				88	while (c < 255)
				89	#else
				90	while (c <= '~')
				91	#endif
				92	chartab[c++] = 1 + CT_PRINT_CHAR;
				93	#ifdef FEAT_FKMAP
				94	if (p_altkeymap)
				95	{
				96	while (c < YE)
				97	chartab[c++] = 1 + CT_PRINT_CHAR;
				98	}
				99	#endif
				100	while (c < 256)
				101	{
				102	#ifdef FEAT_MBYTE
				103	/* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
				104	if (enc_utf8 && c >= 0xa0)
				105	chartab[c++] = CT_PRINT_CHAR + 1;
				106	/* euc-jp characters starting with 0x8e are single width */
				107	else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
				108	chartab[c++] = CT_PRINT_CHAR + 1;
				109	/* other double-byte chars can be printable AND double-width */
				110	else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
				111	chartab[c++] = CT_PRINT_CHAR + 2;
				112	else
				113	#endif
				114	/* the rest is unprintable by default */
				115	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				116	}
				117
				118	#ifdef FEAT_MBYTE
				119	/* Assume that every multi-byte char is a filename character. */
				120	for (c = 1; c < 256; ++c)
				121	if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
				122	\|\| (enc_dbcs == DBCS_JPNU && c == 0x8e)
				123	\|\| (enc_utf8 && c >= 0xa0))
				124	chartab[c] \|= CT_FNAME_CHAR;
				125	#endif
				126	}
				127
				128	/*
				129	* Init word char flags all to FALSE
				130	*/
				131	vim_memset(buf->b_chartab, 0, (size_t)32);
				132	#ifdef FEAT_MBYTE
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	133	if (enc_dbcs != 0)
				134	for (c = 0; c < 256; ++c)
				135	{
				136	/* double-byte characters are probably word characters */
				137	if (MB_BYTE2LEN(c) == 2)
				138	SET_CHARTAB(buf, c);
				139	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	140	#endif
				141
				142	#ifdef FEAT_LISP
				143	/*
				144	* In lisp mode the '-' character is included in keywords.
				145	*/
				146	if (buf->b_p_lisp)
				147	SET_CHARTAB(buf, '-');
				148	#endif
				149
				150	/* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
				151	* options Each option is a list of characters, character numbers or
				152	* ranges, separated by commas, e.g.: "200-210,x,#-178,-"
				153	*/
				154	for (i = global ? 0 : 3; i <= 3; ++i)
				155	{
				156	if (i == 0)
				157	p = p_isi; /* first round: 'isident' */
				158	else if (i == 1)
				159	p = p_isp; /* second round: 'isprint' */
				160	else if (i == 2)
				161	p = p_isf; /* third round: 'isfname' */
				162	else /* i == 3 */
				163	p = buf->b_p_isk; /* fourth round: 'iskeyword' */
				164
				165	while (*p)
				166	{
				167	tilde = FALSE;
				168	do_isalpha = FALSE;
				169	if (*p == '^' && p[1] != NUL)
				170	{
				171	tilde = TRUE;
				172	++p;
				173	}
				174	if (VIM_ISDIGIT(*p))
				175	c = getdigits(&p);
				176	else
				177	c = *p++;
				178	c2 = -1;
				179	if (*p == '-' && p[1] != NUL)
				180	{
				181	++p;
				182	if (VIM_ISDIGIT(*p))
				183	c2 = getdigits(&p);
				184	else
				185	c2 = *p++;
				186	}
				187	if (c <= 0 \|\| (c2 < c && c2 != -1) \|\| c2 >= 256
				188	\|\| !(p == NUL \|\| p == ','))
				189	return FAIL;
				190
				191	if (c2 == -1) /* not a range */
				192	{
				193	/*
				194	* A single '@' (not "@-@"):
				195	* Decide on letters being ID/printable/keyword chars with
				196	* standard function isalpha(). This takes care of locale for
				197	* single-byte characters).
				198	*/
				199	if (c == '@')
				200	{
				201	do_isalpha = TRUE;
				202	c = 1;
				203	c2 = 255;
				204	}
				205	else
				206	c2 = c;
				207	}
				208	while (c <= c2)
				209	{
				210	if (!do_isalpha \|\| isalpha(c)
				211	#ifdef FEAT_FKMAP
				212	\|\| (p_altkeymap && (F_isalpha(c) \|\| F_isdigit(c)))
				213	#endif
				214	)
				215	{
				216	if (i == 0) /* (re)set ID flag */
				217	{
				218	if (tilde)
				219	chartab[c] &= ~CT_ID_CHAR;
				220	else
				221	chartab[c] \|= CT_ID_CHAR;
				222	}
				223	else if (i == 1) /* (re)set printable */
				224	{
				225	if ((c < ' '
				226	#ifndef EBCDIC
				227	\|\| c > '~'
				228	#endif
				229	#ifdef FEAT_FKMAP
				230	\|\| (p_altkeymap
				231	&& (F_isalpha(c) \|\| F_isdigit(c)))
				232	#endif
				233	)
				234	#ifdef FEAT_MBYTE
				235	/* For double-byte we keep the cell width, so
				236	* that we can detect it from the first byte. */
				237	&& !(enc_dbcs && MB_BYTE2LEN(c) == 2)
				238	#endif
				239	)
				240	{
				241	if (tilde)
				242	{
				243	chartab[c] = (chartab[c] & ~CT_CELL_MASK)
				244	+ ((dy_flags & DY_UHEX) ? 4 : 2);
				245	chartab[c] &= ~CT_PRINT_CHAR;
				246	}
				247	else
				248	{
				249	chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
				250	chartab[c] \|= CT_PRINT_CHAR;
				251	}
				252	}
				253	}
				254	else if (i == 2) /* (re)set fname flag */
				255	{
				256	if (tilde)
				257	chartab[c] &= ~CT_FNAME_CHAR;
				258	else
				259	chartab[c] \|= CT_FNAME_CHAR;
				260	}
				261	else /* i == 3 / / (re)set keyword flag */
				262	{
				263	if (tilde)
				264	RESET_CHARTAB(buf, c);
				265	else
				266	SET_CHARTAB(buf, c);
				267	}
				268	}
				269	++c;
				270	}
				271	p = skip_to_option_part(p);
				272	}
				273	}
				274	chartab_initialized = TRUE;
				275	return OK;
				276	}
				277
				278	/*
				279	* Translate any special characters in buf[bufsize] in-place.
				280	* The result is a string with only printable characters, but if there is not
				281	* enough room, not all characters will be translated.
				282	*/
				283	void
				284	trans_characters(buf, bufsize)
				285	char_u *buf;
				286	int bufsize;
				287	{
				288	int len; /* length of string needing translation */
				289	int room; /* room in buffer after string */
				290	char_u trs; / translated character */
				291	int trs_len; /* length of trs[] */
				292
				293	len = (int)STRLEN(buf);
				294	room = bufsize - len;
				295	while (*buf != 0)
				296	{
				297	# ifdef FEAT_MBYTE
				298	/* Assume a multi-byte character doesn't need translation. */
				299	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
				300	len -= trs_len;
				301	else
				302	# endif
				303	{
				304	trs = transchar_byte(*buf);
				305	trs_len = (int)STRLEN(trs);
				306	if (trs_len > 1)
				307	{
				308	room -= trs_len - 1;
				309	if (room <= 0)
				310	return;
				311	mch_memmove(buf + trs_len, buf + 1, (size_t)len);
				312	}
				313	mch_memmove(buf, trs, (size_t)trs_len);
				314	--len;
				315	}
				316	buf += trs_len;
				317	}
				318	}
				319
				320	#if defined(FEAT_EVAL) \|\| defined(FEAT_TITLE) \|\| defined(PROTO)
				321	/*
				322	* Translate a string into allocated memory, replacing special chars with
				323	* printable chars. Returns NULL when out of memory.
				324	*/
				325	char_u *
				326	transstr(s)
				327	char_u *s;
				328	{
				329	char_u *res;
				330	char_u *p;
				331	#ifdef FEAT_MBYTE
				332	int l, len, c;
				333	char_u hexbuf[11];
				334	#endif
				335
				336	#ifdef FEAT_MBYTE
				337	if (has_mbyte)
				338	{
				339	/* Compute the length of the result, taking account of unprintable
				340	* multi-byte characters. */
				341	len = 0;
				342	p = s;
				343	while (*p != NUL)
				344	{
				345	if ((l = (*mb_ptr2len_check)(p)) > 1)
				346	{
				347	c = (*mb_ptr2char)(p);
				348	p += l;
				349	if (vim_isprintc(c))
				350	len += l;
				351	else
				352	{
				353	transchar_hex(hexbuf, c);
				354	len += STRLEN(hexbuf);
				355	}
				356	}
				357	else
				358	{
				359	l = byte2cells(*p++);
				360	if (l > 0)
				361	len += l;
				362	else
				363	len += 4; /* illegal byte sequence */
				364	}
				365	}
				366	res = alloc((unsigned)(len + 1));
				367	}
				368	else
				369	#endif
				370	res = alloc((unsigned)(vim_strsize(s) + 1));
				371	if (res != NULL)
				372	{
				373	*res = NUL;
				374	p = s;
				375	while (*p != NUL)
				376	{
				377	#ifdef FEAT_MBYTE
				378	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				379	{
				380	c = (*mb_ptr2char)(p);
				381	if (vim_isprintc(c))
				382	STRNCAT(res, p, l); /* append printable multi-byte char */
				383	else
				384	transchar_hex(res + STRLEN(res), c);
				385	p += l;
				386	}
				387	else
				388	#endif
				389	STRCAT(res, transchar_byte(*p++));
				390	}
				391	}
				392	return res;
				393	}
				394	#endif
				395
				396	#if defined(FEAT_SYN_HL) \|\| defined(FEAT_INS_EXPAND) \|\| defined(PROTO)
				397	/*
Bram Moolenaar	217ad92	2005-03-20 22:37:15 +0000	[diff] [blame]	398	* Convert the string "str[orglen]" to do ignore-case comparing. Uses the
				399	* current locale.
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	400	* When "buf" is NULL returns an allocated string (NULL for out-of-memory).
				401	* Otherwise puts the result in "buf[buflen]".
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	*/
				403	char_u *
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	404	str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	405	char_u *str;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	406	int orglen;
				407	char_u *buf;
				408	int buflen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	409	{
				410	garray_T ga;
				411	int i;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	412	int len = orglen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	413
				414	#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
				415	#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	416	#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
				417	#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	418
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	419	/* Copy "str" into "buf" or allocated memory, unmodified. */
				420	if (buf == NULL)
				421	{
				422	ga_init2(&ga, 1, 10);
				423	if (ga_grow(&ga, len + 1) == FAIL)
				424	return NULL;
				425	mch_memmove(ga.ga_data, str, (size_t)len);
				426	ga.ga_len = len;
				427	}
				428	else
				429	{
				430	if (len >= buflen) /* Ugly! */
				431	len = buflen - 1;
				432	mch_memmove(buf, str, (size_t)len);
				433	}
				434	if (buf == NULL)
				435	GA_CHAR(len) = NUL;
				436	else
				437	buf[len] = NUL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	438
				439	/* Make each character lower case. */
				440	i = 0;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	441	while (STR_CHAR(i) != NUL)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	442	{
				443	#ifdef FEAT_MBYTE
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	444	if (enc_utf8 \|\| (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	445	{
				446	if (enc_utf8)
				447	{
				448	int c, lc;
				449
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	450	c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	451	lc = utf_tolower(c);
				452	if (c != lc)
				453	{
				454	int ol = utf_char2len(c);
				455	int nl = utf_char2len(lc);
				456
				457	/* If the byte length changes need to shift the following
				458	* characters forward or backward. */
				459	if (ol != nl)
				460	{
				461	if (nl > ol)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	462	{
				463	if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
				464	: len + nl - ol >= buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	465	{
				466	/* out of memory, keep old char */
				467	lc = c;
				468	nl = ol;
				469	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	470	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	471	if (ol != nl)
				472	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	473	if (buf == NULL)
				474	{
				475	mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	476	STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	477	ga.ga_len += nl - ol;
				478	}
				479	else
				480	{
				481	mch_memmove(buf + i + nl, buf + i + ol,
				482	STRLEN(buf + i + ol) + 1);
				483	len += nl - ol;
				484	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	485	}
				486	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	487	(void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	488	}
				489	}
				490	/* skip to next multi-byte char */
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	491	i += (*mb_ptr2len_check)(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	492	}
				493	else
				494	#endif
				495	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	496	if (buf == NULL)
				497	GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
				498	else
				499	buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	500	++i;
				501	}
				502	}
				503
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	504	if (buf == NULL)
				505	return (char_u *)ga.ga_data;
				506	return buf;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	507	}
				508	#endif
				509
				510	/*
				511	* Catch 22: chartab[] can't be initialized before the options are
				512	* initialized, and initializing options may cause transchar() to be called!
				513	* When chartab_initialized == FALSE don't use chartab[].
				514	* Does NOT work for multi-byte characters, c must be <= 255.
				515	* Also doesn't work for the first byte of a multi-byte, "c" must be a
				516	* character!
				517	*/
				518	static char_u transchar_buf[7];
				519
				520	char_u *
				521	transchar(c)
				522	int c;
				523	{
				524	int i;
				525
				526	i = 0;
				527	if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
				528	{
				529	transchar_buf[0] = '~';
				530	transchar_buf[1] = '@';
				531	i = 2;
				532	c = K_SECOND(c);
				533	}
				534
				535	if ((!chartab_initialized && (
				536	#ifdef EBCDIC
				537	(c >= 64 && c < 255)
				538	#else
				539	(c >= ' ' && c <= '~')
				540	#endif
				541	#ifdef FEAT_FKMAP
				542	\|\| F_ischar(c)
				543	#endif
				544	)) \|\| (c < 256 && vim_isprintc_strict(c)))
				545	{
				546	/* printable character */
				547	transchar_buf[i] = c;
				548	transchar_buf[i + 1] = NUL;
				549	}
				550	else
				551	transchar_nonprint(transchar_buf + i, c);
				552	return transchar_buf;
				553	}
				554
				555	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				556	/*
				557	* Like transchar(), but called with a byte instead of a character. Checks
				558	* for an illegal UTF-8 byte.
				559	*/
				560	char_u *
				561	transchar_byte(c)
				562	int c;
				563	{
				564	if (enc_utf8 && c >= 0x80)
				565	{
				566	transchar_nonprint(transchar_buf, c);
				567	return transchar_buf;
				568	}
				569	return transchar(c);
				570	}
				571	#endif
				572
				573	/*
				574	* Convert non-printable character to two or more printable characters in
				575	* "buf[]". "buf" needs to be able to hold five bytes.
				576	* Does NOT work for multi-byte characters, c must be <= 255.
				577	*/
				578	void
				579	transchar_nonprint(buf, c)
				580	char_u *buf;
				581	int c;
				582	{
				583	if (c == NL)
				584	c = NUL; /* we use newline in place of a NUL */
				585	else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
				586	c = NL; /* we use CR in place of NL in this case */
				587
				588	if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
				589	transchar_hex(buf, c);
				590
				591	#ifdef EBCDIC
				592	/* For EBCDIC only the characters 0-63 and 255 are not printable */
				593	else if (CtrlChar(c) != 0 \|\| c == DEL)
				594	#else
				595	else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
				596	#endif
				597	{
				598	buf[0] = '^';
				599	#ifdef EBCDIC
				600	if (c == DEL)
				601	buf[1] = '?'; /* DEL displayed as ^? */
				602	else
				603	buf[1] = CtrlChar(c);
				604	#else
				605	buf[1] = c ^ 0x40; /* DEL displayed as ^? */
				606	#endif
				607
				608	buf[2] = NUL;
				609	}
				610	#ifdef FEAT_MBYTE
				611	else if (enc_utf8 && c >= 0x80)
				612	{
				613	transchar_hex(buf, c);
				614	}
				615	#endif
				616	#ifndef EBCDIC
				617	else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
				618	{
				619	buf[0] = '\|';
				620	buf[1] = c - 0x80;
				621	buf[2] = NUL;
				622	}
				623	#else
				624	else if (c < 64)
				625	{
				626	buf[0] = '~';
				627	buf[1] = MetaChar(c);
				628	buf[2] = NUL;
				629	}
				630	#endif
				631	else /* 0x80 - 0x9f and 0xff */
				632	{
				633	/*
				634	* TODO: EBCDIC I don't know what to do with this chars, so I display
				635	* them as '~?' for now
				636	*/
				637	buf[0] = '~';
				638	#ifdef EBCDIC
				639	buf[1] = '?'; /* 0xff displayed as ~? */
				640	#else
				641	buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
				642	#endif
				643	buf[2] = NUL;
				644	}
				645	}
				646
				647	void
				648	transchar_hex(buf, c)
				649	char_u *buf;
				650	int c;
				651	{
				652	int i = 0;
				653
				654	buf[0] = '<';
				655	#ifdef FEAT_MBYTE
				656	if (c > 255)
				657	{
				658	buf[++i] = nr2hex((unsigned)c >> 12);
				659	buf[++i] = nr2hex((unsigned)c >> 8);
				660	}
				661	#endif
				662	buf[++i] = nr2hex((unsigned)c >> 4);
				663	buf[++i] = nr2hex(c);
				664	buf[++i] = '>';
				665	buf[++i] = NUL;
				666	}
				667
				668	/*
				669	* Convert the lower 4 bits of byte "c" to its hex character.
				670	* Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
				671	* function key 1.
				672	*/
				673	static int
				674	nr2hex(c)
				675	int c;
				676	{
				677	if ((c & 0xf) <= 9)
				678	return (c & 0xf) + '0';
				679	return (c & 0xf) - 10 + 'a';
				680	}
				681
				682	/*
				683	* Return number of display cells occupied by byte "b".
				684	* Caller must make sure 0 <= b <= 255.
				685	* For multi-byte mode "b" must be the first byte of a character.
				686	* A TAB is counted as two cells: "^I".
				687	* For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
				688	* cells depends on further bytes.
				689	*/
				690	int
				691	byte2cells(b)
				692	int b;
				693	{
				694	#ifdef FEAT_MBYTE
				695	if (enc_utf8 && b >= 0x80)
				696	return 0;
				697	#endif
				698	return (chartab[b] & CT_CELL_MASK);
				699	}
				700
				701	/*
				702	* Return number of display cells occupied by character "c".
				703	* "c" can be a special key (negative number) in which case 3 or 4 is returned.
				704	* A TAB is counted as two cells: "^I" or four: "<09>".
				705	*/
				706	int
				707	char2cells(c)
				708	int c;
				709	{
				710	if (IS_SPECIAL(c))
				711	return char2cells(K_SECOND(c)) + 2;
				712	#ifdef FEAT_MBYTE
				713	if (c >= 0x80)
				714	{
				715	/* UTF-8: above 0x80 need to check the value */
				716	if (enc_utf8)
				717	return utf_char2cells(c);
				718	/* DBCS: double-byte means double-width, except for euc-jp with first
				719	* byte 0x8e */
				720	if (enc_dbcs != 0 && c >= 0x100)
				721	{
				722	if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
				723	return 1;
				724	return 2;
				725	}
				726	}
				727	#endif
				728	return (chartab[c & 0xff] & CT_CELL_MASK);
				729	}
				730
				731	/*
				732	* Return number of display cells occupied by character at "*p".
				733	* A TAB is counted as two cells: "^I" or four: "<09>".
				734	*/
				735	int
				736	ptr2cells(p)
				737	char_u *p;
				738	{
				739	#ifdef FEAT_MBYTE
				740	/* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
				741	if (enc_utf8 && *p >= 0x80)
				742	return utf_ptr2cells(p);
				743	/* For DBCS we can tell the cell count from the first byte. */
				744	#endif
				745	return (chartab[*p] & CT_CELL_MASK);
				746	}
				747
				748	/*
				749	* Return the number of characters string "s" will take on the screen,
				750	* counting TABs as two characters: "^I".
				751	*/
				752	int
				753	vim_strsize(s)
				754	char_u *s;
				755	{
				756	return vim_strnsize(s, (int)MAXCOL);
				757	}
				758
				759	/*
				760	* Return the number of characters string "s[len]" will take on the screen,
				761	* counting TABs as two characters: "^I".
				762	*/
				763	int
				764	vim_strnsize(s, len)
				765	char_u *s;
				766	int len;
				767	{
				768	int size = 0;
				769
				770	while (*s != NUL && --len >= 0)
				771	{
				772	#ifdef FEAT_MBYTE
				773	if (has_mbyte)
				774	{
				775	int l = (*mb_ptr2len_check)(s);
				776
				777	size += ptr2cells(s);
				778	s += l;
				779	len -= l - 1;
				780	}
				781	else
				782	#endif
				783	size += byte2cells(*s++);
				784	}
				785	return size;
				786	}
				787
				788	/*
				789	* Return the number of characters 'c' will take on the screen, taking
				790	* into account the size of a tab.
				791	* Use a define to make it fast, this is used very often!!!
				792	* Also see getvcol() below.
				793	*/
				794
				795	#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
				796	if (*(p) == TAB && (!(wp)->w_p_list \|\| lcs_tab1)) \
				797	{ \
				798	int ts; \
				799	ts = (buf)->b_p_ts; \
				800	return (int)(ts - (col % ts)); \
				801	} \
				802	else \
				803	return ptr2cells(p);
				804
				805	#if defined(FEAT_VREPLACE) \|\| defined(FEAT_EX_EXTRA) \|\| defined(FEAT_GUI) \
				806	\|\| defined(FEAT_VIRTUALEDIT) \|\| defined(PROTO)
				807	int
				808	chartabsize(p, col)
				809	char_u *p;
				810	colnr_T col;
				811	{
				812	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
				813	}
				814	#endif
				815
				816	#ifdef FEAT_LINEBREAK
				817	static int
				818	win_chartabsize(wp, p, col)
				819	win_T *wp;
				820	char_u *p;
				821	colnr_T col;
				822	{
				823	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
				824	}
				825	#endif
				826
				827	/*
				828	* return the number of characters the string 's' will take on the screen,
				829	* taking into account the size of a tab
				830	*/
				831	int
				832	linetabsize(s)
				833	char_u *s;
				834	{
				835	colnr_T col = 0;
				836
				837	while (*s != NUL)
				838	col += lbr_chartabsize_adv(&s, col);
				839	return (int)col;
				840	}
				841
				842	/*
				843	* Like linetabsize(), but for a given window instead of the current one.
				844	*/
				845	int
				846	win_linetabsize(wp, p, len)
				847	win_T *wp;
				848	char_u *p;
				849	colnr_T len;
				850	{
				851	colnr_T col = 0;
				852	char_u *s;
				853
Bram Moolenaar	b5bf5b8	2004-12-24 14:35:23 +0000	[diff] [blame]	854	for (s = p; *s != NUL && (len == MAXCOL \|\| s < p + len); mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	855	col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	856	return (int)col;
				857	}
				858
				859	/*
Bram Moolenaar	8169525	2004-12-29 20:58:21 +0000	[diff] [blame]	860	* Return TRUE if 'c' is a normal identifier character:
				861	* Letters and characters from the 'isident' option.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	862	*/
				863	int
				864	vim_isIDc(c)
				865	int c;
				866	{
				867	return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
				868	}
				869
				870	/*
				871	* return TRUE if 'c' is a keyword character: Letters and characters from
				872	* 'iskeyword' option for current buffer.
				873	* For multi-byte characters mb_get_class() is used (builtin rules).
				874	*/
				875	int
				876	vim_iswordc(c)
				877	int c;
				878	{
				879	#ifdef FEAT_MBYTE
				880	if (c >= 0x100)
				881	{
				882	if (enc_dbcs != 0)
				883	return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
				884	if (enc_utf8)
				885	return utf_class(c) >= 2;
				886	}
				887	#endif
				888	return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
				889	}
				890
				891	/*
				892	* Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
				893	*/
				894	int
				895	vim_iswordp(p)
				896	char_u *p;
				897	{
				898	#ifdef FEAT_MBYTE
				899	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				900	return mb_get_class(p) >= 2;
				901	#endif
				902	return GET_CHARTAB(curbuf, *p) != 0;
				903	}
				904
				905	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				906	int
				907	vim_iswordc_buf(p, buf)
				908	char_u *p;
				909	buf_T *buf;
				910	{
				911	# ifdef FEAT_MBYTE
				912	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				913	return mb_get_class(p) >= 2;
				914	# endif
				915	return (GET_CHARTAB(buf, *p) != 0);
				916	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	917
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	918	/*
				919	* The tables used for spelling. These are only used for the first 256
				920	* characters.
				921	*/
				922	typedef struct spelltab_S
				923	{
				924	char_u st_isw[256]; /* flags: is word char */
				925	char_u st_isu[256]; /* flags: is uppercase char */
				926	char_u st_fold[256]; /* chars: folded case */
				927	} spelltab_T;
				928
				929	static spelltab_T spelltab;
				930	static int did_set_spelltab;
				931
				932	#define SPELL_ISWORD 1
				933	#define SPELL_ISUPPER 2
				934
				935	static void clear_spell_chartab __ARGS((spelltab_T *sp));
				936	static int set_spell_finish __ARGS((spelltab_T *new_st));
				937
				938	/*
				939	* Init the chartab used for spelling for ASCII.
				940	* EBCDIC is not supported!
				941	*/
				942	static void
				943	clear_spell_chartab(sp)
				944	spelltab_T *sp;
				945	{
				946	int i;
				947
				948	/* Init everything to FALSE. */
				949	vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
				950	vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
				951	for (i = 0; i < 256; ++i)
				952	sp->st_fold[i] = i;
				953
				954	/* We include digits. A word shouldn't start with a digit, but handling
				955	* that is done separately. */
				956	for (i = '0'; i <= '9'; ++i)
				957	sp->st_isw[i] = TRUE;
				958	for (i = 'A'; i <= 'Z'; ++i)
				959	{
				960	sp->st_isw[i] = TRUE;
				961	sp->st_isu[i] = TRUE;
				962	sp->st_fold[i] = i + 0x20;
				963	}
				964	for (i = 'a'; i <= 'z'; ++i)
				965	sp->st_isw[i] = TRUE;
				966	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	967
				968	/*
				969	* Init the chartab used for spelling. Only depends on 'encoding'.
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	970	* Called once while starting up and when 'encoding' changes.
				971	* The default is to use isalpha(), but the spell file should define the word
				972	* characters to make it possible that 'encoding' differs from the current
				973	* locale.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	974	*/
				975	void
				976	init_spell_chartab()
				977	{
				978	int i;
				979
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	980	did_set_spelltab = FALSE;
				981	clear_spell_chartab(&spelltab);
				982
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	983	#ifdef FEAT_MBYTE
				984	if (enc_dbcs)
				985	{
				986	/* DBCS: assume double-wide characters are word characters. */
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	987	for (i = 128; i <= 255; ++i)
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	988	if (MB_BYTE2LEN(i) == 2)
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	989	spelltab.st_isw[i] = TRUE;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	990	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	991	else
				992	#endif
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	993	{
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	994	/* Rough guess: use isalpha() and isupper() for characters above 128.
				995	* */
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	996	for (i = 128; i < 256; ++i)
				997	{
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	998	spelltab.st_isw[i] = MB_ISUPPER(i) \|\| MB_ISLOWER(i);
				999	if (MB_ISUPPER(i))
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1000	{
				1001	spelltab.st_isu[i] = TRUE;
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	1002	spelltab.st_fold[i] = MB_TOLOWER(i);
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1003	}
				1004	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1005	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1006	}
				1007
Bram Moolenaar	051b782	2005-05-19 21:00:46 +0000	[diff] [blame]	1008	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1009	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				1010	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				1011
				1012	/*
				1013	* Set the spell character tables from strings in the affix file.
				1014	*/
				1015	int
				1016	set_spell_chartab(fol, low, upp)
				1017	char_u *fol;
				1018	char_u *low;
				1019	char_u *upp;
				1020	{
				1021	/* We build the new tables here first, so that we can compare with the
				1022	* previous one. */
				1023	spelltab_T new_st;
				1024	char_u pf = fol, pl = low, *pu = upp;
				1025	int f, l, u;
				1026
				1027	clear_spell_chartab(&new_st);
				1028
				1029	while (*pf != NUL)
				1030	{
				1031	if (pl == NUL \|\| pu == NUL)
				1032	{
				1033	EMSG(_(e_affform));
				1034	return FAIL;
				1035	}
				1036	#ifdef FEAT_MBYTE
				1037	f = mb_ptr2char_adv(&pf);
				1038	l = mb_ptr2char_adv(&pl);
				1039	u = mb_ptr2char_adv(&pu);
				1040	#else
				1041	f = *pf++;
				1042	l = *pl++;
				1043	u = *pu++;
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1044	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1045	/* Every character that appears is a word character. */
				1046	if (f < 256)
				1047	new_st.st_isw[f] = TRUE;
				1048	if (l < 256)
				1049	new_st.st_isw[l] = TRUE;
				1050	if (u < 256)
				1051	new_st.st_isw[u] = TRUE;
				1052
				1053	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				1054	* case-folding */
				1055	if (l < 256 && l != f)
				1056	{
				1057	if (f >= 256)
				1058	{
				1059	EMSG(_(e_affrange));
				1060	return FAIL;
				1061	}
				1062	new_st.st_fold[l] = f;
				1063	}
				1064
				1065	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				1066	* case-folding and it's upper case. */
				1067	if (u < 256 && u != f)
				1068	{
				1069	if (f >= 256)
				1070	{
				1071	EMSG(_(e_affrange));
				1072	return FAIL;
				1073	}
				1074	new_st.st_fold[u] = f;
				1075	new_st.st_isu[u] = TRUE;
				1076	}
				1077	}
				1078
				1079	if (pl != NUL \|\| pu != NUL)
				1080	{
				1081	EMSG(_(e_affform));
				1082	return FAIL;
				1083	}
				1084
				1085	return set_spell_finish(&new_st);
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1086	}
Bram Moolenaar	051b782	2005-05-19 21:00:46 +0000	[diff] [blame]	1087	#endif
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1088
				1089	/*
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1090	* Set the spell character tables from strings in the .spl file.
				1091	*/
				1092	int
				1093	set_spell_charflags(flags, cnt, upp)
				1094	char_u *flags;
				1095	int cnt;
				1096	char_u *upp;
				1097	{
				1098	/* We build the new tables here first, so that we can compare with the
				1099	* previous one. */
				1100	spelltab_T new_st;
				1101	int i;
				1102	char_u *p = upp;
				1103
				1104	clear_spell_chartab(&new_st);
				1105
				1106	for (i = 0; i < cnt; ++i)
				1107	{
				1108	new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
				1109	new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
				1110
				1111	if (*p == NUL)
				1112	return FAIL;
				1113	#ifdef FEAT_MBYTE
				1114	new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
				1115	#else
				1116	new_st.st_fold[i + 128] = *p++;
				1117	#endif
				1118	}
				1119
				1120	return set_spell_finish(&new_st);
				1121	}
				1122
				1123	static int
				1124	set_spell_finish(new_st)
				1125	spelltab_T *new_st;
				1126	{
				1127	int i;
				1128
				1129	if (did_set_spelltab)
				1130	{
				1131	/* check that it's the same table */
				1132	for (i = 0; i < 256; ++i)
				1133	{
				1134	if (spelltab.st_isw[i] != new_st->st_isw[i]
				1135	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				1136	\|\| spelltab.st_fold[i] != new_st->st_fold[i])
				1137	{
				1138	EMSG(_("E763: Word characters differ between spell files"));
				1139	return FAIL;
				1140	}
				1141	}
				1142	}
				1143	else
				1144	{
				1145	/* copy the new spelltab into the one being used */
				1146	spelltab = *new_st;
				1147	did_set_spelltab = TRUE;
				1148	}
				1149
				1150	return OK;
				1151	}
				1152
				1153	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1154	/*
				1155	* Write the current tables into the .spl file.
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	1156	* This makes sure the same characters are recognized as word characters when
				1157	* generating an when using a spell file.
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1158	*/
				1159	void
				1160	write_spell_chartab(fd)
				1161	FILE *fd;
				1162	{
				1163	char_u charbuf[256 * 4];
				1164	int len = 0;
				1165	int flags;
				1166	int i;
				1167
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1168	fputc(128, fd); /* <charflagslen> */
				1169	for (i = 128; i < 256; ++i)
				1170	{
				1171	flags = 0;
				1172	if (spelltab.st_isw[i])
				1173	flags \|= SPELL_ISWORD;
				1174	if (spelltab.st_isu[i])
				1175	flags \|= SPELL_ISUPPER;
				1176	fputc(flags, fd); /* <charflags> */
				1177
				1178	len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
				1179	}
				1180
				1181	put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
				1182	fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
				1183	}
				1184	#endif
				1185
				1186	/*
				1187	* Return TRUE if "p" points to a word character for spelling.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1188	*/
				1189	int
				1190	spell_iswordc(p)
				1191	char_u *p;
				1192	{
				1193	# ifdef FEAT_MBYTE
				1194	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				1195	return mb_get_class(p) >= 2;
				1196	# endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1197	return spelltab.st_isw[*p];
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1198	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1199
				1200	/*
				1201	* Return TRUE if "c" is an upper-case character for spelling.
				1202	*/
				1203	int
				1204	spell_isupper(c)
				1205	int c;
				1206	{
				1207	# ifdef FEAT_MBYTE
				1208	if (enc_utf8)
				1209	{
				1210	/* For Unicode we can call utf_isupper(), but don't do that for ASCII,
				1211	* because we don't want to use 'casemap' here. */
				1212	if (c >= 128)
				1213	return utf_isupper(c);
				1214	}
				1215	else if (has_mbyte && c > 256)
				1216	{
				1217	/* For characters above 255 we don't have something specfied.
				1218	* Fall back to locale-dependent iswupper(). If not available
				1219	* simply return FALSE. */
				1220	# ifdef HAVE_ISWUPPER
				1221	return iswupper(c);
				1222	# else
				1223	return FALSE;
				1224	# endif
				1225	}
				1226	# endif
				1227	return spelltab.st_isu[c];
				1228	}
				1229
				1230	/*
				1231	* case-fold "p[len]" into "buf[buflen]". Used for spell checking.
				1232	* Returns FAIL when something wrong.
				1233	*/
				1234	int
				1235	spell_casefold(p, len, buf, buflen)
				1236	char_u *p;
				1237	int len;
				1238	char_u *buf;
				1239	int buflen;
				1240	{
				1241	int i;
				1242
				1243	if (len >= buflen)
				1244	{
				1245	buf[0] = NUL;
				1246	return FAIL; /* result will not fit */
				1247	}
				1248
				1249	#ifdef FEAT_MBYTE
				1250	if (has_mbyte)
				1251	{
				1252	int c;
				1253	int outi = 0;
				1254
				1255	/* Fold one character at a time. */
				1256	for (i = 0; i < len; i += mb_ptr2len_check(p + i))
				1257	{
				1258	c = mb_ptr2char(p + i);
				1259	if (enc_utf8)
				1260	/* For Unicode case folding is always the same, no need to use
				1261	* the table from the spell file. */
				1262	c = utf_fold(c);
				1263	else if (c < 256)
				1264	/* Use the table from the spell file. */
				1265	c = spelltab.st_fold[c];
				1266	# ifdef HAVE_TOWLOWER
				1267	else
				1268	/* We don't know what to do, fall back to towlower(), it
				1269	* depends on the current locale. */
				1270	c = towlower(c);
				1271	# endif
				1272	if (outi + MB_MAXBYTES > buflen)
				1273	{
				1274	buf[outi] = NUL;
				1275	return FAIL;
				1276	}
				1277	outi += mb_char2bytes(c, buf + outi);
				1278	}
				1279	buf[outi] = NUL;
				1280	}
				1281	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1282	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1283	{
				1284	/* Be quick for non-multibyte encodings. */
				1285	for (i = 0; i < len; ++i)
				1286	buf[i] = spelltab.st_fold[p[i]];
				1287	buf[i] = NUL;
				1288	}
				1289
				1290	return OK;
				1291	}
				1292
				1293	#endif /* FEAT_SYN_HL */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1294
				1295	/*
				1296	* return TRUE if 'c' is a valid file-name character
				1297	* Assume characters above 0x100 are valid (multi-byte).
				1298	*/
				1299	int
				1300	vim_isfilec(c)
				1301	int c;
				1302	{
				1303	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
				1304	}
				1305
				1306	/*
				1307	* return TRUE if 'c' is a printable character
				1308	* Assume characters above 0x100 are printable (multi-byte), except for
				1309	* Unicode.
				1310	*/
				1311	int
				1312	vim_isprintc(c)
				1313	int c;
				1314	{
				1315	#ifdef FEAT_MBYTE
				1316	if (enc_utf8 && c >= 0x100)
				1317	return utf_printable(c);
				1318	#endif
				1319	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1320	}
				1321
				1322	/*
				1323	* Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
				1324	* byte of a double-byte character.
				1325	*/
				1326	int
				1327	vim_isprintc_strict(c)
				1328	int c;
				1329	{
				1330	#ifdef FEAT_MBYTE
				1331	if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
				1332	return FALSE;
				1333	if (enc_utf8 && c >= 0x100)
				1334	return utf_printable(c);
				1335	#endif
				1336	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1337	}
				1338
				1339	/*
				1340	* like chartabsize(), but also check for line breaks on the screen
				1341	*/
				1342	int
				1343	lbr_chartabsize(s, col)
				1344	unsigned char *s;
				1345	colnr_T col;
				1346	{
				1347	#ifdef FEAT_LINEBREAK
				1348	if (!curwin->w_p_lbr && *p_sbr == NUL)
				1349	{
				1350	#endif
				1351	#ifdef FEAT_MBYTE
				1352	if (curwin->w_p_wrap)
				1353	return win_nolbr_chartabsize(curwin, s, col, NULL);
				1354	#endif
				1355	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
				1356	#ifdef FEAT_LINEBREAK
				1357	}
				1358	return win_lbr_chartabsize(curwin, s, col, NULL);
				1359	#endif
				1360	}
				1361
				1362	/*
				1363	* Call lbr_chartabsize() and advance the pointer.
				1364	*/
				1365	int
				1366	lbr_chartabsize_adv(s, col)
				1367	char_u **s;
				1368	colnr_T col;
				1369	{
				1370	int retval;
				1371
				1372	retval = lbr_chartabsize(*s, col);
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1373	mb_ptr_adv(*s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1374	return retval;
				1375	}
				1376
				1377	/*
				1378	* This function is used very often, keep it fast!!!!
				1379	*
				1380	* If "headp" not NULL, set *headp to the size of what we for 'showbreak'
				1381	* string at start of line. Warning: *headp is only set if it's a non-zero
				1382	* value, init to 0 before calling.
				1383	*/
				1384	/ARGSUSED/
				1385	int
				1386	win_lbr_chartabsize(wp, s, col, headp)
				1387	win_T *wp;
				1388	char_u *s;
				1389	colnr_T col;
				1390	int *headp;
				1391	{
				1392	#ifdef FEAT_LINEBREAK
				1393	int c;
				1394	int size;
				1395	colnr_T col2;
				1396	colnr_T colmax;
				1397	int added;
				1398	# ifdef FEAT_MBYTE
				1399	int mb_added = 0;
				1400	# else
				1401	# define mb_added 0
				1402	# endif
				1403	int numberextra;
				1404	char_u *ps;
				1405	int tab_corr = (*s == TAB);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1406	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1407
				1408	/*
				1409	* No 'linebreak' and 'showbreak': return quickly.
				1410	*/
				1411	if (!wp->w_p_lbr && *p_sbr == NUL)
				1412	#endif
				1413	{
				1414	#ifdef FEAT_MBYTE
				1415	if (wp->w_p_wrap)
				1416	return win_nolbr_chartabsize(wp, s, col, headp);
				1417	#endif
				1418	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
				1419	}
				1420
				1421	#ifdef FEAT_LINEBREAK
				1422	/*
				1423	* First get normal size, without 'linebreak'
				1424	*/
				1425	size = win_chartabsize(wp, s, col);
				1426	c = *s;
				1427
				1428	/*
				1429	* If 'linebreak' set check at a blank before a non-blank if the line
				1430	* needs a break here
				1431	*/
				1432	if (wp->w_p_lbr
				1433	&& vim_isbreak(c)
				1434	&& !vim_isbreak(s[1])
				1435	&& !wp->w_p_list
				1436	&& wp->w_p_wrap
				1437	# ifdef FEAT_VERTSPLIT
				1438	&& wp->w_width != 0
				1439	# endif
				1440	)
				1441	{
				1442	/*
				1443	* Count all characters from first non-blank after a blank up to next
				1444	* non-blank after a blank.
				1445	*/
				1446	numberextra = win_col_off(wp);
				1447	col2 = col;
				1448	colmax = W_WIDTH(wp) - numberextra;
				1449	if (col >= colmax)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1450	{
				1451	n = colmax + win_col_off2(wp);
				1452	if (n > 0)
				1453	colmax += (((col - colmax) / n) + 1) * n;
				1454	}
				1455
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1456	for (;;)
				1457	{
				1458	ps = s;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1459	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1460	c = *s;
				1461	if (!(c != NUL
				1462	&& (vim_isbreak(c)
				1463	\|\| (!vim_isbreak(c)
				1464	&& (col2 == col \|\| !vim_isbreak(*ps))))))
				1465	break;
				1466
				1467	col2 += win_chartabsize(wp, s, col2);
				1468	if (col2 >= colmax) /* doesn't fit */
				1469	{
				1470	size = colmax - col;
				1471	tab_corr = FALSE;
				1472	break;
				1473	}
				1474	}
				1475	}
				1476	# ifdef FEAT_MBYTE
				1477	else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				1478	&& wp->w_p_wrap && in_win_border(wp, col))
				1479	{
				1480	++size; /* Count the ">" in the last column. */
				1481	mb_added = 1;
				1482	}
				1483	# endif
				1484
				1485	/*
				1486	* May have to add something for 'showbreak' string at start of line
				1487	* Set *headp to the size of what we add.
				1488	*/
				1489	added = 0;
				1490	if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
				1491	{
				1492	numberextra = win_col_off(wp);
				1493	col += numberextra + mb_added;
				1494	if (col >= (colnr_T)W_WIDTH(wp))
				1495	{
				1496	col -= W_WIDTH(wp);
				1497	numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
				1498	if (numberextra > 0)
				1499	col = col % numberextra;
				1500	}
				1501	if (col == 0 \|\| col + size > (colnr_T)W_WIDTH(wp))
				1502	{
				1503	added = vim_strsize(p_sbr);
				1504	if (tab_corr)
				1505	size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
				1506	else
				1507	size += added;
				1508	if (col != 0)
				1509	added = 0;
				1510	}
				1511	}
				1512	if (headp != NULL)
				1513	*headp = added + mb_added;
				1514	return size;
				1515	#endif
				1516	}
				1517
				1518	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1519	/*
				1520	* Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
				1521	* 'wrap' is on. This means we need to check for a double-byte character that
				1522	* doesn't fit at the end of the screen line.
				1523	*/
				1524	static int
				1525	win_nolbr_chartabsize(wp, s, col, headp)
				1526	win_T *wp;
				1527	char_u *s;
				1528	colnr_T col;
				1529	int *headp;
				1530	{
				1531	int n;
				1532
				1533	if (*s == TAB && (!wp->w_p_list \|\| lcs_tab1))
				1534	{
				1535	n = wp->w_buffer->b_p_ts;
				1536	return (int)(n - (col % n));
				1537	}
				1538	n = ptr2cells(s);
				1539	/* Add one cell for a double-width character in the last column of the
				1540	* window, displayed with a ">". */
				1541	if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
				1542	{
				1543	if (headp != NULL)
				1544	*headp = 1;
				1545	return 3;
				1546	}
				1547	return n;
				1548	}
				1549
				1550	/*
				1551	* Return TRUE if virtual column "vcol" is in the rightmost column of window
				1552	* "wp".
				1553	*/
				1554	int
				1555	in_win_border(wp, vcol)
				1556	win_T *wp;
				1557	colnr_T vcol;
				1558	{
				1559	colnr_T width1; /* width of first line (after line number) */
				1560	colnr_T width2; /* width of further lines */
				1561
				1562	#ifdef FEAT_VERTSPLIT
				1563	if (wp->w_width == 0) /* there is no border */
				1564	return FALSE;
				1565	#endif
				1566	width1 = W_WIDTH(wp) - win_col_off(wp);
				1567	if (vcol < width1 - 1)
				1568	return FALSE;
				1569	if (vcol == width1 - 1)
				1570	return TRUE;
				1571	width2 = width1 + win_col_off2(wp);
				1572	return ((vcol - width1) % width2 == width2 - 1);
				1573	}
				1574	#endif /* FEAT_MBYTE */
				1575
				1576	/*
				1577	* Get virtual column number of pos.
				1578	* start: on the first position of this character (TAB, ctrl)
				1579	* cursor: where the cursor is on this character (first char, except for TAB)
				1580	* end: on the last position of this character (TAB, ctrl)
				1581	*
				1582	* This is used very often, keep it fast!
				1583	*/
				1584	void
				1585	getvcol(wp, pos, start, cursor, end)
				1586	win_T *wp;
				1587	pos_T *pos;
				1588	colnr_T *start;
				1589	colnr_T *cursor;
				1590	colnr_T *end;
				1591	{
				1592	colnr_T vcol;
				1593	char_u ptr; / points to current char */
				1594	char_u posptr; / points to char at pos->col */
				1595	int incr;
				1596	int head;
				1597	int ts = wp->w_buffer->b_p_ts;
				1598	int c;
				1599
				1600	vcol = 0;
				1601	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1602	posptr = ptr + pos->col;
				1603
				1604	/*
				1605	* This function is used very often, do some speed optimizations.
				1606	* When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
				1607	* Also use this when 'list' is set but tabs take their normal size.
				1608	*/
				1609	if ((!wp->w_p_list \|\| lcs_tab1 != NUL)
				1610	#ifdef FEAT_LINEBREAK
				1611	&& !wp->w_p_lbr && *p_sbr == NUL
				1612	#endif
				1613	)
				1614	{
				1615	#ifndef FEAT_MBYTE
				1616	head = 0;
				1617	#endif
				1618	for (;;)
				1619	{
				1620	#ifdef FEAT_MBYTE
				1621	head = 0;
				1622	#endif
				1623	c = *ptr;
				1624	/* make sure we don't go past the end of the line */
				1625	if (c == NUL)
				1626	{
				1627	incr = 1; /* NUL at end of line only takes one column */
				1628	break;
				1629	}
				1630	/* A tab gets expanded, depending on the current column */
				1631	if (c == TAB)
				1632	incr = ts - (vcol % ts);
				1633	else
				1634	{
				1635	#ifdef FEAT_MBYTE
				1636	if (has_mbyte)
				1637	{
				1638	/* For utf-8, if the byte is >= 0x80, need to look at
				1639	* further bytes to find the cell width. */
				1640	if (enc_utf8 && c >= 0x80)
				1641	incr = utf_ptr2cells(ptr);
				1642	else
				1643	incr = CHARSIZE(c);
				1644
				1645	/* If a double-cell char doesn't fit at the end of a line
				1646	* it wraps to the next line, it's like this char is three
				1647	* cells wide. */
				1648	if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
				1649	{
				1650	++incr;
				1651	head = 1;
				1652	}
				1653	}
				1654	else
				1655	#endif
				1656	incr = CHARSIZE(c);
				1657	}
				1658
				1659	if (ptr >= posptr) /* character at pos->col */
				1660	break;
				1661
				1662	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1663	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1664	}
				1665	}
				1666	else
				1667	{
				1668	for (;;)
				1669	{
				1670	/* A tab gets expanded, depending on the current column */
				1671	head = 0;
				1672	incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
				1673	/* make sure we don't go past the end of the line */
				1674	if (*ptr == NUL)
				1675	{
				1676	incr = 1; /* NUL at end of line only takes one column */
				1677	break;
				1678	}
				1679
				1680	if (ptr >= posptr) /* character at pos->col */
				1681	break;
				1682
				1683	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1684	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1685	}
				1686	}
				1687	if (start != NULL)
				1688	*start = vcol + head;
				1689	if (end != NULL)
				1690	*end = vcol + incr - 1;
				1691	if (cursor != NULL)
				1692	{
				1693	if (*ptr == TAB
				1694	&& (State & NORMAL)
				1695	&& !wp->w_p_list
				1696	&& !virtual_active()
				1697	#ifdef FEAT_VISUAL
				1698	&& !(VIsual_active
				1699	&& (p_sel == 'e' \|\| ltoreq(pos, VIsual)))
				1700	#endif
				1701	)
				1702	cursor = vcol + incr - 1; / cursor at end */
				1703	else
				1704	cursor = vcol + head; / cursor at start */
				1705	}
				1706	}
				1707
				1708	/*
				1709	* Get virtual cursor column in the current window, pretending 'list' is off.
				1710	*/
				1711	colnr_T
				1712	getvcol_nolist(posp)
				1713	pos_T *posp;
				1714	{
				1715	int list_save = curwin->w_p_list;
				1716	colnr_T vcol;
				1717
				1718	curwin->w_p_list = FALSE;
				1719	getvcol(curwin, posp, NULL, &vcol, NULL);
				1720	curwin->w_p_list = list_save;
				1721	return vcol;
				1722	}
				1723
				1724	#if defined(FEAT_VIRTUALEDIT) \|\| defined(PROTO)
				1725	/*
				1726	* Get virtual column in virtual mode.
				1727	*/
				1728	void
				1729	getvvcol(wp, pos, start, cursor, end)
				1730	win_T *wp;
				1731	pos_T *pos;
				1732	colnr_T *start;
				1733	colnr_T *cursor;
				1734	colnr_T *end;
				1735	{
				1736	colnr_T col;
				1737	colnr_T coladd;
				1738	colnr_T endadd;
				1739	# ifdef FEAT_MBYTE
				1740	char_u *ptr;
				1741	# endif
				1742
				1743	if (virtual_active())
				1744	{
				1745	/* For virtual mode, only want one value */
				1746	getvcol(wp, pos, &col, NULL, NULL);
				1747
				1748	coladd = pos->coladd;
				1749	endadd = 0;
				1750	# ifdef FEAT_MBYTE
				1751	/* Cannot put the cursor on part of a wide character. */
				1752	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1753	if (pos->col < STRLEN(ptr))
				1754	{
				1755	int c = (*mb_ptr2char)(ptr + pos->col);
				1756
				1757	if (c != TAB && vim_isprintc(c))
				1758	{
				1759	endadd = char2cells(c) - 1;
				1760	if (coladd >= endadd)
				1761	coladd -= endadd;
				1762	else
				1763	coladd = 0;
				1764	}
				1765	}
				1766	# endif
				1767	col += coladd;
				1768	if (start != NULL)
				1769	*start = col;
				1770	if (cursor != NULL)
				1771	*cursor = col;
				1772	if (end != NULL)
				1773	*end = col + endadd;
				1774	}
				1775	else
				1776	getvcol(wp, pos, start, cursor, end);
				1777	}
				1778	#endif
				1779
				1780	#if defined(FEAT_VISUAL) \|\| defined(PROTO)
				1781	/*
				1782	* Get the leftmost and rightmost virtual column of pos1 and pos2.
				1783	* Used for Visual block mode.
				1784	*/
				1785	void
				1786	getvcols(wp, pos1, pos2, left, right)
				1787	win_T *wp;
				1788	pos_T pos1, pos2;
				1789	colnr_T left, right;
				1790	{
				1791	colnr_T from1, from2, to1, to2;
				1792
				1793	if (ltp(pos1, pos2))
				1794	{
				1795	getvvcol(wp, pos1, &from1, NULL, &to1);
				1796	getvvcol(wp, pos2, &from2, NULL, &to2);
				1797	}
				1798	else
				1799	{
				1800	getvvcol(wp, pos2, &from1, NULL, &to1);
				1801	getvvcol(wp, pos1, &from2, NULL, &to2);
				1802	}
				1803	if (from2 < from1)
				1804	*left = from2;
				1805	else
				1806	*left = from1;
				1807	if (to2 > to1)
				1808	{
				1809	if (*p_sel == 'e' && from2 - 1 >= to1)
				1810	*right = from2 - 1;
				1811	else
				1812	*right = to2;
				1813	}
				1814	else
				1815	*right = to1;
				1816	}
				1817	#endif
				1818
				1819	/*
				1820	* skipwhite: skip over ' ' and '\t'.
				1821	*/
				1822	char_u *
				1823	skipwhite(p)
				1824	char_u *p;
				1825	{
				1826	while (vim_iswhite(p)) / skip to next non-white */
				1827	++p;
				1828	return p;
				1829	}
				1830
				1831	/*
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	1832	* skip over digits
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1833	*/
				1834	char_u *
				1835	skipdigits(p)
				1836	char_u *p;
				1837	{
				1838	while (VIM_ISDIGIT(p)) / skip to next non-digit */
				1839	++p;
				1840	return p;
				1841	}
				1842
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	1843	#if defined(FEAT_EX_EXTRA) \|\| defined(PROTO)
				1844	/*
				1845	* skip to digit (or NUL after the string)
				1846	*/
				1847	char_u *
				1848	skiptodigit(p)
				1849	char_u *p;
				1850	{
				1851	while (p != NUL && !VIM_ISDIGIT(p)) /* skip to next digit */
				1852	++p;
				1853	return p;
				1854	}
				1855
				1856	/*
				1857	* skip to hex character (or NUL after the string)
				1858	*/
				1859	char_u *
				1860	skiptohex(p)
				1861	char_u *p;
				1862	{
				1863	while (p != NUL && !vim_isxdigit(p)) /* skip to next digit */
				1864	++p;
				1865	return p;
				1866	}
				1867	#endif
				1868
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' through '9' and 0 for anything else.
 */
int
vim_isdigit(c)
    int c;
{
    return !(c < '0' || c > '9');
}
				1881
				1882	/*
				1883	* Variant of isxdigit() that can handle characters > 0x100.
				1884	* We don't use isxdigit() here, because on some systems it also considers
				1885	* superscript 1 to be a digit.
				1886	*/
				1887	int
				1888	vim_isxdigit(c)
				1889	int c;
				1890	{
				1891	return (c >= '0' && c <= '9')
				1892	\|\| (c >= 'a' && c <= 'f')
				1893	\|\| (c >= 'A' && c <= 'F');
				1894	}
				1895
				1896	/*
				1897	* skiptowhite: skip over text until ' ' or '\t' or NUL.
				1898	*/
				1899	char_u *
				1900	skiptowhite(p)
				1901	char_u *p;
				1902	{
				1903	while (p != ' ' && p != '\t' && *p != NUL)
				1904	++p;
				1905	return p;
				1906	}
				1907
				1908	#if defined(FEAT_LISTCMDS) \|\| defined(FEAT_SIGNS) \|\| defined(FEAT_SNIFF) \
				1909	\|\| defined(PROTO)
				1910	/*
				1911	* skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
				1912	*/
				1913	char_u *
				1914	skiptowhite_esc(p)
				1915	char_u *p;
				1916	{
				1917	while (p != ' ' && p != '\t' && *p != NUL)
				1918	{
				1919	if ((p == '\\' \|\| p == Ctrl_V) && *(p + 1) != NUL)
				1920	++p;
				1921	++p;
				1922	}
				1923	return p;
				1924	}
				1925	#endif
				1926
				1927	/*
				1928	* Getdigits: Get a number from a string and skip over it.
				1929	* Note: the argument is a pointer to a char_u pointer!
				1930	*/
				1931	long
				1932	getdigits(pp)
				1933	char_u **pp;
				1934	{
				1935	char_u *p;
				1936	long retval;
				1937
				1938	p = *pp;
				1939	retval = atol((char *)p);
				1940	if (p == '-') / skip negative sign */
				1941	++p;
				1942	p = skipdigits(p); /* skip to next non-digit */
				1943	*pp = p;
				1944	return retval;
				1945	}
				1946
				1947	/*
				1948	* Return TRUE if "lbuf" is empty or only contains blanks.
				1949	*/
				1950	int
				1951	vim_isblankline(lbuf)
				1952	char_u *lbuf;
				1953	{
				1954	char_u *p;
				1955
				1956	p = skipwhite(lbuf);
				1957	return (p == NUL \|\| p == '\r' \|\| *p == '\n');
				1958	}
				1959
				1960	/*
				1961	* Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	1962	* hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1963	* If "hexp" is not NULL, returns a flag to indicate the type of the number:
				1964	* 0 decimal
				1965	* '0' octal
				1966	* 'X' hex
				1967	* 'x' hex
				1968	* If "len" is not NULL, the length of the number in characters is returned.
				1969	* If "nptr" is not NULL, the signed result is returned in it.
				1970	* If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	1971	* If "dooct" is non-zero recognize octal numbers, when > 1 always assume
				1972	* octal number.
				1973	* If "dohext" is non-zero recognize hex numbers, when > 1 always assume
				1974	* hex number.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1975	*/
				1976	void
				1977	vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
				1978	char_u *start;
				1979	int hexp; / return: type of number 0 = decimal, 'x'
				1980	or 'X' is hex, '0' = octal */
				1981	int len; / return: detected length of number */
				1982	int dooct; /* recognize octal number */
				1983	int dohex; /* recognize hex number */
				1984	long nptr; / return: signed result */
				1985	unsigned long unptr; / return: unsigned result */
				1986	{
				1987	char_u *ptr = start;
				1988	int hex = 0; /* default is decimal */
				1989	int negative = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1990	unsigned long un = 0;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1991	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1992
				1993	if (ptr[0] == '-')
				1994	{
				1995	negative = TRUE;
				1996	++ptr;
				1997	}
				1998
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1999	/* Recognize hex and octal. */
				2000	if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2001	{
				2002	hex = ptr[1];
				2003	if (dohex && (hex == 'X' \|\| hex == 'x') && vim_isxdigit(ptr[2]))
				2004	ptr += 2; /* hexadecimal */
				2005	else
				2006	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	2007	hex = 0; /* default is decimal */
				2008	if (dooct)
				2009	{
				2010	/* Don't interpret "0", "08" or "0129" as octal. */
				2011	for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
				2012	{
				2013	if (ptr[n] > '7')
				2014	{
				2015	hex = 0; /* can't be octal */
				2016	break;
				2017	}
				2018	if (ptr[n] > '0')
				2019	hex = '0'; /* assume octal */
				2020	}
				2021	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2022	}
				2023	}
				2024
				2025	/*
				2026	* Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
				2027	*/
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	2028	if (hex == '0' \|\| dooct > 1)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2029	{
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	2030	/* octal */
				2031	while ('0' <= ptr && ptr <= '7')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2032	{
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	2033	un = 8 * un + (unsigned long)(*ptr - '0');
				2034	++ptr;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2035	}
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	2036	}
				2037	else if (hex != 0 \|\| dohex > 1)
				2038	{
				2039	/* hex */
				2040	while (vim_isxdigit(*ptr))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2041	{
Bram Moolenaar	5c06f8b	2005-05-31 22:14:58 +0000	[diff] [blame]	2042	un = 16 * un + (unsigned long)hex2nr(*ptr);
				2043	++ptr;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2044	}
				2045	}
				2046	else
				2047	{
				2048	/* decimal */
				2049	while (VIM_ISDIGIT(*ptr))
				2050	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2051	un = 10 * un + (unsigned long)(*ptr - '0');
				2052	++ptr;
				2053	}
				2054	}
				2055
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2056	if (hexp != NULL)
				2057	*hexp = hex;
				2058	if (len != NULL)
				2059	*len = (int)(ptr - start);
				2060	if (nptr != NULL)
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	2061	{
				2062	if (negative) /* account for leading '-' for decimal numbers */
				2063	*nptr = -(long)un;
				2064	else
				2065	*nptr = (long)un;
				2066	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2067	if (unptr != NULL)
				2068	*unptr = un;
				2069	}
				2070
				2071	/*
				2072	* Return the value of a single hex character.
				2073	* Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
				2074	*/
				2075	int
				2076	hex2nr(c)
				2077	int c;
				2078	{
				2079	if (c >= 'a' && c <= 'f')
				2080	return c - 'a' + 10;
				2081	if (c >= 'A' && c <= 'F')
				2082	return c - 'A' + 10;
				2083	return c - '0';
				2084	}
				2085
				2086	#if defined(FEAT_TERMRESPONSE) \
				2087	\|\| (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) \|\| defined(PROTO)
				2088	/*
				2089	* Convert two hex characters to a byte.
				2090	* Return -1 if one of the characters is not hex.
				2091	*/
				2092	int
				2093	hexhex2nr(p)
				2094	char_u *p;
				2095	{
				2096	if (!vim_isxdigit(p[0]) \|\| !vim_isxdigit(p[1]))
				2097	return -1;
				2098	return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
				2099	}
				2100	#endif
				2101
				2102	/*
				2103	* Return TRUE if "str" starts with a backslash that should be removed.
				2104	* For MS-DOS, WIN32 and OS/2 this is only done when the character after the
				2105	* backslash is not a normal file name character.
				2106	* '$' is a valid file name character, we don't remove the backslash before
				2107	* it. This means it is not possible to use an environment variable after a
				2108	* backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
				2109	* Although "\ name" is valid, the backslash in "Program\ files" must be
				2110	* removed. Assume a file name doesn't start with a space.
				2111	* For multi-byte names, never remove a backslash before a non-ascii
				2112	* character, assume that all multi-byte characters are valid file name
				2113	* characters.
				2114	*/
				2115	int
				2116	rem_backslash(str)
				2117	char_u *str;
				2118	{
				2119	#ifdef BACKSLASH_IN_FILENAME
				2120	return (str[0] == '\\'
				2121	# ifdef FEAT_MBYTE
				2122	&& str[1] < 0x80
				2123	# endif
				2124	&& (str[1] == ' '
				2125	\|\| (str[1] != NUL
				2126	&& str[1] != '*'
				2127	&& str[1] != '?'
				2128	&& !vim_isfilec(str[1]))));
				2129	#else
				2130	return (str[0] == '\\' && str[1] != NUL);
				2131	#endif
				2132	}
				2133
				2134	/*
				2135	* Halve the number of backslashes in a file name argument.
				2136	* For MS-DOS we only do this if the character after the backslash
				2137	* is not a normal file character.
				2138	*/
				2139	void
				2140	backslash_halve(p)
				2141	char_u *p;
				2142	{
				2143	for ( ; *p; ++p)
				2144	if (rem_backslash(p))
				2145	STRCPY(p, p + 1);
				2146	}
				2147
				2148	/*
				2149	* backslash_halve() plus save the result in allocated memory.
				2150	*/
				2151	char_u *
				2152	backslash_halve_save(p)
				2153	char_u *p;
				2154	{
				2155	char_u *res;
				2156
				2157	res = vim_strsave(p);
				2158	if (res == NULL)
				2159	return p;
				2160	backslash_halve(res);
				2161	return res;
				2162	}
				2163
				2164	#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) \|\| defined(PROTO)
				2165	/*
				2166	* Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
				2167	* The first 64 entries have been added to map control characters defined in
				2168	* ascii.h
				2169	*/
				2170	static char_u ebcdic2ascii_tab[256] =
				2171	{
				2172	0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
				2173	0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
				2174	0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
				2175	0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
				2176	0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
				2177	0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
				2178	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				2179	0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
				2180	0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
				2181	0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
				2182	0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
				2183	0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
				2184	0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
				2185	0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
				2186	0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
				2187	0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
				2188	0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
				2189	0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
				2190	0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
				2191	0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
				2192	0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
				2193	0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
				2194	0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
				2195	0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
				2196	0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
				2197	0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
				2198	0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
				2199	0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
				2200	0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
				2201	0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
				2202	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				2203	0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
				2204	};
				2205
				2206	/*
				2207	* Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
				2208	* wanting 7-bit ASCII characters out the other end.
				2209	*/
				2210	void
				2211	ebcdic2ascii(buffer, len)
				2212	char_u *buffer;
				2213	int len;
				2214	{
				2215	int i;
				2216
				2217	for (i = 0; i < len; i++)
				2218	buffer[i] = ebcdic2ascii_tab[buffer[i]];
				2219	}
				2220	#endif