Blame - src/charset.c - android_external_vim

blob: 00a5326c8919f5b146153bca8ff3056ce02179a6 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	#include "vim.h"
				11
				12	#ifdef FEAT_LINEBREAK
				13	static int win_chartabsize __ARGS((win_T wp, char_u p, colnr_T col));
				14	#endif
				15
				16	#ifdef FEAT_MBYTE
				17	static int win_nolbr_chartabsize __ARGS((win_T wp, char_u s, colnr_T col, int *headp));
				18	#endif
				19
				20	static int nr2hex __ARGS((int c));
				21
				22	static int chartab_initialized = FALSE;
				23
				24	/* b_chartab[] is an array of 32 bytes, each bit representing one of the
				25	* characters 0-255. */
				26	#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] \|= (1 << ((c) & 0x7))
				27	#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
				28	#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
				29
				30	/*
				31	* Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
				32	* characters for current buffer.
				33	*
				34	* Depends on the option settings 'iskeyword', 'isident', 'isfname',
				35	* 'isprint' and 'encoding'.
				36	*
				37	* The index in chartab[] depends on 'encoding':
				38	* - For non-multi-byte index with the byte (same as the character).
				39	* - For DBCS index with the first byte.
				40	* - For UTF-8 index with the character (when first byte is up to 0x80 it is
				41	* the same as the character, if the first byte is 0x80 and above it depends
				42	* on further bytes).
				43	*
				44	* The contents of chartab[]:
				45	* - The lower two bits, masked by CT_CELL_MASK, give the number of display
				46	* cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
				47	* - CT_PRINT_CHAR bit is set when the character is printable (no need to
				48	* translate the character before displaying it). Note that only DBCS
				49	* characters can have 2 display cells and still be printable.
				50	* - CT_FNAME_CHAR bit is set when the character can be in a file name.
				51	* - CT_ID_CHAR bit is set when the character can be in an identifier.
				52	*
				53	* Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
				54	* error, OK otherwise.
				55	*/
				56	int
				57	init_chartab()
				58	{
				59	return buf_init_chartab(curbuf, TRUE);
				60	}
				61
				62	int
				63	buf_init_chartab(buf, global)
				64	buf_T *buf;
				65	int global; /* FALSE: only set buf->b_chartab[] */
				66	{
				67	int c;
				68	int c2;
				69	char_u *p;
				70	int i;
				71	int tilde;
				72	int do_isalpha;
				73
				74	if (global)
				75	{
				76	/*
				77	* Set the default size for printable characters:
				78	* From <Space> to '~' is 1 (printable), others are 2 (not printable).
				79	* This also inits all 'isident' and 'isfname' flags to FALSE.
				80	*
				81	* EBCDIC: all chars below ' ' are not printable, all others are
				82	* printable.
				83	*/
				84	c = 0;
				85	while (c < ' ')
				86	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				87	#ifdef EBCDIC
				88	while (c < 255)
				89	#else
				90	while (c <= '~')
				91	#endif
				92	chartab[c++] = 1 + CT_PRINT_CHAR;
				93	#ifdef FEAT_FKMAP
				94	if (p_altkeymap)
				95	{
				96	while (c < YE)
				97	chartab[c++] = 1 + CT_PRINT_CHAR;
				98	}
				99	#endif
				100	while (c < 256)
				101	{
				102	#ifdef FEAT_MBYTE
				103	/* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
				104	if (enc_utf8 && c >= 0xa0)
				105	chartab[c++] = CT_PRINT_CHAR + 1;
				106	/* euc-jp characters starting with 0x8e are single width */
				107	else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
				108	chartab[c++] = CT_PRINT_CHAR + 1;
				109	/* other double-byte chars can be printable AND double-width */
				110	else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
				111	chartab[c++] = CT_PRINT_CHAR + 2;
				112	else
				113	#endif
				114	/* the rest is unprintable by default */
				115	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				116	}
				117
				118	#ifdef FEAT_MBYTE
				119	/* Assume that every multi-byte char is a filename character. */
				120	for (c = 1; c < 256; ++c)
				121	if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
				122	\|\| (enc_dbcs == DBCS_JPNU && c == 0x8e)
				123	\|\| (enc_utf8 && c >= 0xa0))
				124	chartab[c] \|= CT_FNAME_CHAR;
				125	#endif
				126	}
				127
				128	/*
				129	* Init word char flags all to FALSE
				130	*/
				131	vim_memset(buf->b_chartab, 0, (size_t)32);
				132	#ifdef FEAT_MBYTE
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	133	if (enc_dbcs != 0)
				134	for (c = 0; c < 256; ++c)
				135	{
				136	/* double-byte characters are probably word characters */
				137	if (MB_BYTE2LEN(c) == 2)
				138	SET_CHARTAB(buf, c);
				139	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	140	#endif
				141
				142	#ifdef FEAT_LISP
				143	/*
				144	* In lisp mode the '-' character is included in keywords.
				145	*/
				146	if (buf->b_p_lisp)
				147	SET_CHARTAB(buf, '-');
				148	#endif
				149
				150	/* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
				151	* options Each option is a list of characters, character numbers or
				152	* ranges, separated by commas, e.g.: "200-210,x,#-178,-"
				153	*/
				154	for (i = global ? 0 : 3; i <= 3; ++i)
				155	{
				156	if (i == 0)
				157	p = p_isi; /* first round: 'isident' */
				158	else if (i == 1)
				159	p = p_isp; /* second round: 'isprint' */
				160	else if (i == 2)
				161	p = p_isf; /* third round: 'isfname' */
				162	else /* i == 3 */
				163	p = buf->b_p_isk; /* fourth round: 'iskeyword' */
				164
				165	while (*p)
				166	{
				167	tilde = FALSE;
				168	do_isalpha = FALSE;
				169	if (*p == '^' && p[1] != NUL)
				170	{
				171	tilde = TRUE;
				172	++p;
				173	}
				174	if (VIM_ISDIGIT(*p))
				175	c = getdigits(&p);
				176	else
				177	c = *p++;
				178	c2 = -1;
				179	if (*p == '-' && p[1] != NUL)
				180	{
				181	++p;
				182	if (VIM_ISDIGIT(*p))
				183	c2 = getdigits(&p);
				184	else
				185	c2 = *p++;
				186	}
				187	if (c <= 0 \|\| (c2 < c && c2 != -1) \|\| c2 >= 256
				188	\|\| !(p == NUL \|\| p == ','))
				189	return FAIL;
				190
				191	if (c2 == -1) /* not a range */
				192	{
				193	/*
				194	* A single '@' (not "@-@"):
				195	* Decide on letters being ID/printable/keyword chars with
				196	* standard function isalpha(). This takes care of locale for
				197	* single-byte characters).
				198	*/
				199	if (c == '@')
				200	{
				201	do_isalpha = TRUE;
				202	c = 1;
				203	c2 = 255;
				204	}
				205	else
				206	c2 = c;
				207	}
				208	while (c <= c2)
				209	{
				210	if (!do_isalpha \|\| isalpha(c)
				211	#ifdef FEAT_FKMAP
				212	\|\| (p_altkeymap && (F_isalpha(c) \|\| F_isdigit(c)))
				213	#endif
				214	)
				215	{
				216	if (i == 0) /* (re)set ID flag */
				217	{
				218	if (tilde)
				219	chartab[c] &= ~CT_ID_CHAR;
				220	else
				221	chartab[c] \|= CT_ID_CHAR;
				222	}
				223	else if (i == 1) /* (re)set printable */
				224	{
				225	if ((c < ' '
				226	#ifndef EBCDIC
				227	\|\| c > '~'
				228	#endif
				229	#ifdef FEAT_FKMAP
				230	\|\| (p_altkeymap
				231	&& (F_isalpha(c) \|\| F_isdigit(c)))
				232	#endif
				233	)
				234	#ifdef FEAT_MBYTE
				235	/* For double-byte we keep the cell width, so
				236	* that we can detect it from the first byte. */
				237	&& !(enc_dbcs && MB_BYTE2LEN(c) == 2)
				238	#endif
				239	)
				240	{
				241	if (tilde)
				242	{
				243	chartab[c] = (chartab[c] & ~CT_CELL_MASK)
				244	+ ((dy_flags & DY_UHEX) ? 4 : 2);
				245	chartab[c] &= ~CT_PRINT_CHAR;
				246	}
				247	else
				248	{
				249	chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
				250	chartab[c] \|= CT_PRINT_CHAR;
				251	}
				252	}
				253	}
				254	else if (i == 2) /* (re)set fname flag */
				255	{
				256	if (tilde)
				257	chartab[c] &= ~CT_FNAME_CHAR;
				258	else
				259	chartab[c] \|= CT_FNAME_CHAR;
				260	}
				261	else /* i == 3 / / (re)set keyword flag */
				262	{
				263	if (tilde)
				264	RESET_CHARTAB(buf, c);
				265	else
				266	SET_CHARTAB(buf, c);
				267	}
				268	}
				269	++c;
				270	}
				271	p = skip_to_option_part(p);
				272	}
				273	}
				274	chartab_initialized = TRUE;
				275	return OK;
				276	}
				277
				278	/*
				279	* Translate any special characters in buf[bufsize] in-place.
				280	* The result is a string with only printable characters, but if there is not
				281	* enough room, not all characters will be translated.
				282	*/
				283	void
				284	trans_characters(buf, bufsize)
				285	char_u *buf;
				286	int bufsize;
				287	{
				288	int len; /* length of string needing translation */
				289	int room; /* room in buffer after string */
				290	char_u trs; / translated character */
				291	int trs_len; /* length of trs[] */
				292
				293	len = (int)STRLEN(buf);
				294	room = bufsize - len;
				295	while (*buf != 0)
				296	{
				297	# ifdef FEAT_MBYTE
				298	/* Assume a multi-byte character doesn't need translation. */
				299	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
				300	len -= trs_len;
				301	else
				302	# endif
				303	{
				304	trs = transchar_byte(*buf);
				305	trs_len = (int)STRLEN(trs);
				306	if (trs_len > 1)
				307	{
				308	room -= trs_len - 1;
				309	if (room <= 0)
				310	return;
				311	mch_memmove(buf + trs_len, buf + 1, (size_t)len);
				312	}
				313	mch_memmove(buf, trs, (size_t)trs_len);
				314	--len;
				315	}
				316	buf += trs_len;
				317	}
				318	}
				319
				320	#if defined(FEAT_EVAL) \|\| defined(FEAT_TITLE) \|\| defined(PROTO)
				321	/*
				322	* Translate a string into allocated memory, replacing special chars with
				323	* printable chars. Returns NULL when out of memory.
				324	*/
				325	char_u *
				326	transstr(s)
				327	char_u *s;
				328	{
				329	char_u *res;
				330	char_u *p;
				331	#ifdef FEAT_MBYTE
				332	int l, len, c;
				333	char_u hexbuf[11];
				334	#endif
				335
				336	#ifdef FEAT_MBYTE
				337	if (has_mbyte)
				338	{
				339	/* Compute the length of the result, taking account of unprintable
				340	* multi-byte characters. */
				341	len = 0;
				342	p = s;
				343	while (*p != NUL)
				344	{
				345	if ((l = (*mb_ptr2len_check)(p)) > 1)
				346	{
				347	c = (*mb_ptr2char)(p);
				348	p += l;
				349	if (vim_isprintc(c))
				350	len += l;
				351	else
				352	{
				353	transchar_hex(hexbuf, c);
				354	len += STRLEN(hexbuf);
				355	}
				356	}
				357	else
				358	{
				359	l = byte2cells(*p++);
				360	if (l > 0)
				361	len += l;
				362	else
				363	len += 4; /* illegal byte sequence */
				364	}
				365	}
				366	res = alloc((unsigned)(len + 1));
				367	}
				368	else
				369	#endif
				370	res = alloc((unsigned)(vim_strsize(s) + 1));
				371	if (res != NULL)
				372	{
				373	*res = NUL;
				374	p = s;
				375	while (*p != NUL)
				376	{
				377	#ifdef FEAT_MBYTE
				378	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				379	{
				380	c = (*mb_ptr2char)(p);
				381	if (vim_isprintc(c))
				382	STRNCAT(res, p, l); /* append printable multi-byte char */
				383	else
				384	transchar_hex(res + STRLEN(res), c);
				385	p += l;
				386	}
				387	else
				388	#endif
				389	STRCAT(res, transchar_byte(*p++));
				390	}
				391	}
				392	return res;
				393	}
				394	#endif
				395
				396	#if defined(FEAT_SYN_HL) \|\| defined(FEAT_INS_EXPAND) \|\| defined(PROTO)
				397	/*
Bram Moolenaar	217ad92	2005-03-20 22:37:15 +0000	[diff] [blame]	398	* Convert the string "str[orglen]" to do ignore-case comparing. Uses the
				399	* current locale.
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	400	* When "buf" is NULL returns an allocated string (NULL for out-of-memory).
				401	* Otherwise puts the result in "buf[buflen]".
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	*/
				403	char_u *
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	404	str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	405	char_u *str;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	406	int orglen;
				407	char_u *buf;
				408	int buflen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	409	{
				410	garray_T ga;
				411	int i;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	412	int len = orglen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	413
				414	#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
				415	#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	416	#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
				417	#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	418
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	419	/* Copy "str" into "buf" or allocated memory, unmodified. */
				420	if (buf == NULL)
				421	{
				422	ga_init2(&ga, 1, 10);
				423	if (ga_grow(&ga, len + 1) == FAIL)
				424	return NULL;
				425	mch_memmove(ga.ga_data, str, (size_t)len);
				426	ga.ga_len = len;
				427	}
				428	else
				429	{
				430	if (len >= buflen) /* Ugly! */
				431	len = buflen - 1;
				432	mch_memmove(buf, str, (size_t)len);
				433	}
				434	if (buf == NULL)
				435	GA_CHAR(len) = NUL;
				436	else
				437	buf[len] = NUL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	438
				439	/* Make each character lower case. */
				440	i = 0;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	441	while (STR_CHAR(i) != NUL)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	442	{
				443	#ifdef FEAT_MBYTE
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	444	if (enc_utf8 \|\| (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	445	{
				446	if (enc_utf8)
				447	{
				448	int c, lc;
				449
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	450	c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	451	lc = utf_tolower(c);
				452	if (c != lc)
				453	{
				454	int ol = utf_char2len(c);
				455	int nl = utf_char2len(lc);
				456
				457	/* If the byte length changes need to shift the following
				458	* characters forward or backward. */
				459	if (ol != nl)
				460	{
				461	if (nl > ol)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	462	{
				463	if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
				464	: len + nl - ol >= buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	465	{
				466	/* out of memory, keep old char */
				467	lc = c;
				468	nl = ol;
				469	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	470	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	471	if (ol != nl)
				472	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	473	if (buf == NULL)
				474	{
				475	mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	476	STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	477	ga.ga_len += nl - ol;
				478	}
				479	else
				480	{
				481	mch_memmove(buf + i + nl, buf + i + ol,
				482	STRLEN(buf + i + ol) + 1);
				483	len += nl - ol;
				484	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	485	}
				486	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	487	(void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	488	}
				489	}
				490	/* skip to next multi-byte char */
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	491	i += (*mb_ptr2len_check)(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	492	}
				493	else
				494	#endif
				495	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	496	if (buf == NULL)
				497	GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
				498	else
				499	buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	500	++i;
				501	}
				502	}
				503
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	504	if (buf == NULL)
				505	return (char_u *)ga.ga_data;
				506	return buf;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	507	}
				508	#endif
				509
				510	/*
				511	* Catch 22: chartab[] can't be initialized before the options are
				512	* initialized, and initializing options may cause transchar() to be called!
				513	* When chartab_initialized == FALSE don't use chartab[].
				514	* Does NOT work for multi-byte characters, c must be <= 255.
				515	* Also doesn't work for the first byte of a multi-byte, "c" must be a
				516	* character!
				517	*/
				518	static char_u transchar_buf[7];
				519
				520	char_u *
				521	transchar(c)
				522	int c;
				523	{
				524	int i;
				525
				526	i = 0;
				527	if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
				528	{
				529	transchar_buf[0] = '~';
				530	transchar_buf[1] = '@';
				531	i = 2;
				532	c = K_SECOND(c);
				533	}
				534
				535	if ((!chartab_initialized && (
				536	#ifdef EBCDIC
				537	(c >= 64 && c < 255)
				538	#else
				539	(c >= ' ' && c <= '~')
				540	#endif
				541	#ifdef FEAT_FKMAP
				542	\|\| F_ischar(c)
				543	#endif
				544	)) \|\| (c < 256 && vim_isprintc_strict(c)))
				545	{
				546	/* printable character */
				547	transchar_buf[i] = c;
				548	transchar_buf[i + 1] = NUL;
				549	}
				550	else
				551	transchar_nonprint(transchar_buf + i, c);
				552	return transchar_buf;
				553	}
				554
				555	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				556	/*
				557	* Like transchar(), but called with a byte instead of a character. Checks
				558	* for an illegal UTF-8 byte.
				559	*/
				560	char_u *
				561	transchar_byte(c)
				562	int c;
				563	{
				564	if (enc_utf8 && c >= 0x80)
				565	{
				566	transchar_nonprint(transchar_buf, c);
				567	return transchar_buf;
				568	}
				569	return transchar(c);
				570	}
				571	#endif
				572
				573	/*
				574	* Convert non-printable character to two or more printable characters in
				575	* "buf[]". "buf" needs to be able to hold five bytes.
				576	* Does NOT work for multi-byte characters, c must be <= 255.
				577	*/
				578	void
				579	transchar_nonprint(buf, c)
				580	char_u *buf;
				581	int c;
				582	{
				583	if (c == NL)
				584	c = NUL; /* we use newline in place of a NUL */
				585	else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
				586	c = NL; /* we use CR in place of NL in this case */
				587
				588	if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
				589	transchar_hex(buf, c);
				590
				591	#ifdef EBCDIC
				592	/* For EBCDIC only the characters 0-63 and 255 are not printable */
				593	else if (CtrlChar(c) != 0 \|\| c == DEL)
				594	#else
				595	else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
				596	#endif
				597	{
				598	buf[0] = '^';
				599	#ifdef EBCDIC
				600	if (c == DEL)
				601	buf[1] = '?'; /* DEL displayed as ^? */
				602	else
				603	buf[1] = CtrlChar(c);
				604	#else
				605	buf[1] = c ^ 0x40; /* DEL displayed as ^? */
				606	#endif
				607
				608	buf[2] = NUL;
				609	}
				610	#ifdef FEAT_MBYTE
				611	else if (enc_utf8 && c >= 0x80)
				612	{
				613	transchar_hex(buf, c);
				614	}
				615	#endif
				616	#ifndef EBCDIC
				617	else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
				618	{
				619	buf[0] = '\|';
				620	buf[1] = c - 0x80;
				621	buf[2] = NUL;
				622	}
				623	#else
				624	else if (c < 64)
				625	{
				626	buf[0] = '~';
				627	buf[1] = MetaChar(c);
				628	buf[2] = NUL;
				629	}
				630	#endif
				631	else /* 0x80 - 0x9f and 0xff */
				632	{
				633	/*
				634	* TODO: EBCDIC I don't know what to do with this chars, so I display
				635	* them as '~?' for now
				636	*/
				637	buf[0] = '~';
				638	#ifdef EBCDIC
				639	buf[1] = '?'; /* 0xff displayed as ~? */
				640	#else
				641	buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
				642	#endif
				643	buf[2] = NUL;
				644	}
				645	}
				646
				647	void
				648	transchar_hex(buf, c)
				649	char_u *buf;
				650	int c;
				651	{
				652	int i = 0;
				653
				654	buf[0] = '<';
				655	#ifdef FEAT_MBYTE
				656	if (c > 255)
				657	{
				658	buf[++i] = nr2hex((unsigned)c >> 12);
				659	buf[++i] = nr2hex((unsigned)c >> 8);
				660	}
				661	#endif
				662	buf[++i] = nr2hex((unsigned)c >> 4);
				663	buf[++i] = nr2hex(c);
				664	buf[++i] = '>';
				665	buf[++i] = NUL;
				666	}
				667
				668	/*
				669	* Convert the lower 4 bits of byte "c" to its hex character.
				670	* Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
				671	* function key 1.
				672	*/
				673	static int
				674	nr2hex(c)
				675	int c;
				676	{
				677	if ((c & 0xf) <= 9)
				678	return (c & 0xf) + '0';
				679	return (c & 0xf) - 10 + 'a';
				680	}
				681
				682	/*
				683	* Return number of display cells occupied by byte "b".
				684	* Caller must make sure 0 <= b <= 255.
				685	* For multi-byte mode "b" must be the first byte of a character.
				686	* A TAB is counted as two cells: "^I".
				687	* For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
				688	* cells depends on further bytes.
				689	*/
				690	int
				691	byte2cells(b)
				692	int b;
				693	{
				694	#ifdef FEAT_MBYTE
				695	if (enc_utf8 && b >= 0x80)
				696	return 0;
				697	#endif
				698	return (chartab[b] & CT_CELL_MASK);
				699	}
				700
				701	/*
				702	* Return number of display cells occupied by character "c".
				703	* "c" can be a special key (negative number) in which case 3 or 4 is returned.
				704	* A TAB is counted as two cells: "^I" or four: "<09>".
				705	*/
				706	int
				707	char2cells(c)
				708	int c;
				709	{
				710	if (IS_SPECIAL(c))
				711	return char2cells(K_SECOND(c)) + 2;
				712	#ifdef FEAT_MBYTE
				713	if (c >= 0x80)
				714	{
				715	/* UTF-8: above 0x80 need to check the value */
				716	if (enc_utf8)
				717	return utf_char2cells(c);
				718	/* DBCS: double-byte means double-width, except for euc-jp with first
				719	* byte 0x8e */
				720	if (enc_dbcs != 0 && c >= 0x100)
				721	{
				722	if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
				723	return 1;
				724	return 2;
				725	}
				726	}
				727	#endif
				728	return (chartab[c & 0xff] & CT_CELL_MASK);
				729	}
				730
				731	/*
				732	* Return number of display cells occupied by character at "*p".
				733	* A TAB is counted as two cells: "^I" or four: "<09>".
				734	*/
				735	int
				736	ptr2cells(p)
				737	char_u *p;
				738	{
				739	#ifdef FEAT_MBYTE
				740	/* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
				741	if (enc_utf8 && *p >= 0x80)
				742	return utf_ptr2cells(p);
				743	/* For DBCS we can tell the cell count from the first byte. */
				744	#endif
				745	return (chartab[*p] & CT_CELL_MASK);
				746	}
				747
				748	/*
				749	* Return the number of characters string "s" will take on the screen,
				750	* counting TABs as two characters: "^I".
				751	*/
				752	int
				753	vim_strsize(s)
				754	char_u *s;
				755	{
				756	return vim_strnsize(s, (int)MAXCOL);
				757	}
				758
				759	/*
				760	* Return the number of characters string "s[len]" will take on the screen,
				761	* counting TABs as two characters: "^I".
				762	*/
				763	int
				764	vim_strnsize(s, len)
				765	char_u *s;
				766	int len;
				767	{
				768	int size = 0;
				769
				770	while (*s != NUL && --len >= 0)
				771	{
				772	#ifdef FEAT_MBYTE
				773	if (has_mbyte)
				774	{
				775	int l = (*mb_ptr2len_check)(s);
				776
				777	size += ptr2cells(s);
				778	s += l;
				779	len -= l - 1;
				780	}
				781	else
				782	#endif
				783	size += byte2cells(*s++);
				784	}
				785	return size;
				786	}
				787
				788	/*
				789	* Return the number of characters 'c' will take on the screen, taking
				790	* into account the size of a tab.
				791	* Use a define to make it fast, this is used very often!!!
				792	* Also see getvcol() below.
				793	*/
				794
				795	#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
				796	if (*(p) == TAB && (!(wp)->w_p_list \|\| lcs_tab1)) \
				797	{ \
				798	int ts; \
				799	ts = (buf)->b_p_ts; \
				800	return (int)(ts - (col % ts)); \
				801	} \
				802	else \
				803	return ptr2cells(p);
				804
				805	#if defined(FEAT_VREPLACE) \|\| defined(FEAT_EX_EXTRA) \|\| defined(FEAT_GUI) \
				806	\|\| defined(FEAT_VIRTUALEDIT) \|\| defined(PROTO)
				807	int
				808	chartabsize(p, col)
				809	char_u *p;
				810	colnr_T col;
				811	{
				812	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
				813	}
				814	#endif
				815
				816	#ifdef FEAT_LINEBREAK
				817	static int
				818	win_chartabsize(wp, p, col)
				819	win_T *wp;
				820	char_u *p;
				821	colnr_T col;
				822	{
				823	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
				824	}
				825	#endif
				826
				827	/*
				828	* return the number of characters the string 's' will take on the screen,
				829	* taking into account the size of a tab
				830	*/
				831	int
				832	linetabsize(s)
				833	char_u *s;
				834	{
				835	colnr_T col = 0;
				836
				837	while (*s != NUL)
				838	col += lbr_chartabsize_adv(&s, col);
				839	return (int)col;
				840	}
				841
				842	/*
				843	* Like linetabsize(), but for a given window instead of the current one.
				844	*/
				845	int
				846	win_linetabsize(wp, p, len)
				847	win_T *wp;
				848	char_u *p;
				849	colnr_T len;
				850	{
				851	colnr_T col = 0;
				852	char_u *s;
				853
Bram Moolenaar	b5bf5b8	2004-12-24 14:35:23 +0000	[diff] [blame]	854	for (s = p; *s != NUL && (len == MAXCOL \|\| s < p + len); mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	855	col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	856	return (int)col;
				857	}
				858
				859	/*
Bram Moolenaar	8169525	2004-12-29 20:58:21 +0000	[diff] [blame]	860	* Return TRUE if 'c' is a normal identifier character:
				861	* Letters and characters from the 'isident' option.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	862	*/
				863	int
				864	vim_isIDc(c)
				865	int c;
				866	{
				867	return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
				868	}
				869
				870	/*
				871	* return TRUE if 'c' is a keyword character: Letters and characters from
				872	* 'iskeyword' option for current buffer.
				873	* For multi-byte characters mb_get_class() is used (builtin rules).
				874	*/
				875	int
				876	vim_iswordc(c)
				877	int c;
				878	{
				879	#ifdef FEAT_MBYTE
				880	if (c >= 0x100)
				881	{
				882	if (enc_dbcs != 0)
				883	return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
				884	if (enc_utf8)
				885	return utf_class(c) >= 2;
				886	}
				887	#endif
				888	return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
				889	}
				890
				891	/*
				892	* Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
				893	*/
				894	int
				895	vim_iswordp(p)
				896	char_u *p;
				897	{
				898	#ifdef FEAT_MBYTE
				899	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				900	return mb_get_class(p) >= 2;
				901	#endif
				902	return GET_CHARTAB(curbuf, *p) != 0;
				903	}
				904
				905	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				906	int
				907	vim_iswordc_buf(p, buf)
				908	char_u *p;
				909	buf_T *buf;
				910	{
				911	# ifdef FEAT_MBYTE
				912	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				913	return mb_get_class(p) >= 2;
				914	# endif
				915	return (GET_CHARTAB(buf, *p) != 0);
				916	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	917
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	918	/*
				919	* The tables used for spelling. These are only used for the first 256
				920	* characters.
				921	*/
				922	typedef struct spelltab_S
				923	{
				924	char_u st_isw[256]; /* flags: is word char */
				925	char_u st_isu[256]; /* flags: is uppercase char */
				926	char_u st_fold[256]; /* chars: folded case */
				927	} spelltab_T;
				928
				929	static spelltab_T spelltab;
				930	static int did_set_spelltab;
				931
				932	#define SPELL_ISWORD 1
				933	#define SPELL_ISUPPER 2
				934
				935	static void clear_spell_chartab __ARGS((spelltab_T *sp));
				936	static int set_spell_finish __ARGS((spelltab_T *new_st));
				937
				938	/*
				939	* Init the chartab used for spelling for ASCII.
				940	* EBCDIC is not supported!
				941	*/
				942	static void
				943	clear_spell_chartab(sp)
				944	spelltab_T *sp;
				945	{
				946	int i;
				947
				948	/* Init everything to FALSE. */
				949	vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
				950	vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
				951	for (i = 0; i < 256; ++i)
				952	sp->st_fold[i] = i;
				953
				954	/* We include digits. A word shouldn't start with a digit, but handling
				955	* that is done separately. */
				956	for (i = '0'; i <= '9'; ++i)
				957	sp->st_isw[i] = TRUE;
				958	for (i = 'A'; i <= 'Z'; ++i)
				959	{
				960	sp->st_isw[i] = TRUE;
				961	sp->st_isu[i] = TRUE;
				962	sp->st_fold[i] = i + 0x20;
				963	}
				964	for (i = 'a'; i <= 'z'; ++i)
				965	sp->st_isw[i] = TRUE;
				966	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	967
				968	/*
				969	* Init the chartab used for spelling. Only depends on 'encoding'.
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	970	* Called once while starting up and when 'encoding' changes.
				971	* The default is to use isalpha(), but the spell file should define the word
				972	* characters to make it possible that 'encoding' differs from the current
				973	* locale.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	974	*/
				975	void
				976	init_spell_chartab()
				977	{
				978	int i;
				979
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	980	did_set_spelltab = FALSE;
				981	clear_spell_chartab(&spelltab);
				982
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	983	#ifdef FEAT_MBYTE
				984	if (enc_dbcs)
				985	{
				986	/* DBCS: assume double-wide characters are word characters. */
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	987	for (i = 128; i <= 255; ++i)
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	988	if (MB_BYTE2LEN(i) == 2)
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	989	spelltab.st_isw[i] = TRUE;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	990	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	991	else
				992	#endif
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	993	{
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	994	/* Rough guess: use isalpha() for characters above 128. */
				995	for (i = 128; i < 256; ++i)
				996	{
				997	spelltab.st_isw[i] = isalpha(i);
				998	if (isupper(i))
				999	{
				1000	spelltab.st_isu[i] = TRUE;
				1001	spelltab.st_fold[i] = tolower(i);
				1002	}
				1003	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1004	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1005	}
				1006
				1007	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				1008	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				1009
				1010	/*
				1011	* Set the spell character tables from strings in the affix file.
				1012	*/
				1013	int
				1014	set_spell_chartab(fol, low, upp)
				1015	char_u *fol;
				1016	char_u *low;
				1017	char_u *upp;
				1018	{
				1019	/* We build the new tables here first, so that we can compare with the
				1020	* previous one. */
				1021	spelltab_T new_st;
				1022	char_u pf = fol, pl = low, *pu = upp;
				1023	int f, l, u;
				1024
				1025	clear_spell_chartab(&new_st);
				1026
				1027	while (*pf != NUL)
				1028	{
				1029	if (pl == NUL \|\| pu == NUL)
				1030	{
				1031	EMSG(_(e_affform));
				1032	return FAIL;
				1033	}
				1034	#ifdef FEAT_MBYTE
				1035	f = mb_ptr2char_adv(&pf);
				1036	l = mb_ptr2char_adv(&pl);
				1037	u = mb_ptr2char_adv(&pu);
				1038	#else
				1039	f = *pf++;
				1040	l = *pl++;
				1041	u = *pu++;
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1042	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1043	/* Every character that appears is a word character. */
				1044	if (f < 256)
				1045	new_st.st_isw[f] = TRUE;
				1046	if (l < 256)
				1047	new_st.st_isw[l] = TRUE;
				1048	if (u < 256)
				1049	new_st.st_isw[u] = TRUE;
				1050
				1051	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				1052	* case-folding */
				1053	if (l < 256 && l != f)
				1054	{
				1055	if (f >= 256)
				1056	{
				1057	EMSG(_(e_affrange));
				1058	return FAIL;
				1059	}
				1060	new_st.st_fold[l] = f;
				1061	}
				1062
				1063	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				1064	* case-folding and it's upper case. */
				1065	if (u < 256 && u != f)
				1066	{
				1067	if (f >= 256)
				1068	{
				1069	EMSG(_(e_affrange));
				1070	return FAIL;
				1071	}
				1072	new_st.st_fold[u] = f;
				1073	new_st.st_isu[u] = TRUE;
				1074	}
				1075	}
				1076
				1077	if (pl != NUL \|\| pu != NUL)
				1078	{
				1079	EMSG(_(e_affform));
				1080	return FAIL;
				1081	}
				1082
				1083	return set_spell_finish(&new_st);
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1084	}
				1085
				1086	/*
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1087	* Set the spell character tables from strings in the .spl file.
				1088	*/
				1089	int
				1090	set_spell_charflags(flags, cnt, upp)
				1091	char_u *flags;
				1092	int cnt;
				1093	char_u *upp;
				1094	{
				1095	/* We build the new tables here first, so that we can compare with the
				1096	* previous one. */
				1097	spelltab_T new_st;
				1098	int i;
				1099	char_u *p = upp;
				1100
				1101	clear_spell_chartab(&new_st);
				1102
				1103	for (i = 0; i < cnt; ++i)
				1104	{
				1105	new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
				1106	new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
				1107
				1108	if (*p == NUL)
				1109	return FAIL;
				1110	#ifdef FEAT_MBYTE
				1111	new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
				1112	#else
				1113	new_st.st_fold[i + 128] = *p++;
				1114	#endif
				1115	}
				1116
				1117	return set_spell_finish(&new_st);
				1118	}
				1119
				1120	static int
				1121	set_spell_finish(new_st)
				1122	spelltab_T *new_st;
				1123	{
				1124	int i;
				1125
				1126	if (did_set_spelltab)
				1127	{
				1128	/* check that it's the same table */
				1129	for (i = 0; i < 256; ++i)
				1130	{
				1131	if (spelltab.st_isw[i] != new_st->st_isw[i]
				1132	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				1133	\|\| spelltab.st_fold[i] != new_st->st_fold[i])
				1134	{
				1135	EMSG(_("E763: Word characters differ between spell files"));
				1136	return FAIL;
				1137	}
				1138	}
				1139	}
				1140	else
				1141	{
				1142	/* copy the new spelltab into the one being used */
				1143	spelltab = *new_st;
				1144	did_set_spelltab = TRUE;
				1145	}
				1146
				1147	return OK;
				1148	}
				1149
				1150	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1151	/*
				1152	* Write the current tables into the .spl file.
				1153	*/
				1154	void
				1155	write_spell_chartab(fd)
				1156	FILE *fd;
				1157	{
				1158	char_u charbuf[256 * 4];
				1159	int len = 0;
				1160	int flags;
				1161	int i;
				1162
				1163	if (!did_set_spelltab)
				1164	{
				1165	/* No character table specified, write zero counts. */
				1166	fputc(0, fd);
				1167	fputc(0, fd);
				1168	fputc(0, fd);
				1169	return;
				1170	}
				1171
				1172	fputc(128, fd); /* <charflagslen> */
				1173	for (i = 128; i < 256; ++i)
				1174	{
				1175	flags = 0;
				1176	if (spelltab.st_isw[i])
				1177	flags \|= SPELL_ISWORD;
				1178	if (spelltab.st_isu[i])
				1179	flags \|= SPELL_ISUPPER;
				1180	fputc(flags, fd); /* <charflags> */
				1181
				1182	len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
				1183	}
				1184
				1185	put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
				1186	fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
				1187	}
				1188	#endif
				1189
				1190	/*
				1191	* Return TRUE if "p" points to a word character for spelling.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1192	*/
				1193	int
				1194	spell_iswordc(p)
				1195	char_u *p;
				1196	{
				1197	# ifdef FEAT_MBYTE
				1198	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				1199	return mb_get_class(p) >= 2;
				1200	# endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1201	return spelltab.st_isw[*p];
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1202	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1203
				1204	/*
				1205	* Return TRUE if "c" is an upper-case character for spelling.
				1206	*/
				1207	int
				1208	spell_isupper(c)
				1209	int c;
				1210	{
				1211	# ifdef FEAT_MBYTE
				1212	if (enc_utf8)
				1213	{
				1214	/* For Unicode we can call utf_isupper(), but don't do that for ASCII,
				1215	* because we don't want to use 'casemap' here. */
				1216	if (c >= 128)
				1217	return utf_isupper(c);
				1218	}
				1219	else if (has_mbyte && c > 256)
				1220	{
				1221	/* For characters above 255 we don't have something specfied.
				1222	* Fall back to locale-dependent iswupper(). If not available
				1223	* simply return FALSE. */
				1224	# ifdef HAVE_ISWUPPER
				1225	return iswupper(c);
				1226	# else
				1227	return FALSE;
				1228	# endif
				1229	}
				1230	# endif
				1231	return spelltab.st_isu[c];
				1232	}
				1233
				1234	/*
				1235	* case-fold "p[len]" into "buf[buflen]". Used for spell checking.
				1236	* Returns FAIL when something wrong.
				1237	*/
				1238	int
				1239	spell_casefold(p, len, buf, buflen)
				1240	char_u *p;
				1241	int len;
				1242	char_u *buf;
				1243	int buflen;
				1244	{
				1245	int i;
				1246
				1247	if (len >= buflen)
				1248	{
				1249	buf[0] = NUL;
				1250	return FAIL; /* result will not fit */
				1251	}
				1252
				1253	#ifdef FEAT_MBYTE
				1254	if (has_mbyte)
				1255	{
				1256	int c;
				1257	int outi = 0;
				1258
				1259	/* Fold one character at a time. */
				1260	for (i = 0; i < len; i += mb_ptr2len_check(p + i))
				1261	{
				1262	c = mb_ptr2char(p + i);
				1263	if (enc_utf8)
				1264	/* For Unicode case folding is always the same, no need to use
				1265	* the table from the spell file. */
				1266	c = utf_fold(c);
				1267	else if (c < 256)
				1268	/* Use the table from the spell file. */
				1269	c = spelltab.st_fold[c];
				1270	# ifdef HAVE_TOWLOWER
				1271	else
				1272	/* We don't know what to do, fall back to towlower(), it
				1273	* depends on the current locale. */
				1274	c = towlower(c);
				1275	# endif
				1276	if (outi + MB_MAXBYTES > buflen)
				1277	{
				1278	buf[outi] = NUL;
				1279	return FAIL;
				1280	}
				1281	outi += mb_char2bytes(c, buf + outi);
				1282	}
				1283	buf[outi] = NUL;
				1284	}
				1285	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1286	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1287	{
				1288	/* Be quick for non-multibyte encodings. */
				1289	for (i = 0; i < len; ++i)
				1290	buf[i] = spelltab.st_fold[p[i]];
				1291	buf[i] = NUL;
				1292	}
				1293
				1294	return OK;
				1295	}
				1296
				1297	#endif /* FEAT_SYN_HL */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1298
				1299	/*
				1300	* return TRUE if 'c' is a valid file-name character
				1301	* Assume characters above 0x100 are valid (multi-byte).
				1302	*/
				1303	int
				1304	vim_isfilec(c)
				1305	int c;
				1306	{
				1307	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
				1308	}
				1309
				1310	/*
				1311	* return TRUE if 'c' is a printable character
				1312	* Assume characters above 0x100 are printable (multi-byte), except for
				1313	* Unicode.
				1314	*/
				1315	int
				1316	vim_isprintc(c)
				1317	int c;
				1318	{
				1319	#ifdef FEAT_MBYTE
				1320	if (enc_utf8 && c >= 0x100)
				1321	return utf_printable(c);
				1322	#endif
				1323	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1324	}
				1325
				1326	/*
				1327	* Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
				1328	* byte of a double-byte character.
				1329	*/
				1330	int
				1331	vim_isprintc_strict(c)
				1332	int c;
				1333	{
				1334	#ifdef FEAT_MBYTE
				1335	if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
				1336	return FALSE;
				1337	if (enc_utf8 && c >= 0x100)
				1338	return utf_printable(c);
				1339	#endif
				1340	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1341	}
				1342
				1343	/*
				1344	* like chartabsize(), but also check for line breaks on the screen
				1345	*/
				1346	int
				1347	lbr_chartabsize(s, col)
				1348	unsigned char *s;
				1349	colnr_T col;
				1350	{
				1351	#ifdef FEAT_LINEBREAK
				1352	if (!curwin->w_p_lbr && *p_sbr == NUL)
				1353	{
				1354	#endif
				1355	#ifdef FEAT_MBYTE
				1356	if (curwin->w_p_wrap)
				1357	return win_nolbr_chartabsize(curwin, s, col, NULL);
				1358	#endif
				1359	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
				1360	#ifdef FEAT_LINEBREAK
				1361	}
				1362	return win_lbr_chartabsize(curwin, s, col, NULL);
				1363	#endif
				1364	}
				1365
				1366	/*
				1367	* Call lbr_chartabsize() and advance the pointer.
				1368	*/
				1369	int
				1370	lbr_chartabsize_adv(s, col)
				1371	char_u **s;
				1372	colnr_T col;
				1373	{
				1374	int retval;
				1375
				1376	retval = lbr_chartabsize(*s, col);
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1377	mb_ptr_adv(*s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1378	return retval;
				1379	}
				1380
				1381	/*
				1382	* This function is used very often, keep it fast!!!!
				1383	*
				1384	* If "headp" not NULL, set *headp to the size of what we for 'showbreak'
				1385	* string at start of line. Warning: *headp is only set if it's a non-zero
				1386	* value, init to 0 before calling.
				1387	*/
				1388	/ARGSUSED/
				1389	int
				1390	win_lbr_chartabsize(wp, s, col, headp)
				1391	win_T *wp;
				1392	char_u *s;
				1393	colnr_T col;
				1394	int *headp;
				1395	{
				1396	#ifdef FEAT_LINEBREAK
				1397	int c;
				1398	int size;
				1399	colnr_T col2;
				1400	colnr_T colmax;
				1401	int added;
				1402	# ifdef FEAT_MBYTE
				1403	int mb_added = 0;
				1404	# else
				1405	# define mb_added 0
				1406	# endif
				1407	int numberextra;
				1408	char_u *ps;
				1409	int tab_corr = (*s == TAB);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1410	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1411
				1412	/*
				1413	* No 'linebreak' and 'showbreak': return quickly.
				1414	*/
				1415	if (!wp->w_p_lbr && *p_sbr == NUL)
				1416	#endif
				1417	{
				1418	#ifdef FEAT_MBYTE
				1419	if (wp->w_p_wrap)
				1420	return win_nolbr_chartabsize(wp, s, col, headp);
				1421	#endif
				1422	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
				1423	}
				1424
				1425	#ifdef FEAT_LINEBREAK
				1426	/*
				1427	* First get normal size, without 'linebreak'
				1428	*/
				1429	size = win_chartabsize(wp, s, col);
				1430	c = *s;
				1431
				1432	/*
				1433	* If 'linebreak' set check at a blank before a non-blank if the line
				1434	* needs a break here
				1435	*/
				1436	if (wp->w_p_lbr
				1437	&& vim_isbreak(c)
				1438	&& !vim_isbreak(s[1])
				1439	&& !wp->w_p_list
				1440	&& wp->w_p_wrap
				1441	# ifdef FEAT_VERTSPLIT
				1442	&& wp->w_width != 0
				1443	# endif
				1444	)
				1445	{
				1446	/*
				1447	* Count all characters from first non-blank after a blank up to next
				1448	* non-blank after a blank.
				1449	*/
				1450	numberextra = win_col_off(wp);
				1451	col2 = col;
				1452	colmax = W_WIDTH(wp) - numberextra;
				1453	if (col >= colmax)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1454	{
				1455	n = colmax + win_col_off2(wp);
				1456	if (n > 0)
				1457	colmax += (((col - colmax) / n) + 1) * n;
				1458	}
				1459
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1460	for (;;)
				1461	{
				1462	ps = s;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1463	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1464	c = *s;
				1465	if (!(c != NUL
				1466	&& (vim_isbreak(c)
				1467	\|\| (!vim_isbreak(c)
				1468	&& (col2 == col \|\| !vim_isbreak(*ps))))))
				1469	break;
				1470
				1471	col2 += win_chartabsize(wp, s, col2);
				1472	if (col2 >= colmax) /* doesn't fit */
				1473	{
				1474	size = colmax - col;
				1475	tab_corr = FALSE;
				1476	break;
				1477	}
				1478	}
				1479	}
				1480	# ifdef FEAT_MBYTE
				1481	else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				1482	&& wp->w_p_wrap && in_win_border(wp, col))
				1483	{
				1484	++size; /* Count the ">" in the last column. */
				1485	mb_added = 1;
				1486	}
				1487	# endif
				1488
				1489	/*
				1490	* May have to add something for 'showbreak' string at start of line
				1491	* Set *headp to the size of what we add.
				1492	*/
				1493	added = 0;
				1494	if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
				1495	{
				1496	numberextra = win_col_off(wp);
				1497	col += numberextra + mb_added;
				1498	if (col >= (colnr_T)W_WIDTH(wp))
				1499	{
				1500	col -= W_WIDTH(wp);
				1501	numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
				1502	if (numberextra > 0)
				1503	col = col % numberextra;
				1504	}
				1505	if (col == 0 \|\| col + size > (colnr_T)W_WIDTH(wp))
				1506	{
				1507	added = vim_strsize(p_sbr);
				1508	if (tab_corr)
				1509	size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
				1510	else
				1511	size += added;
				1512	if (col != 0)
				1513	added = 0;
				1514	}
				1515	}
				1516	if (headp != NULL)
				1517	*headp = added + mb_added;
				1518	return size;
				1519	#endif
				1520	}
				1521
				1522	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1523	/*
				1524	* Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
				1525	* 'wrap' is on. This means we need to check for a double-byte character that
				1526	* doesn't fit at the end of the screen line.
				1527	*/
				1528	static int
				1529	win_nolbr_chartabsize(wp, s, col, headp)
				1530	win_T *wp;
				1531	char_u *s;
				1532	colnr_T col;
				1533	int *headp;
				1534	{
				1535	int n;
				1536
				1537	if (*s == TAB && (!wp->w_p_list \|\| lcs_tab1))
				1538	{
				1539	n = wp->w_buffer->b_p_ts;
				1540	return (int)(n - (col % n));
				1541	}
				1542	n = ptr2cells(s);
				1543	/* Add one cell for a double-width character in the last column of the
				1544	* window, displayed with a ">". */
				1545	if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
				1546	{
				1547	if (headp != NULL)
				1548	*headp = 1;
				1549	return 3;
				1550	}
				1551	return n;
				1552	}
				1553
				1554	/*
				1555	* Return TRUE if virtual column "vcol" is in the rightmost column of window
				1556	* "wp".
				1557	*/
				1558	int
				1559	in_win_border(wp, vcol)
				1560	win_T *wp;
				1561	colnr_T vcol;
				1562	{
				1563	colnr_T width1; /* width of first line (after line number) */
				1564	colnr_T width2; /* width of further lines */
				1565
				1566	#ifdef FEAT_VERTSPLIT
				1567	if (wp->w_width == 0) /* there is no border */
				1568	return FALSE;
				1569	#endif
				1570	width1 = W_WIDTH(wp) - win_col_off(wp);
				1571	if (vcol < width1 - 1)
				1572	return FALSE;
				1573	if (vcol == width1 - 1)
				1574	return TRUE;
				1575	width2 = width1 + win_col_off2(wp);
				1576	return ((vcol - width1) % width2 == width2 - 1);
				1577	}
				1578	#endif /* FEAT_MBYTE */
				1579
				1580	/*
				1581	* Get virtual column number of pos.
				1582	* start: on the first position of this character (TAB, ctrl)
				1583	* cursor: where the cursor is on this character (first char, except for TAB)
				1584	* end: on the last position of this character (TAB, ctrl)
				1585	*
				1586	* This is used very often, keep it fast!
				1587	*/
				1588	void
				1589	getvcol(wp, pos, start, cursor, end)
				1590	win_T *wp;
				1591	pos_T *pos;
				1592	colnr_T *start;
				1593	colnr_T *cursor;
				1594	colnr_T *end;
				1595	{
				1596	colnr_T vcol;
				1597	char_u ptr; / points to current char */
				1598	char_u posptr; / points to char at pos->col */
				1599	int incr;
				1600	int head;
				1601	int ts = wp->w_buffer->b_p_ts;
				1602	int c;
				1603
				1604	vcol = 0;
				1605	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1606	posptr = ptr + pos->col;
				1607
				1608	/*
				1609	* This function is used very often, do some speed optimizations.
				1610	* When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
				1611	* Also use this when 'list' is set but tabs take their normal size.
				1612	*/
				1613	if ((!wp->w_p_list \|\| lcs_tab1 != NUL)
				1614	#ifdef FEAT_LINEBREAK
				1615	&& !wp->w_p_lbr && *p_sbr == NUL
				1616	#endif
				1617	)
				1618	{
				1619	#ifndef FEAT_MBYTE
				1620	head = 0;
				1621	#endif
				1622	for (;;)
				1623	{
				1624	#ifdef FEAT_MBYTE
				1625	head = 0;
				1626	#endif
				1627	c = *ptr;
				1628	/* make sure we don't go past the end of the line */
				1629	if (c == NUL)
				1630	{
				1631	incr = 1; /* NUL at end of line only takes one column */
				1632	break;
				1633	}
				1634	/* A tab gets expanded, depending on the current column */
				1635	if (c == TAB)
				1636	incr = ts - (vcol % ts);
				1637	else
				1638	{
				1639	#ifdef FEAT_MBYTE
				1640	if (has_mbyte)
				1641	{
				1642	/* For utf-8, if the byte is >= 0x80, need to look at
				1643	* further bytes to find the cell width. */
				1644	if (enc_utf8 && c >= 0x80)
				1645	incr = utf_ptr2cells(ptr);
				1646	else
				1647	incr = CHARSIZE(c);
				1648
				1649	/* If a double-cell char doesn't fit at the end of a line
				1650	* it wraps to the next line, it's like this char is three
				1651	* cells wide. */
				1652	if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
				1653	{
				1654	++incr;
				1655	head = 1;
				1656	}
				1657	}
				1658	else
				1659	#endif
				1660	incr = CHARSIZE(c);
				1661	}
				1662
				1663	if (ptr >= posptr) /* character at pos->col */
				1664	break;
				1665
				1666	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1667	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1668	}
				1669	}
				1670	else
				1671	{
				1672	for (;;)
				1673	{
				1674	/* A tab gets expanded, depending on the current column */
				1675	head = 0;
				1676	incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
				1677	/* make sure we don't go past the end of the line */
				1678	if (*ptr == NUL)
				1679	{
				1680	incr = 1; /* NUL at end of line only takes one column */
				1681	break;
				1682	}
				1683
				1684	if (ptr >= posptr) /* character at pos->col */
				1685	break;
				1686
				1687	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1688	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1689	}
				1690	}
				1691	if (start != NULL)
				1692	*start = vcol + head;
				1693	if (end != NULL)
				1694	*end = vcol + incr - 1;
				1695	if (cursor != NULL)
				1696	{
				1697	if (*ptr == TAB
				1698	&& (State & NORMAL)
				1699	&& !wp->w_p_list
				1700	&& !virtual_active()
				1701	#ifdef FEAT_VISUAL
				1702	&& !(VIsual_active
				1703	&& (p_sel == 'e' \|\| ltoreq(pos, VIsual)))
				1704	#endif
				1705	)
				1706	cursor = vcol + incr - 1; / cursor at end */
				1707	else
				1708	cursor = vcol + head; / cursor at start */
				1709	}
				1710	}
				1711
				1712	/*
				1713	* Get virtual cursor column in the current window, pretending 'list' is off.
				1714	*/
				1715	colnr_T
				1716	getvcol_nolist(posp)
				1717	pos_T *posp;
				1718	{
				1719	int list_save = curwin->w_p_list;
				1720	colnr_T vcol;
				1721
				1722	curwin->w_p_list = FALSE;
				1723	getvcol(curwin, posp, NULL, &vcol, NULL);
				1724	curwin->w_p_list = list_save;
				1725	return vcol;
				1726	}
				1727
				1728	#if defined(FEAT_VIRTUALEDIT) \|\| defined(PROTO)
				1729	/*
				1730	* Get virtual column in virtual mode.
				1731	*/
				1732	void
				1733	getvvcol(wp, pos, start, cursor, end)
				1734	win_T *wp;
				1735	pos_T *pos;
				1736	colnr_T *start;
				1737	colnr_T *cursor;
				1738	colnr_T *end;
				1739	{
				1740	colnr_T col;
				1741	colnr_T coladd;
				1742	colnr_T endadd;
				1743	# ifdef FEAT_MBYTE
				1744	char_u *ptr;
				1745	# endif
				1746
				1747	if (virtual_active())
				1748	{
				1749	/* For virtual mode, only want one value */
				1750	getvcol(wp, pos, &col, NULL, NULL);
				1751
				1752	coladd = pos->coladd;
				1753	endadd = 0;
				1754	# ifdef FEAT_MBYTE
				1755	/* Cannot put the cursor on part of a wide character. */
				1756	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1757	if (pos->col < STRLEN(ptr))
				1758	{
				1759	int c = (*mb_ptr2char)(ptr + pos->col);
				1760
				1761	if (c != TAB && vim_isprintc(c))
				1762	{
				1763	endadd = char2cells(c) - 1;
				1764	if (coladd >= endadd)
				1765	coladd -= endadd;
				1766	else
				1767	coladd = 0;
				1768	}
				1769	}
				1770	# endif
				1771	col += coladd;
				1772	if (start != NULL)
				1773	*start = col;
				1774	if (cursor != NULL)
				1775	*cursor = col;
				1776	if (end != NULL)
				1777	*end = col + endadd;
				1778	}
				1779	else
				1780	getvcol(wp, pos, start, cursor, end);
				1781	}
				1782	#endif
				1783
				1784	#if defined(FEAT_VISUAL) \|\| defined(PROTO)
				1785	/*
				1786	* Get the leftmost and rightmost virtual column of pos1 and pos2.
				1787	* Used for Visual block mode.
				1788	*/
				1789	void
				1790	getvcols(wp, pos1, pos2, left, right)
				1791	win_T *wp;
				1792	pos_T pos1, pos2;
				1793	colnr_T left, right;
				1794	{
				1795	colnr_T from1, from2, to1, to2;
				1796
				1797	if (ltp(pos1, pos2))
				1798	{
				1799	getvvcol(wp, pos1, &from1, NULL, &to1);
				1800	getvvcol(wp, pos2, &from2, NULL, &to2);
				1801	}
				1802	else
				1803	{
				1804	getvvcol(wp, pos2, &from1, NULL, &to1);
				1805	getvvcol(wp, pos1, &from2, NULL, &to2);
				1806	}
				1807	if (from2 < from1)
				1808	*left = from2;
				1809	else
				1810	*left = from1;
				1811	if (to2 > to1)
				1812	{
				1813	if (*p_sel == 'e' && from2 - 1 >= to1)
				1814	*right = from2 - 1;
				1815	else
				1816	*right = to2;
				1817	}
				1818	else
				1819	*right = to1;
				1820	}
				1821	#endif
				1822
				1823	/*
				1824	* skipwhite: skip over ' ' and '\t'.
				1825	*/
				1826	char_u *
				1827	skipwhite(p)
				1828	char_u *p;
				1829	{
				1830	while (vim_iswhite(p)) / skip to next non-white */
				1831	++p;
				1832	return p;
				1833	}
				1834
				1835	/*
				1836	* skipdigits: skip over digits;
				1837	*/
				1838	char_u *
				1839	skipdigits(p)
				1840	char_u *p;
				1841	{
				1842	while (VIM_ISDIGIT(p)) / skip to next non-digit */
				1843	++p;
				1844	return p;
				1845	}
				1846
				1847	/*
				1848	* Variant of isdigit() that can handle characters > 0x100.
				1849	* We don't use isdigit() here, because on some systems it also considers
				1850	* superscript 1 to be a digit.
				1851	* Use the VIM_ISDIGIT() macro for simple arguments.
				1852	*/
				1853	int
				1854	vim_isdigit(c)
				1855	int c;
				1856	{
				1857	return (c >= '0' && c <= '9');
				1858	}
				1859
				1860	/*
				1861	* Variant of isxdigit() that can handle characters > 0x100.
				1862	* We don't use isxdigit() here, because on some systems it also considers
				1863	* superscript 1 to be a digit.
				1864	*/
				1865	int
				1866	vim_isxdigit(c)
				1867	int c;
				1868	{
				1869	return (c >= '0' && c <= '9')
				1870	\|\| (c >= 'a' && c <= 'f')
				1871	\|\| (c >= 'A' && c <= 'F');
				1872	}
				1873
				1874	/*
				1875	* skiptowhite: skip over text until ' ' or '\t' or NUL.
				1876	*/
				1877	char_u *
				1878	skiptowhite(p)
				1879	char_u *p;
				1880	{
				1881	while (p != ' ' && p != '\t' && *p != NUL)
				1882	++p;
				1883	return p;
				1884	}
				1885
				1886	#if defined(FEAT_LISTCMDS) \|\| defined(FEAT_SIGNS) \|\| defined(FEAT_SNIFF) \
				1887	\|\| defined(PROTO)
				1888	/*
				1889	* skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
				1890	*/
				1891	char_u *
				1892	skiptowhite_esc(p)
				1893	char_u *p;
				1894	{
				1895	while (p != ' ' && p != '\t' && *p != NUL)
				1896	{
				1897	if ((p == '\\' \|\| p == Ctrl_V) && *(p + 1) != NUL)
				1898	++p;
				1899	++p;
				1900	}
				1901	return p;
				1902	}
				1903	#endif
				1904
				1905	/*
				1906	* Getdigits: Get a number from a string and skip over it.
				1907	* Note: the argument is a pointer to a char_u pointer!
				1908	*/
				1909	long
				1910	getdigits(pp)
				1911	char_u **pp;
				1912	{
				1913	char_u *p;
				1914	long retval;
				1915
				1916	p = *pp;
				1917	retval = atol((char *)p);
				1918	if (p == '-') / skip negative sign */
				1919	++p;
				1920	p = skipdigits(p); /* skip to next non-digit */
				1921	*pp = p;
				1922	return retval;
				1923	}
				1924
				1925	/*
				1926	* Return TRUE if "lbuf" is empty or only contains blanks.
				1927	*/
				1928	int
				1929	vim_isblankline(lbuf)
				1930	char_u *lbuf;
				1931	{
				1932	char_u *p;
				1933
				1934	p = skipwhite(lbuf);
				1935	return (p == NUL \|\| p == '\r' \|\| *p == '\n');
				1936	}
				1937
				1938	/*
				1939	* Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	1940	* hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1941	* If "hexp" is not NULL, returns a flag to indicate the type of the number:
				1942	* 0 decimal
				1943	* '0' octal
				1944	* 'X' hex
				1945	* 'x' hex
				1946	* If "len" is not NULL, the length of the number in characters is returned.
				1947	* If "nptr" is not NULL, the signed result is returned in it.
				1948	* If "unptr" is not NULL, the unsigned result is returned in it.
				1949	*/
				1950	void
				1951	vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
				1952	char_u *start;
				1953	int hexp; / return: type of number 0 = decimal, 'x'
				1954	or 'X' is hex, '0' = octal */
				1955	int len; / return: detected length of number */
				1956	int dooct; /* recognize octal number */
				1957	int dohex; /* recognize hex number */
				1958	long nptr; / return: signed result */
				1959	unsigned long unptr; / return: unsigned result */
				1960	{
				1961	char_u *ptr = start;
				1962	int hex = 0; /* default is decimal */
				1963	int negative = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1964	unsigned long un = 0;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1965	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1966
				1967	if (ptr[0] == '-')
				1968	{
				1969	negative = TRUE;
				1970	++ptr;
				1971	}
				1972
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1973	/* Recognize hex and octal. */
				1974	if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1975	{
				1976	hex = ptr[1];
				1977	if (dohex && (hex == 'X' \|\| hex == 'x') && vim_isxdigit(ptr[2]))
				1978	ptr += 2; /* hexadecimal */
				1979	else
				1980	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1981	hex = 0; /* default is decimal */
				1982	if (dooct)
				1983	{
				1984	/* Don't interpret "0", "08" or "0129" as octal. */
				1985	for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
				1986	{
				1987	if (ptr[n] > '7')
				1988	{
				1989	hex = 0; /* can't be octal */
				1990	break;
				1991	}
				1992	if (ptr[n] > '0')
				1993	hex = '0'; /* assume octal */
				1994	}
				1995	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1996	}
				1997	}
				1998
				1999	/*
				2000	* Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
				2001	*/
				2002	if (hex)
				2003	{
				2004	if (hex == '0')
				2005	{
				2006	/* octal */
				2007	while ('0' <= ptr && ptr <= '7')
				2008	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2009	un = 8 * un + (unsigned long)(*ptr - '0');
				2010	++ptr;
				2011	}
				2012	}
				2013	else
				2014	{
				2015	/* hex */
				2016	while (vim_isxdigit(*ptr))
				2017	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2018	un = 16 * un + (unsigned long)hex2nr(*ptr);
				2019	++ptr;
				2020	}
				2021	}
				2022	}
				2023	else
				2024	{
				2025	/* decimal */
				2026	while (VIM_ISDIGIT(*ptr))
				2027	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2028	un = 10 * un + (unsigned long)(*ptr - '0');
				2029	++ptr;
				2030	}
				2031	}
				2032
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2033	if (hexp != NULL)
				2034	*hexp = hex;
				2035	if (len != NULL)
				2036	*len = (int)(ptr - start);
				2037	if (nptr != NULL)
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	2038	{
				2039	if (negative) /* account for leading '-' for decimal numbers */
				2040	*nptr = -(long)un;
				2041	else
				2042	*nptr = (long)un;
				2043	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2044	if (unptr != NULL)
				2045	*unptr = un;
				2046	}
				2047
				2048	/*
				2049	* Return the value of a single hex character.
				2050	* Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
				2051	*/
				2052	int
				2053	hex2nr(c)
				2054	int c;
				2055	{
				2056	if (c >= 'a' && c <= 'f')
				2057	return c - 'a' + 10;
				2058	if (c >= 'A' && c <= 'F')
				2059	return c - 'A' + 10;
				2060	return c - '0';
				2061	}
				2062
				2063	#if defined(FEAT_TERMRESPONSE) \
				2064	\|\| (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) \|\| defined(PROTO)
				2065	/*
				2066	* Convert two hex characters to a byte.
				2067	* Return -1 if one of the characters is not hex.
				2068	*/
				2069	int
				2070	hexhex2nr(p)
				2071	char_u *p;
				2072	{
				2073	if (!vim_isxdigit(p[0]) \|\| !vim_isxdigit(p[1]))
				2074	return -1;
				2075	return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
				2076	}
				2077	#endif
				2078
				2079	/*
				2080	* Return TRUE if "str" starts with a backslash that should be removed.
				2081	* For MS-DOS, WIN32 and OS/2 this is only done when the character after the
				2082	* backslash is not a normal file name character.
				2083	* '$' is a valid file name character, we don't remove the backslash before
				2084	* it. This means it is not possible to use an environment variable after a
				2085	* backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
				2086	* Although "\ name" is valid, the backslash in "Program\ files" must be
				2087	* removed. Assume a file name doesn't start with a space.
				2088	* For multi-byte names, never remove a backslash before a non-ascii
				2089	* character, assume that all multi-byte characters are valid file name
				2090	* characters.
				2091	*/
				2092	int
				2093	rem_backslash(str)
				2094	char_u *str;
				2095	{
				2096	#ifdef BACKSLASH_IN_FILENAME
				2097	return (str[0] == '\\'
				2098	# ifdef FEAT_MBYTE
				2099	&& str[1] < 0x80
				2100	# endif
				2101	&& (str[1] == ' '
				2102	\|\| (str[1] != NUL
				2103	&& str[1] != '*'
				2104	&& str[1] != '?'
				2105	&& !vim_isfilec(str[1]))));
				2106	#else
				2107	return (str[0] == '\\' && str[1] != NUL);
				2108	#endif
				2109	}
				2110
				2111	/*
				2112	* Halve the number of backslashes in a file name argument.
				2113	* For MS-DOS we only do this if the character after the backslash
				2114	* is not a normal file character.
				2115	*/
				2116	void
				2117	backslash_halve(p)
				2118	char_u *p;
				2119	{
				2120	for ( ; *p; ++p)
				2121	if (rem_backslash(p))
				2122	STRCPY(p, p + 1);
				2123	}
				2124
				2125	/*
				2126	* backslash_halve() plus save the result in allocated memory.
				2127	*/
				2128	char_u *
				2129	backslash_halve_save(p)
				2130	char_u *p;
				2131	{
				2132	char_u *res;
				2133
				2134	res = vim_strsave(p);
				2135	if (res == NULL)
				2136	return p;
				2137	backslash_halve(res);
				2138	return res;
				2139	}
				2140
				2141	#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) \|\| defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value (0 - 255), the entry is
 * the byte that ebcdic2ascii() stores in its place.  The entries are written
 * as octal numbers, eight per row; the comment after each row gives the
 * range of index values it covers.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,     /* 0x00 - 0x07 */
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,     /* 0x08 - 0x0f */
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,     /* 0x10 - 0x17 */
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,     /* 0x18 - 0x1f */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,     /* 0x20 - 0x27 */
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,     /* 0x28 - 0x2f */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0x30 - 0x37 */
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,     /* 0x38 - 0x3f */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,     /* 0x40 - 0x47 */
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,     /* 0x48 - 0x4f */
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,     /* 0x50 - 0x57 */
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,     /* 0x58 - 0x5f */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,     /* 0x60 - 0x67 */
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,     /* 0x68 - 0x6f */
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,     /* 0x70 - 0x77 */
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,     /* 0x78 - 0x7f */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,     /* 0x80 - 0x87 */
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,     /* 0x88 - 0x8f */
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,     /* 0x90 - 0x97 */
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,     /* 0x98 - 0x9f */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,     /* 0xa0 - 0xa7 */
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,     /* 0xa8 - 0xaf */
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,     /* 0xb0 - 0xb7 */
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,     /* 0xb8 - 0xbf */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,     /* 0xc0 - 0xc7 */
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,     /* 0xc8 - 0xcf */
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,     /* 0xd0 - 0xd7 */
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,     /* 0xd8 - 0xdf */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,     /* 0xe0 - 0xe7 */
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,     /* 0xe8 - 0xef */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0xf0 - 0xf7 */
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377      /* 0xf8 - 0xff */
};
				2182
				2183	/*
				2184	* Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
				2185	* wanting 7-bit ASCII characters out the other end.
				2186	*/
				2187	void
				2188	ebcdic2ascii(buffer, len)
				2189	char_u *buffer;
				2190	int len;
				2191	{
				2192	int i;
				2193
				2194	for (i = 0; i < len; i++)
				2195	buffer[i] = ebcdic2ascii_tab[buffer[i]];
				2196	}
				2197	#endif