Blame - src/charset.c - android_external_vim

blob: c198284476fdca3a5eb8f3e6455400246878d066 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	#include "vim.h"
				11
				12	#ifdef FEAT_LINEBREAK
				13	static int win_chartabsize __ARGS((win_T wp, char_u p, colnr_T col));
				14	#endif
				15
				16	#ifdef FEAT_MBYTE
				17	static int win_nolbr_chartabsize __ARGS((win_T wp, char_u s, colnr_T col, int *headp));
				18	#endif
				19
				20	static int nr2hex __ARGS((int c));
				21
				22	static int chartab_initialized = FALSE;
				23
				24	/* b_chartab[] is an array of 32 bytes, each bit representing one of the
				25	* characters 0-255. */
				26	#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] \|= (1 << ((c) & 0x7))
				27	#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
				28	#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
				29
				30	/*
				31	* Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
				32	* characters for current buffer.
				33	*
				34	* Depends on the option settings 'iskeyword', 'isident', 'isfname',
				35	* 'isprint' and 'encoding'.
				36	*
				37	* The index in chartab[] depends on 'encoding':
				38	* - For non-multi-byte index with the byte (same as the character).
				39	* - For DBCS index with the first byte.
				40	* - For UTF-8 index with the character (when first byte is up to 0x80 it is
				41	* the same as the character, if the first byte is 0x80 and above it depends
				42	* on further bytes).
				43	*
				44	* The contents of chartab[]:
				45	* - The lower two bits, masked by CT_CELL_MASK, give the number of display
				46	* cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
				47	* - CT_PRINT_CHAR bit is set when the character is printable (no need to
				48	* translate the character before displaying it). Note that only DBCS
				49	* characters can have 2 display cells and still be printable.
				50	* - CT_FNAME_CHAR bit is set when the character can be in a file name.
				51	* - CT_ID_CHAR bit is set when the character can be in an identifier.
				52	*
				53	* Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
				54	* error, OK otherwise.
				55	*/
				56	int
				57	init_chartab()
				58	{
				59	return buf_init_chartab(curbuf, TRUE);
				60	}
				61
				62	int
				63	buf_init_chartab(buf, global)
				64	buf_T *buf;
				65	int global; /* FALSE: only set buf->b_chartab[] */
				66	{
				67	int c;
				68	int c2;
				69	char_u *p;
				70	int i;
				71	int tilde;
				72	int do_isalpha;
				73
				74	if (global)
				75	{
				76	/*
				77	* Set the default size for printable characters:
				78	* From <Space> to '~' is 1 (printable), others are 2 (not printable).
				79	* This also inits all 'isident' and 'isfname' flags to FALSE.
				80	*
				81	* EBCDIC: all chars below ' ' are not printable, all others are
				82	* printable.
				83	*/
				84	c = 0;
				85	while (c < ' ')
				86	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				87	#ifdef EBCDIC
				88	while (c < 255)
				89	#else
				90	while (c <= '~')
				91	#endif
				92	chartab[c++] = 1 + CT_PRINT_CHAR;
				93	#ifdef FEAT_FKMAP
				94	if (p_altkeymap)
				95	{
				96	while (c < YE)
				97	chartab[c++] = 1 + CT_PRINT_CHAR;
				98	}
				99	#endif
				100	while (c < 256)
				101	{
				102	#ifdef FEAT_MBYTE
				103	/* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
				104	if (enc_utf8 && c >= 0xa0)
				105	chartab[c++] = CT_PRINT_CHAR + 1;
				106	/* euc-jp characters starting with 0x8e are single width */
				107	else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
				108	chartab[c++] = CT_PRINT_CHAR + 1;
				109	/* other double-byte chars can be printable AND double-width */
				110	else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
				111	chartab[c++] = CT_PRINT_CHAR + 2;
				112	else
				113	#endif
				114	/* the rest is unprintable by default */
				115	chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
				116	}
				117
				118	#ifdef FEAT_MBYTE
				119	/* Assume that every multi-byte char is a filename character. */
				120	for (c = 1; c < 256; ++c)
				121	if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
				122	\|\| (enc_dbcs == DBCS_JPNU && c == 0x8e)
				123	\|\| (enc_utf8 && c >= 0xa0))
				124	chartab[c] \|= CT_FNAME_CHAR;
				125	#endif
				126	}
				127
				128	/*
				129	* Init word char flags all to FALSE
				130	*/
				131	vim_memset(buf->b_chartab, 0, (size_t)32);
				132	#ifdef FEAT_MBYTE
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	133	if (enc_dbcs != 0)
				134	for (c = 0; c < 256; ++c)
				135	{
				136	/* double-byte characters are probably word characters */
				137	if (MB_BYTE2LEN(c) == 2)
				138	SET_CHARTAB(buf, c);
				139	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	140	#endif
				141
				142	#ifdef FEAT_LISP
				143	/*
				144	* In lisp mode the '-' character is included in keywords.
				145	*/
				146	if (buf->b_p_lisp)
				147	SET_CHARTAB(buf, '-');
				148	#endif
				149
				150	/* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
				151	* options Each option is a list of characters, character numbers or
				152	* ranges, separated by commas, e.g.: "200-210,x,#-178,-"
				153	*/
				154	for (i = global ? 0 : 3; i <= 3; ++i)
				155	{
				156	if (i == 0)
				157	p = p_isi; /* first round: 'isident' */
				158	else if (i == 1)
				159	p = p_isp; /* second round: 'isprint' */
				160	else if (i == 2)
				161	p = p_isf; /* third round: 'isfname' */
				162	else /* i == 3 */
				163	p = buf->b_p_isk; /* fourth round: 'iskeyword' */
				164
				165	while (*p)
				166	{
				167	tilde = FALSE;
				168	do_isalpha = FALSE;
				169	if (*p == '^' && p[1] != NUL)
				170	{
				171	tilde = TRUE;
				172	++p;
				173	}
				174	if (VIM_ISDIGIT(*p))
				175	c = getdigits(&p);
				176	else
				177	c = *p++;
				178	c2 = -1;
				179	if (*p == '-' && p[1] != NUL)
				180	{
				181	++p;
				182	if (VIM_ISDIGIT(*p))
				183	c2 = getdigits(&p);
				184	else
				185	c2 = *p++;
				186	}
				187	if (c <= 0 \|\| (c2 < c && c2 != -1) \|\| c2 >= 256
				188	\|\| !(p == NUL \|\| p == ','))
				189	return FAIL;
				190
				191	if (c2 == -1) /* not a range */
				192	{
				193	/*
				194	* A single '@' (not "@-@"):
				195	* Decide on letters being ID/printable/keyword chars with
				196	* standard function isalpha(). This takes care of locale for
				197	* single-byte characters).
				198	*/
				199	if (c == '@')
				200	{
				201	do_isalpha = TRUE;
				202	c = 1;
				203	c2 = 255;
				204	}
				205	else
				206	c2 = c;
				207	}
				208	while (c <= c2)
				209	{
				210	if (!do_isalpha \|\| isalpha(c)
				211	#ifdef FEAT_FKMAP
				212	\|\| (p_altkeymap && (F_isalpha(c) \|\| F_isdigit(c)))
				213	#endif
				214	)
				215	{
				216	if (i == 0) /* (re)set ID flag */
				217	{
				218	if (tilde)
				219	chartab[c] &= ~CT_ID_CHAR;
				220	else
				221	chartab[c] \|= CT_ID_CHAR;
				222	}
				223	else if (i == 1) /* (re)set printable */
				224	{
				225	if ((c < ' '
				226	#ifndef EBCDIC
				227	\|\| c > '~'
				228	#endif
				229	#ifdef FEAT_FKMAP
				230	\|\| (p_altkeymap
				231	&& (F_isalpha(c) \|\| F_isdigit(c)))
				232	#endif
				233	)
				234	#ifdef FEAT_MBYTE
				235	/* For double-byte we keep the cell width, so
				236	* that we can detect it from the first byte. */
				237	&& !(enc_dbcs && MB_BYTE2LEN(c) == 2)
				238	#endif
				239	)
				240	{
				241	if (tilde)
				242	{
				243	chartab[c] = (chartab[c] & ~CT_CELL_MASK)
				244	+ ((dy_flags & DY_UHEX) ? 4 : 2);
				245	chartab[c] &= ~CT_PRINT_CHAR;
				246	}
				247	else
				248	{
				249	chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
				250	chartab[c] \|= CT_PRINT_CHAR;
				251	}
				252	}
				253	}
				254	else if (i == 2) /* (re)set fname flag */
				255	{
				256	if (tilde)
				257	chartab[c] &= ~CT_FNAME_CHAR;
				258	else
				259	chartab[c] \|= CT_FNAME_CHAR;
				260	}
				261	else /* i == 3 / / (re)set keyword flag */
				262	{
				263	if (tilde)
				264	RESET_CHARTAB(buf, c);
				265	else
				266	SET_CHARTAB(buf, c);
				267	}
				268	}
				269	++c;
				270	}
				271	p = skip_to_option_part(p);
				272	}
				273	}
				274	chartab_initialized = TRUE;
				275	return OK;
				276	}
				277
				278	/*
				279	* Translate any special characters in buf[bufsize] in-place.
				280	* The result is a string with only printable characters, but if there is not
				281	* enough room, not all characters will be translated.
				282	*/
				283	void
				284	trans_characters(buf, bufsize)
				285	char_u *buf;
				286	int bufsize;
				287	{
				288	int len; /* length of string needing translation */
				289	int room; /* room in buffer after string */
				290	char_u trs; / translated character */
				291	int trs_len; /* length of trs[] */
				292
				293	len = (int)STRLEN(buf);
				294	room = bufsize - len;
				295	while (*buf != 0)
				296	{
				297	# ifdef FEAT_MBYTE
				298	/* Assume a multi-byte character doesn't need translation. */
				299	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
				300	len -= trs_len;
				301	else
				302	# endif
				303	{
				304	trs = transchar_byte(*buf);
				305	trs_len = (int)STRLEN(trs);
				306	if (trs_len > 1)
				307	{
				308	room -= trs_len - 1;
				309	if (room <= 0)
				310	return;
				311	mch_memmove(buf + trs_len, buf + 1, (size_t)len);
				312	}
				313	mch_memmove(buf, trs, (size_t)trs_len);
				314	--len;
				315	}
				316	buf += trs_len;
				317	}
				318	}
				319
				320	#if defined(FEAT_EVAL) \|\| defined(FEAT_TITLE) \|\| defined(PROTO)
				321	/*
				322	* Translate a string into allocated memory, replacing special chars with
				323	* printable chars. Returns NULL when out of memory.
				324	*/
				325	char_u *
				326	transstr(s)
				327	char_u *s;
				328	{
				329	char_u *res;
				330	char_u *p;
				331	#ifdef FEAT_MBYTE
				332	int l, len, c;
				333	char_u hexbuf[11];
				334	#endif
				335
				336	#ifdef FEAT_MBYTE
				337	if (has_mbyte)
				338	{
				339	/* Compute the length of the result, taking account of unprintable
				340	* multi-byte characters. */
				341	len = 0;
				342	p = s;
				343	while (*p != NUL)
				344	{
				345	if ((l = (*mb_ptr2len_check)(p)) > 1)
				346	{
				347	c = (*mb_ptr2char)(p);
				348	p += l;
				349	if (vim_isprintc(c))
				350	len += l;
				351	else
				352	{
				353	transchar_hex(hexbuf, c);
				354	len += STRLEN(hexbuf);
				355	}
				356	}
				357	else
				358	{
				359	l = byte2cells(*p++);
				360	if (l > 0)
				361	len += l;
				362	else
				363	len += 4; /* illegal byte sequence */
				364	}
				365	}
				366	res = alloc((unsigned)(len + 1));
				367	}
				368	else
				369	#endif
				370	res = alloc((unsigned)(vim_strsize(s) + 1));
				371	if (res != NULL)
				372	{
				373	*res = NUL;
				374	p = s;
				375	while (*p != NUL)
				376	{
				377	#ifdef FEAT_MBYTE
				378	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				379	{
				380	c = (*mb_ptr2char)(p);
				381	if (vim_isprintc(c))
				382	STRNCAT(res, p, l); /* append printable multi-byte char */
				383	else
				384	transchar_hex(res + STRLEN(res), c);
				385	p += l;
				386	}
				387	else
				388	#endif
				389	STRCAT(res, transchar_byte(*p++));
				390	}
				391	}
				392	return res;
				393	}
				394	#endif
				395
				396	#if defined(FEAT_SYN_HL) \|\| defined(FEAT_INS_EXPAND) \|\| defined(PROTO)
				397	/*
Bram Moolenaar	217ad92	2005-03-20 22:37:15 +0000	[diff] [blame]	398	* Convert the string "str[orglen]" to do ignore-case comparing. Uses the
				399	* current locale.
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	400	* When "buf" is NULL returns an allocated string (NULL for out-of-memory).
				401	* Otherwise puts the result in "buf[buflen]".
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	*/
				403	char_u *
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	404	str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	405	char_u *str;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	406	int orglen;
				407	char_u *buf;
				408	int buflen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	409	{
				410	garray_T ga;
				411	int i;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	412	int len = orglen;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	413
				414	#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
				415	#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	416	#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
				417	#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	418
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	419	/* Copy "str" into "buf" or allocated memory, unmodified. */
				420	if (buf == NULL)
				421	{
				422	ga_init2(&ga, 1, 10);
				423	if (ga_grow(&ga, len + 1) == FAIL)
				424	return NULL;
				425	mch_memmove(ga.ga_data, str, (size_t)len);
				426	ga.ga_len = len;
				427	}
				428	else
				429	{
				430	if (len >= buflen) /* Ugly! */
				431	len = buflen - 1;
				432	mch_memmove(buf, str, (size_t)len);
				433	}
				434	if (buf == NULL)
				435	GA_CHAR(len) = NUL;
				436	else
				437	buf[len] = NUL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	438
				439	/* Make each character lower case. */
				440	i = 0;
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	441	while (STR_CHAR(i) != NUL)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	442	{
				443	#ifdef FEAT_MBYTE
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	444	if (enc_utf8 \|\| (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	445	{
				446	if (enc_utf8)
				447	{
				448	int c, lc;
				449
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	450	c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	451	lc = utf_tolower(c);
				452	if (c != lc)
				453	{
				454	int ol = utf_char2len(c);
				455	int nl = utf_char2len(lc);
				456
				457	/* If the byte length changes need to shift the following
				458	* characters forward or backward. */
				459	if (ol != nl)
				460	{
				461	if (nl > ol)
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	462	{
				463	if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
				464	: len + nl - ol >= buflen)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	465	{
				466	/* out of memory, keep old char */
				467	lc = c;
				468	nl = ol;
				469	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	470	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	471	if (ol != nl)
				472	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	473	if (buf == NULL)
				474	{
				475	mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	476	STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	477	ga.ga_len += nl - ol;
				478	}
				479	else
				480	{
				481	mch_memmove(buf + i + nl, buf + i + ol,
				482	STRLEN(buf + i + ol) + 1);
				483	len += nl - ol;
				484	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	485	}
				486	}
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	487	(void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	488	}
				489	}
				490	/* skip to next multi-byte char */
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	491	i += (*mb_ptr2len_check)(STR_PTR(i));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	492	}
				493	else
				494	#endif
				495	{
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	496	if (buf == NULL)
				497	GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
				498	else
				499	buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	500	++i;
				501	}
				502	}
				503
Bram Moolenaar	6ebb114	2005-01-25 21:58:26 +0000	[diff] [blame]	504	if (buf == NULL)
				505	return (char_u *)ga.ga_data;
				506	return buf;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	507	}
				508	#endif
				509
				510	/*
				511	* Catch 22: chartab[] can't be initialized before the options are
				512	* initialized, and initializing options may cause transchar() to be called!
				513	* When chartab_initialized == FALSE don't use chartab[].
				514	* Does NOT work for multi-byte characters, c must be <= 255.
				515	* Also doesn't work for the first byte of a multi-byte, "c" must be a
				516	* character!
				517	*/
				518	static char_u transchar_buf[7];
				519
				520	char_u *
				521	transchar(c)
				522	int c;
				523	{
				524	int i;
				525
				526	i = 0;
				527	if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
				528	{
				529	transchar_buf[0] = '~';
				530	transchar_buf[1] = '@';
				531	i = 2;
				532	c = K_SECOND(c);
				533	}
				534
				535	if ((!chartab_initialized && (
				536	#ifdef EBCDIC
				537	(c >= 64 && c < 255)
				538	#else
				539	(c >= ' ' && c <= '~')
				540	#endif
				541	#ifdef FEAT_FKMAP
				542	\|\| F_ischar(c)
				543	#endif
				544	)) \|\| (c < 256 && vim_isprintc_strict(c)))
				545	{
				546	/* printable character */
				547	transchar_buf[i] = c;
				548	transchar_buf[i + 1] = NUL;
				549	}
				550	else
				551	transchar_nonprint(transchar_buf + i, c);
				552	return transchar_buf;
				553	}
				554
				555	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				556	/*
				557	* Like transchar(), but called with a byte instead of a character. Checks
				558	* for an illegal UTF-8 byte.
				559	*/
				560	char_u *
				561	transchar_byte(c)
				562	int c;
				563	{
				564	if (enc_utf8 && c >= 0x80)
				565	{
				566	transchar_nonprint(transchar_buf, c);
				567	return transchar_buf;
				568	}
				569	return transchar(c);
				570	}
				571	#endif
				572
				573	/*
				574	* Convert non-printable character to two or more printable characters in
				575	* "buf[]". "buf" needs to be able to hold five bytes.
				576	* Does NOT work for multi-byte characters, c must be <= 255.
				577	*/
				578	void
				579	transchar_nonprint(buf, c)
				580	char_u *buf;
				581	int c;
				582	{
				583	if (c == NL)
				584	c = NUL; /* we use newline in place of a NUL */
				585	else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
				586	c = NL; /* we use CR in place of NL in this case */
				587
				588	if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
				589	transchar_hex(buf, c);
				590
				591	#ifdef EBCDIC
				592	/* For EBCDIC only the characters 0-63 and 255 are not printable */
				593	else if (CtrlChar(c) != 0 \|\| c == DEL)
				594	#else
				595	else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
				596	#endif
				597	{
				598	buf[0] = '^';
				599	#ifdef EBCDIC
				600	if (c == DEL)
				601	buf[1] = '?'; /* DEL displayed as ^? */
				602	else
				603	buf[1] = CtrlChar(c);
				604	#else
				605	buf[1] = c ^ 0x40; /* DEL displayed as ^? */
				606	#endif
				607
				608	buf[2] = NUL;
				609	}
				610	#ifdef FEAT_MBYTE
				611	else if (enc_utf8 && c >= 0x80)
				612	{
				613	transchar_hex(buf, c);
				614	}
				615	#endif
				616	#ifndef EBCDIC
				617	else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
				618	{
				619	buf[0] = '\|';
				620	buf[1] = c - 0x80;
				621	buf[2] = NUL;
				622	}
				623	#else
				624	else if (c < 64)
				625	{
				626	buf[0] = '~';
				627	buf[1] = MetaChar(c);
				628	buf[2] = NUL;
				629	}
				630	#endif
				631	else /* 0x80 - 0x9f and 0xff */
				632	{
				633	/*
				634	* TODO: EBCDIC I don't know what to do with this chars, so I display
				635	* them as '~?' for now
				636	*/
				637	buf[0] = '~';
				638	#ifdef EBCDIC
				639	buf[1] = '?'; /* 0xff displayed as ~? */
				640	#else
				641	buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
				642	#endif
				643	buf[2] = NUL;
				644	}
				645	}
				646
				647	void
				648	transchar_hex(buf, c)
				649	char_u *buf;
				650	int c;
				651	{
				652	int i = 0;
				653
				654	buf[0] = '<';
				655	#ifdef FEAT_MBYTE
				656	if (c > 255)
				657	{
				658	buf[++i] = nr2hex((unsigned)c >> 12);
				659	buf[++i] = nr2hex((unsigned)c >> 8);
				660	}
				661	#endif
				662	buf[++i] = nr2hex((unsigned)c >> 4);
				663	buf[++i] = nr2hex(c);
				664	buf[++i] = '>';
				665	buf[++i] = NUL;
				666	}
				667
				668	/*
				669	* Convert the lower 4 bits of byte "c" to its hex character.
				670	* Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
				671	* function key 1.
				672	*/
				673	static int
				674	nr2hex(c)
				675	int c;
				676	{
				677	if ((c & 0xf) <= 9)
				678	return (c & 0xf) + '0';
				679	return (c & 0xf) - 10 + 'a';
				680	}
				681
				682	/*
				683	* Return number of display cells occupied by byte "b".
				684	* Caller must make sure 0 <= b <= 255.
				685	* For multi-byte mode "b" must be the first byte of a character.
				686	* A TAB is counted as two cells: "^I".
				687	* For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
				688	* cells depends on further bytes.
				689	*/
				690	int
				691	byte2cells(b)
				692	int b;
				693	{
				694	#ifdef FEAT_MBYTE
				695	if (enc_utf8 && b >= 0x80)
				696	return 0;
				697	#endif
				698	return (chartab[b] & CT_CELL_MASK);
				699	}
				700
				701	/*
				702	* Return number of display cells occupied by character "c".
				703	* "c" can be a special key (negative number) in which case 3 or 4 is returned.
				704	* A TAB is counted as two cells: "^I" or four: "<09>".
				705	*/
				706	int
				707	char2cells(c)
				708	int c;
				709	{
				710	if (IS_SPECIAL(c))
				711	return char2cells(K_SECOND(c)) + 2;
				712	#ifdef FEAT_MBYTE
				713	if (c >= 0x80)
				714	{
				715	/* UTF-8: above 0x80 need to check the value */
				716	if (enc_utf8)
				717	return utf_char2cells(c);
				718	/* DBCS: double-byte means double-width, except for euc-jp with first
				719	* byte 0x8e */
				720	if (enc_dbcs != 0 && c >= 0x100)
				721	{
				722	if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
				723	return 1;
				724	return 2;
				725	}
				726	}
				727	#endif
				728	return (chartab[c & 0xff] & CT_CELL_MASK);
				729	}
				730
				731	/*
				732	* Return number of display cells occupied by character at "*p".
				733	* A TAB is counted as two cells: "^I" or four: "<09>".
				734	*/
				735	int
				736	ptr2cells(p)
				737	char_u *p;
				738	{
				739	#ifdef FEAT_MBYTE
				740	/* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
				741	if (enc_utf8 && *p >= 0x80)
				742	return utf_ptr2cells(p);
				743	/* For DBCS we can tell the cell count from the first byte. */
				744	#endif
				745	return (chartab[*p] & CT_CELL_MASK);
				746	}
				747
				748	/*
				749	* Return the number of characters string "s" will take on the screen,
				750	* counting TABs as two characters: "^I".
				751	*/
				752	int
				753	vim_strsize(s)
				754	char_u *s;
				755	{
				756	return vim_strnsize(s, (int)MAXCOL);
				757	}
				758
				759	/*
				760	* Return the number of characters string "s[len]" will take on the screen,
				761	* counting TABs as two characters: "^I".
				762	*/
				763	int
				764	vim_strnsize(s, len)
				765	char_u *s;
				766	int len;
				767	{
				768	int size = 0;
				769
				770	while (*s != NUL && --len >= 0)
				771	{
				772	#ifdef FEAT_MBYTE
				773	if (has_mbyte)
				774	{
				775	int l = (*mb_ptr2len_check)(s);
				776
				777	size += ptr2cells(s);
				778	s += l;
				779	len -= l - 1;
				780	}
				781	else
				782	#endif
				783	size += byte2cells(*s++);
				784	}
				785	return size;
				786	}
				787
				788	/*
				789	* Return the number of characters 'c' will take on the screen, taking
				790	* into account the size of a tab.
				791	* Use a define to make it fast, this is used very often!!!
				792	* Also see getvcol() below.
				793	*/
				794
				795	#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
				796	if (*(p) == TAB && (!(wp)->w_p_list \|\| lcs_tab1)) \
				797	{ \
				798	int ts; \
				799	ts = (buf)->b_p_ts; \
				800	return (int)(ts - (col % ts)); \
				801	} \
				802	else \
				803	return ptr2cells(p);
				804
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed in
 * column "col" of the current window, using the 'tabstop' of the current
 * buffer for a TAB.
 */
int
chartabsize(p, col)
    char_u *p;
    colnr_T col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
				815
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and the buffer shown in it
 * instead of the current window and buffer.
 */
static int
win_chartabsize(wp, p, col)
    win_T *wp;
    char_u *p;
    colnr_T col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
				826
				827	/*
				828	* return the number of characters the string 's' will take on the screen,
				829	* taking into account the size of a tab
				830	*/
				831	int
				832	linetabsize(s)
				833	char_u *s;
				834	{
				835	colnr_T col = 0;
				836
				837	while (*s != NUL)
				838	col += lbr_chartabsize_adv(&s, col);
				839	return (int)col;
				840	}
				841
				842	/*
				843	* Like linetabsize(), but for a given window instead of the current one.
				844	*/
				845	int
				846	win_linetabsize(wp, p, len)
				847	win_T *wp;
				848	char_u *p;
				849	colnr_T len;
				850	{
				851	colnr_T col = 0;
				852	char_u *s;
				853
Bram Moolenaar	b5bf5b8	2004-12-24 14:35:23 +0000	[diff] [blame]	854	for (s = p; *s != NUL && (len == MAXCOL \|\| s < p + len); mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	855	col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	856	return (int)col;
				857	}
				858
				859	/*
Bram Moolenaar	8169525	2004-12-29 20:58:21 +0000	[diff] [blame]	860	* Return TRUE if 'c' is a normal identifier character:
				861	* Letters and characters from the 'isident' option.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	862	*/
				863	int
				864	vim_isIDc(c)
				865	int c;
				866	{
				867	return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
				868	}
				869
				870	/*
				871	* return TRUE if 'c' is a keyword character: Letters and characters from
				872	* 'iskeyword' option for current buffer.
				873	* For multi-byte characters mb_get_class() is used (builtin rules).
				874	*/
				875	int
				876	vim_iswordc(c)
				877	int c;
				878	{
				879	#ifdef FEAT_MBYTE
				880	if (c >= 0x100)
				881	{
				882	if (enc_dbcs != 0)
				883	return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
				884	if (enc_utf8)
				885	return utf_class(c) >= 2;
				886	}
				887	#endif
				888	return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
				889	}
				890
				891	/*
				892	* Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
				893	*/
				894	int
				895	vim_iswordp(p)
				896	char_u *p;
				897	{
				898	#ifdef FEAT_MBYTE
				899	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				900	return mb_get_class(p) >= 2;
				901	#endif
				902	return GET_CHARTAB(curbuf, *p) != 0;
				903	}
				904
				905	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				906	int
				907	vim_iswordc_buf(p, buf)
				908	char_u *p;
				909	buf_T *buf;
				910	{
				911	# ifdef FEAT_MBYTE
				912	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				913	return mb_get_class(p) >= 2;
				914	# endif
				915	return (GET_CHARTAB(buf, *p) != 0);
				916	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	917
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	918	/*
				919	* The tables used for spelling. These are only used for the first 256
				920	* characters.
				921	*/
/* One set of spelling tables, each indexed by a character value 0 - 255. */
typedef struct spelltab_S
{
    char_u st_isw[256];     /* flags: is word char */
    char_u st_isu[256];     /* flags: is uppercase char */
    char_u st_fold[256];    /* chars: folded case */
} spelltab_T;

/* The tables in use; set to defaults by init_spell_chartab(). */
static spelltab_T spelltab;
/* Reset to FALSE by init_spell_chartab().
 * NOTE(review): presumably set once an affix file has defined the tables;
 * that code is outside the part of the file visible here -- confirm. */
static int did_set_spelltab;

/* NOTE(review): not used in the part of the file visible here. */
#define SPELL_ISWORD 1
#define SPELL_ISUPPER 2

static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T *new_st));
				937
				938	/*
				939	* Init the chartab used for spelling for ASCII.
				940	* EBCDIC is not supported!
				941	*/
				942	static void
				943	clear_spell_chartab(sp)
				944	spelltab_T *sp;
				945	{
				946	int i;
				947
				948	/* Init everything to FALSE. */
				949	vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
				950	vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
				951	for (i = 0; i < 256; ++i)
				952	sp->st_fold[i] = i;
				953
				954	/* We include digits. A word shouldn't start with a digit, but handling
				955	* that is done separately. */
				956	for (i = '0'; i <= '9'; ++i)
				957	sp->st_isw[i] = TRUE;
				958	for (i = 'A'; i <= 'Z'; ++i)
				959	{
				960	sp->st_isw[i] = TRUE;
				961	sp->st_isu[i] = TRUE;
				962	sp->st_fold[i] = i + 0x20;
				963	}
				964	for (i = 'a'; i <= 'z'; ++i)
				965	sp->st_isw[i] = TRUE;
				966	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	967
				968	/*
				969	* Init the chartab used for spelling. Only depends on 'encoding'.
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	970	* Called once while starting up and when 'encoding' changes.
				971	* The default is to use isalpha(), but the spell file should define the word
				972	* characters to make it possible that 'encoding' differs from the current
				973	* locale.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	974	*/
				975	void
				976	init_spell_chartab()
				977	{
				978	int i;
				979
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	980	did_set_spelltab = FALSE;
				981	clear_spell_chartab(&spelltab);
				982
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	983	#ifdef FEAT_MBYTE
				984	if (enc_dbcs)
				985	{
				986	/* DBCS: assume double-wide characters are word characters. */
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	987	for (i = 128; i <= 255; ++i)
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	988	if (MB_BYTE2LEN(i) == 2)
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	989	spelltab.st_isw[i] = TRUE;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	990	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	991	else
				992	#endif
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	993	{
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	994	/* Rough guess: use isalpha() and isupper() for characters above 128.
				995	* */
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	996	for (i = 128; i < 256; ++i)
				997	{
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	998	spelltab.st_isw[i] = MB_ISUPPER(i) \|\| MB_ISLOWER(i);
				999	if (MB_ISUPPER(i))
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1000	{
				1001	spelltab.st_isu[i] = TRUE;
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	1002	spelltab.st_fold[i] = MB_TOLOWER(i);
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1003	}
				1004	}
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1005	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1006	}
				1007
				1008	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				1009	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				1010
				1011	/*
				1012	* Set the spell character tables from strings in the affix file.
				1013	*/
				1014	int
				1015	set_spell_chartab(fol, low, upp)
				1016	char_u *fol;
				1017	char_u *low;
				1018	char_u *upp;
				1019	{
				1020	/* We build the new tables here first, so that we can compare with the
				1021	* previous one. */
				1022	spelltab_T new_st;
				1023	char_u pf = fol, pl = low, *pu = upp;
				1024	int f, l, u;
				1025
				1026	clear_spell_chartab(&new_st);
				1027
				1028	while (*pf != NUL)
				1029	{
				1030	if (pl == NUL \|\| pu == NUL)
				1031	{
				1032	EMSG(_(e_affform));
				1033	return FAIL;
				1034	}
				1035	#ifdef FEAT_MBYTE
				1036	f = mb_ptr2char_adv(&pf);
				1037	l = mb_ptr2char_adv(&pl);
				1038	u = mb_ptr2char_adv(&pu);
				1039	#else
				1040	f = *pf++;
				1041	l = *pl++;
				1042	u = *pu++;
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1043	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1044	/* Every character that appears is a word character. */
				1045	if (f < 256)
				1046	new_st.st_isw[f] = TRUE;
				1047	if (l < 256)
				1048	new_st.st_isw[l] = TRUE;
				1049	if (u < 256)
				1050	new_st.st_isw[u] = TRUE;
				1051
				1052	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				1053	* case-folding */
				1054	if (l < 256 && l != f)
				1055	{
				1056	if (f >= 256)
				1057	{
				1058	EMSG(_(e_affrange));
				1059	return FAIL;
				1060	}
				1061	new_st.st_fold[l] = f;
				1062	}
				1063
				1064	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				1065	* case-folding and it's upper case. */
				1066	if (u < 256 && u != f)
				1067	{
				1068	if (f >= 256)
				1069	{
				1070	EMSG(_(e_affrange));
				1071	return FAIL;
				1072	}
				1073	new_st.st_fold[u] = f;
				1074	new_st.st_isu[u] = TRUE;
				1075	}
				1076	}
				1077
				1078	if (pl != NUL \|\| pu != NUL)
				1079	{
				1080	EMSG(_(e_affform));
				1081	return FAIL;
				1082	}
				1083
				1084	return set_spell_finish(&new_st);
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1085	}
				1086
				1087	/*
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1088	* Set the spell character tables from strings in the .spl file.
				1089	*/
				1090	int
				1091	set_spell_charflags(flags, cnt, upp)
				1092	char_u *flags;
				1093	int cnt;
				1094	char_u *upp;
				1095	{
				1096	/* We build the new tables here first, so that we can compare with the
				1097	* previous one. */
				1098	spelltab_T new_st;
				1099	int i;
				1100	char_u *p = upp;
				1101
				1102	clear_spell_chartab(&new_st);
				1103
				1104	for (i = 0; i < cnt; ++i)
				1105	{
				1106	new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
				1107	new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
				1108
				1109	if (*p == NUL)
				1110	return FAIL;
				1111	#ifdef FEAT_MBYTE
				1112	new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
				1113	#else
				1114	new_st.st_fold[i + 128] = *p++;
				1115	#endif
				1116	}
				1117
				1118	return set_spell_finish(&new_st);
				1119	}
				1120
				1121	static int
				1122	set_spell_finish(new_st)
				1123	spelltab_T *new_st;
				1124	{
				1125	int i;
				1126
				1127	if (did_set_spelltab)
				1128	{
				1129	/* check that it's the same table */
				1130	for (i = 0; i < 256; ++i)
				1131	{
				1132	if (spelltab.st_isw[i] != new_st->st_isw[i]
				1133	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				1134	\|\| spelltab.st_fold[i] != new_st->st_fold[i])
				1135	{
				1136	EMSG(_("E763: Word characters differ between spell files"));
				1137	return FAIL;
				1138	}
				1139	}
				1140	}
				1141	else
				1142	{
				1143	/* copy the new spelltab into the one being used */
				1144	spelltab = *new_st;
				1145	did_set_spelltab = TRUE;
				1146	}
				1147
				1148	return OK;
				1149	}
				1150
				1151	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1152	/*
				1153	* Write the current tables into the .spl file.
Bram Moolenaar	c91506a	2005-04-24 22:04:21 +0000	[diff] [blame]	1154	* This makes sure the same characters are recognized as word characters when
				1155	* generating an when using a spell file.
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1156	*/
				1157	void
				1158	write_spell_chartab(fd)
				1159	FILE *fd;
				1160	{
				1161	char_u charbuf[256 * 4];
				1162	int len = 0;
				1163	int flags;
				1164	int i;
				1165
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1166	fputc(128, fd); /* <charflagslen> */
				1167	for (i = 128; i < 256; ++i)
				1168	{
				1169	flags = 0;
				1170	if (spelltab.st_isw[i])
				1171	flags \|= SPELL_ISWORD;
				1172	if (spelltab.st_isu[i])
				1173	flags \|= SPELL_ISUPPER;
				1174	fputc(flags, fd); /* <charflags> */
				1175
				1176	len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
				1177	}
				1178
				1179	put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
				1180	fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
				1181	}
				1182	#endif
				1183
				1184	/*
				1185	* Return TRUE if "p" points to a word character for spelling.
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1186	*/
				1187	int
				1188	spell_iswordc(p)
				1189	char_u *p;
				1190	{
				1191	# ifdef FEAT_MBYTE
				1192	if (has_mbyte && MB_BYTE2LEN(*p) > 1)
				1193	return mb_get_class(p) >= 2;
				1194	# endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1195	return spelltab.st_isw[*p];
Bram Moolenaar	6bb6836	2005-03-22 23:03:44 +0000	[diff] [blame]	1196	}
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1197
				1198	/*
				1199	* Return TRUE if "c" is an upper-case character for spelling.
				1200	*/
				1201	int
				1202	spell_isupper(c)
				1203	int c;
				1204	{
				1205	# ifdef FEAT_MBYTE
				1206	if (enc_utf8)
				1207	{
				1208	/* For Unicode we can call utf_isupper(), but don't do that for ASCII,
				1209	* because we don't want to use 'casemap' here. */
				1210	if (c >= 128)
				1211	return utf_isupper(c);
				1212	}
				1213	else if (has_mbyte && c > 256)
				1214	{
				1215	/* For characters above 255 we don't have something specfied.
				1216	* Fall back to locale-dependent iswupper(). If not available
				1217	* simply return FALSE. */
				1218	# ifdef HAVE_ISWUPPER
				1219	return iswupper(c);
				1220	# else
				1221	return FALSE;
				1222	# endif
				1223	}
				1224	# endif
				1225	return spelltab.st_isu[c];
				1226	}
				1227
				1228	/*
				1229	* case-fold "p[len]" into "buf[buflen]". Used for spell checking.
				1230	* Returns FAIL when something wrong.
				1231	*/
				1232	int
				1233	spell_casefold(p, len, buf, buflen)
				1234	char_u *p;
				1235	int len;
				1236	char_u *buf;
				1237	int buflen;
				1238	{
				1239	int i;
				1240
				1241	if (len >= buflen)
				1242	{
				1243	buf[0] = NUL;
				1244	return FAIL; /* result will not fit */
				1245	}
				1246
				1247	#ifdef FEAT_MBYTE
				1248	if (has_mbyte)
				1249	{
				1250	int c;
				1251	int outi = 0;
				1252
				1253	/* Fold one character at a time. */
				1254	for (i = 0; i < len; i += mb_ptr2len_check(p + i))
				1255	{
				1256	c = mb_ptr2char(p + i);
				1257	if (enc_utf8)
				1258	/* For Unicode case folding is always the same, no need to use
				1259	* the table from the spell file. */
				1260	c = utf_fold(c);
				1261	else if (c < 256)
				1262	/* Use the table from the spell file. */
				1263	c = spelltab.st_fold[c];
				1264	# ifdef HAVE_TOWLOWER
				1265	else
				1266	/* We don't know what to do, fall back to towlower(), it
				1267	* depends on the current locale. */
				1268	c = towlower(c);
				1269	# endif
				1270	if (outi + MB_MAXBYTES > buflen)
				1271	{
				1272	buf[outi] = NUL;
				1273	return FAIL;
				1274	}
				1275	outi += mb_char2bytes(c, buf + outi);
				1276	}
				1277	buf[outi] = NUL;
				1278	}
				1279	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1280	#endif
Bram Moolenaar	0cb032e	2005-04-23 20:52:00 +0000	[diff] [blame]	1281	{
				1282	/* Be quick for non-multibyte encodings. */
				1283	for (i = 0; i < len; ++i)
				1284	buf[i] = spelltab.st_fold[p[i]];
				1285	buf[i] = NUL;
				1286	}
				1287
				1288	return OK;
				1289	}
				1290
				1291	#endif /* FEAT_SYN_HL */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1292
				1293	/*
				1294	* return TRUE if 'c' is a valid file-name character
				1295	* Assume characters above 0x100 are valid (multi-byte).
				1296	*/
				1297	int
				1298	vim_isfilec(c)
				1299	int c;
				1300	{
				1301	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
				1302	}
				1303
				1304	/*
				1305	* return TRUE if 'c' is a printable character
				1306	* Assume characters above 0x100 are printable (multi-byte), except for
				1307	* Unicode.
				1308	*/
				1309	int
				1310	vim_isprintc(c)
				1311	int c;
				1312	{
				1313	#ifdef FEAT_MBYTE
				1314	if (enc_utf8 && c >= 0x100)
				1315	return utf_printable(c);
				1316	#endif
				1317	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1318	}
				1319
				1320	/*
				1321	* Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
				1322	* byte of a double-byte character.
				1323	*/
				1324	int
				1325	vim_isprintc_strict(c)
				1326	int c;
				1327	{
				1328	#ifdef FEAT_MBYTE
				1329	if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
				1330	return FALSE;
				1331	if (enc_utf8 && c >= 0x100)
				1332	return utf_printable(c);
				1333	#endif
				1334	return (c >= 0x100 \|\| (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
				1335	}
				1336
				1337	/*
				1338	* like chartabsize(), but also check for line breaks on the screen
				1339	*/
				1340	int
				1341	lbr_chartabsize(s, col)
				1342	unsigned char *s;
				1343	colnr_T col;
				1344	{
				1345	#ifdef FEAT_LINEBREAK
				1346	if (!curwin->w_p_lbr && *p_sbr == NUL)
				1347	{
				1348	#endif
				1349	#ifdef FEAT_MBYTE
				1350	if (curwin->w_p_wrap)
				1351	return win_nolbr_chartabsize(curwin, s, col, NULL);
				1352	#endif
				1353	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
				1354	#ifdef FEAT_LINEBREAK
				1355	}
				1356	return win_lbr_chartabsize(curwin, s, col, NULL);
				1357	#endif
				1358	}
				1359
				1360	/*
				1361	* Call lbr_chartabsize() and advance the pointer.
				1362	*/
				1363	int
				1364	lbr_chartabsize_adv(s, col)
				1365	char_u **s;
				1366	colnr_T col;
				1367	{
				1368	int retval;
				1369
				1370	retval = lbr_chartabsize(*s, col);
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1371	mb_ptr_adv(*s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1372	return retval;
				1373	}
				1374
				1375	/*
				1376	* This function is used very often, keep it fast!!!!
				1377	*
				1378	* If "headp" not NULL, set *headp to the size of what we for 'showbreak'
				1379	* string at start of line. Warning: *headp is only set if it's a non-zero
				1380	* value, init to 0 before calling.
				1381	*/
				1382	/ARGSUSED/
				1383	int
				1384	win_lbr_chartabsize(wp, s, col, headp)
				1385	win_T *wp;
				1386	char_u *s;
				1387	colnr_T col;
				1388	int *headp;
				1389	{
				1390	#ifdef FEAT_LINEBREAK
				1391	int c;
				1392	int size;
				1393	colnr_T col2;
				1394	colnr_T colmax;
				1395	int added;
				1396	# ifdef FEAT_MBYTE
				1397	int mb_added = 0;
				1398	# else
				1399	# define mb_added 0
				1400	# endif
				1401	int numberextra;
				1402	char_u *ps;
				1403	int tab_corr = (*s == TAB);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1404	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1405
				1406	/*
				1407	* No 'linebreak' and 'showbreak': return quickly.
				1408	*/
				1409	if (!wp->w_p_lbr && *p_sbr == NUL)
				1410	#endif
				1411	{
				1412	#ifdef FEAT_MBYTE
				1413	if (wp->w_p_wrap)
				1414	return win_nolbr_chartabsize(wp, s, col, headp);
				1415	#endif
				1416	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
				1417	}
				1418
				1419	#ifdef FEAT_LINEBREAK
				1420	/*
				1421	* First get normal size, without 'linebreak'
				1422	*/
				1423	size = win_chartabsize(wp, s, col);
				1424	c = *s;
				1425
				1426	/*
				1427	* If 'linebreak' set check at a blank before a non-blank if the line
				1428	* needs a break here
				1429	*/
				1430	if (wp->w_p_lbr
				1431	&& vim_isbreak(c)
				1432	&& !vim_isbreak(s[1])
				1433	&& !wp->w_p_list
				1434	&& wp->w_p_wrap
				1435	# ifdef FEAT_VERTSPLIT
				1436	&& wp->w_width != 0
				1437	# endif
				1438	)
				1439	{
				1440	/*
				1441	* Count all characters from first non-blank after a blank up to next
				1442	* non-blank after a blank.
				1443	*/
				1444	numberextra = win_col_off(wp);
				1445	col2 = col;
				1446	colmax = W_WIDTH(wp) - numberextra;
				1447	if (col >= colmax)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1448	{
				1449	n = colmax + win_col_off2(wp);
				1450	if (n > 0)
				1451	colmax += (((col - colmax) / n) + 1) * n;
				1452	}
				1453
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1454	for (;;)
				1455	{
				1456	ps = s;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1457	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1458	c = *s;
				1459	if (!(c != NUL
				1460	&& (vim_isbreak(c)
				1461	\|\| (!vim_isbreak(c)
				1462	&& (col2 == col \|\| !vim_isbreak(*ps))))))
				1463	break;
				1464
				1465	col2 += win_chartabsize(wp, s, col2);
				1466	if (col2 >= colmax) /* doesn't fit */
				1467	{
				1468	size = colmax - col;
				1469	tab_corr = FALSE;
				1470	break;
				1471	}
				1472	}
				1473	}
				1474	# ifdef FEAT_MBYTE
				1475	else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				1476	&& wp->w_p_wrap && in_win_border(wp, col))
				1477	{
				1478	++size; /* Count the ">" in the last column. */
				1479	mb_added = 1;
				1480	}
				1481	# endif
				1482
				1483	/*
				1484	* May have to add something for 'showbreak' string at start of line
				1485	* Set *headp to the size of what we add.
				1486	*/
				1487	added = 0;
				1488	if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
				1489	{
				1490	numberextra = win_col_off(wp);
				1491	col += numberextra + mb_added;
				1492	if (col >= (colnr_T)W_WIDTH(wp))
				1493	{
				1494	col -= W_WIDTH(wp);
				1495	numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
				1496	if (numberextra > 0)
				1497	col = col % numberextra;
				1498	}
				1499	if (col == 0 \|\| col + size > (colnr_T)W_WIDTH(wp))
				1500	{
				1501	added = vim_strsize(p_sbr);
				1502	if (tab_corr)
				1503	size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
				1504	else
				1505	size += added;
				1506	if (col != 0)
				1507	added = 0;
				1508	}
				1509	}
				1510	if (headp != NULL)
				1511	*headp = added + mb_added;
				1512	return size;
				1513	#endif
				1514	}
				1515
				1516	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1517	/*
				1518	* Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
				1519	* 'wrap' is on. This means we need to check for a double-byte character that
				1520	* doesn't fit at the end of the screen line.
				1521	*/
				1522	static int
				1523	win_nolbr_chartabsize(wp, s, col, headp)
				1524	win_T *wp;
				1525	char_u *s;
				1526	colnr_T col;
				1527	int *headp;
				1528	{
				1529	int n;
				1530
				1531	if (*s == TAB && (!wp->w_p_list \|\| lcs_tab1))
				1532	{
				1533	n = wp->w_buffer->b_p_ts;
				1534	return (int)(n - (col % n));
				1535	}
				1536	n = ptr2cells(s);
				1537	/* Add one cell for a double-width character in the last column of the
				1538	* window, displayed with a ">". */
				1539	if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
				1540	{
				1541	if (headp != NULL)
				1542	*headp = 1;
				1543	return 3;
				1544	}
				1545	return n;
				1546	}
				1547
				1548	/*
				1549	* Return TRUE if virtual column "vcol" is in the rightmost column of window
				1550	* "wp".
				1551	*/
				1552	int
				1553	in_win_border(wp, vcol)
				1554	win_T *wp;
				1555	colnr_T vcol;
				1556	{
				1557	colnr_T width1; /* width of first line (after line number) */
				1558	colnr_T width2; /* width of further lines */
				1559
				1560	#ifdef FEAT_VERTSPLIT
				1561	if (wp->w_width == 0) /* there is no border */
				1562	return FALSE;
				1563	#endif
				1564	width1 = W_WIDTH(wp) - win_col_off(wp);
				1565	if (vcol < width1 - 1)
				1566	return FALSE;
				1567	if (vcol == width1 - 1)
				1568	return TRUE;
				1569	width2 = width1 + win_col_off2(wp);
				1570	return ((vcol - width1) % width2 == width2 - 1);
				1571	}
				1572	#endif /* FEAT_MBYTE */
				1573
				1574	/*
				1575	* Get virtual column number of pos.
				1576	* start: on the first position of this character (TAB, ctrl)
				1577	* cursor: where the cursor is on this character (first char, except for TAB)
				1578	* end: on the last position of this character (TAB, ctrl)
				1579	*
				1580	* This is used very often, keep it fast!
				1581	*/
				1582	void
				1583	getvcol(wp, pos, start, cursor, end)
				1584	win_T *wp;
				1585	pos_T *pos;
				1586	colnr_T *start;
				1587	colnr_T *cursor;
				1588	colnr_T *end;
				1589	{
				1590	colnr_T vcol;
				1591	char_u ptr; / points to current char */
				1592	char_u posptr; / points to char at pos->col */
				1593	int incr;
				1594	int head;
				1595	int ts = wp->w_buffer->b_p_ts;
				1596	int c;
				1597
				1598	vcol = 0;
				1599	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1600	posptr = ptr + pos->col;
				1601
				1602	/*
				1603	* This function is used very often, do some speed optimizations.
				1604	* When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
				1605	* Also use this when 'list' is set but tabs take their normal size.
				1606	*/
				1607	if ((!wp->w_p_list \|\| lcs_tab1 != NUL)
				1608	#ifdef FEAT_LINEBREAK
				1609	&& !wp->w_p_lbr && *p_sbr == NUL
				1610	#endif
				1611	)
				1612	{
				1613	#ifndef FEAT_MBYTE
				1614	head = 0;
				1615	#endif
				1616	for (;;)
				1617	{
				1618	#ifdef FEAT_MBYTE
				1619	head = 0;
				1620	#endif
				1621	c = *ptr;
				1622	/* make sure we don't go past the end of the line */
				1623	if (c == NUL)
				1624	{
				1625	incr = 1; /* NUL at end of line only takes one column */
				1626	break;
				1627	}
				1628	/* A tab gets expanded, depending on the current column */
				1629	if (c == TAB)
				1630	incr = ts - (vcol % ts);
				1631	else
				1632	{
				1633	#ifdef FEAT_MBYTE
				1634	if (has_mbyte)
				1635	{
				1636	/* For utf-8, if the byte is >= 0x80, need to look at
				1637	* further bytes to find the cell width. */
				1638	if (enc_utf8 && c >= 0x80)
				1639	incr = utf_ptr2cells(ptr);
				1640	else
				1641	incr = CHARSIZE(c);
				1642
				1643	/* If a double-cell char doesn't fit at the end of a line
				1644	* it wraps to the next line, it's like this char is three
				1645	* cells wide. */
				1646	if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
				1647	{
				1648	++incr;
				1649	head = 1;
				1650	}
				1651	}
				1652	else
				1653	#endif
				1654	incr = CHARSIZE(c);
				1655	}
				1656
				1657	if (ptr >= posptr) /* character at pos->col */
				1658	break;
				1659
				1660	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1661	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1662	}
				1663	}
				1664	else
				1665	{
				1666	for (;;)
				1667	{
				1668	/* A tab gets expanded, depending on the current column */
				1669	head = 0;
				1670	incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
				1671	/* make sure we don't go past the end of the line */
				1672	if (*ptr == NUL)
				1673	{
				1674	incr = 1; /* NUL at end of line only takes one column */
				1675	break;
				1676	}
				1677
				1678	if (ptr >= posptr) /* character at pos->col */
				1679	break;
				1680
				1681	vcol += incr;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1682	mb_ptr_adv(ptr);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1683	}
				1684	}
				1685	if (start != NULL)
				1686	*start = vcol + head;
				1687	if (end != NULL)
				1688	*end = vcol + incr - 1;
				1689	if (cursor != NULL)
				1690	{
				1691	if (*ptr == TAB
				1692	&& (State & NORMAL)
				1693	&& !wp->w_p_list
				1694	&& !virtual_active()
				1695	#ifdef FEAT_VISUAL
				1696	&& !(VIsual_active
				1697	&& (p_sel == 'e' \|\| ltoreq(pos, VIsual)))
				1698	#endif
				1699	)
				1700	cursor = vcol + incr - 1; / cursor at end */
				1701	else
				1702	cursor = vcol + head; / cursor at start */
				1703	}
				1704	}
				1705
				1706	/*
				1707	* Get virtual cursor column in the current window, pretending 'list' is off.
				1708	*/
				1709	colnr_T
				1710	getvcol_nolist(posp)
				1711	pos_T *posp;
				1712	{
				1713	int list_save = curwin->w_p_list;
				1714	colnr_T vcol;
				1715
				1716	curwin->w_p_list = FALSE;
				1717	getvcol(curwin, posp, NULL, &vcol, NULL);
				1718	curwin->w_p_list = list_save;
				1719	return vcol;
				1720	}
				1721
				1722	#if defined(FEAT_VIRTUALEDIT) \|\| defined(PROTO)
				1723	/*
				1724	* Get virtual column in virtual mode.
				1725	*/
				1726	void
				1727	getvvcol(wp, pos, start, cursor, end)
				1728	win_T *wp;
				1729	pos_T *pos;
				1730	colnr_T *start;
				1731	colnr_T *cursor;
				1732	colnr_T *end;
				1733	{
				1734	colnr_T col;
				1735	colnr_T coladd;
				1736	colnr_T endadd;
				1737	# ifdef FEAT_MBYTE
				1738	char_u *ptr;
				1739	# endif
				1740
				1741	if (virtual_active())
				1742	{
				1743	/* For virtual mode, only want one value */
				1744	getvcol(wp, pos, &col, NULL, NULL);
				1745
				1746	coladd = pos->coladd;
				1747	endadd = 0;
				1748	# ifdef FEAT_MBYTE
				1749	/* Cannot put the cursor on part of a wide character. */
				1750	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
				1751	if (pos->col < STRLEN(ptr))
				1752	{
				1753	int c = (*mb_ptr2char)(ptr + pos->col);
				1754
				1755	if (c != TAB && vim_isprintc(c))
				1756	{
				1757	endadd = char2cells(c) - 1;
				1758	if (coladd >= endadd)
				1759	coladd -= endadd;
				1760	else
				1761	coladd = 0;
				1762	}
				1763	}
				1764	# endif
				1765	col += coladd;
				1766	if (start != NULL)
				1767	*start = col;
				1768	if (cursor != NULL)
				1769	*cursor = col;
				1770	if (end != NULL)
				1771	*end = col + endadd;
				1772	}
				1773	else
				1774	getvcol(wp, pos, start, cursor, end);
				1775	}
				1776	#endif
				1777
				1778	#if defined(FEAT_VISUAL) \|\| defined(PROTO)
				1779	/*
				1780	* Get the leftmost and rightmost virtual column of pos1 and pos2.
				1781	* Used for Visual block mode.
				1782	*/
				1783	void
				1784	getvcols(wp, pos1, pos2, left, right)
				1785	win_T *wp;
				1786	pos_T pos1, pos2;
				1787	colnr_T left, right;
				1788	{
				1789	colnr_T from1, from2, to1, to2;
				1790
				1791	if (ltp(pos1, pos2))
				1792	{
				1793	getvvcol(wp, pos1, &from1, NULL, &to1);
				1794	getvvcol(wp, pos2, &from2, NULL, &to2);
				1795	}
				1796	else
				1797	{
				1798	getvvcol(wp, pos2, &from1, NULL, &to1);
				1799	getvvcol(wp, pos1, &from2, NULL, &to2);
				1800	}
				1801	if (from2 < from1)
				1802	*left = from2;
				1803	else
				1804	*left = from1;
				1805	if (to2 > to1)
				1806	{
				1807	if (*p_sel == 'e' && from2 - 1 >= to1)
				1808	*right = from2 - 1;
				1809	else
				1810	*right = to2;
				1811	}
				1812	else
				1813	*right = to1;
				1814	}
				1815	#endif
				1816
				1817	/*
				1818	* skipwhite: skip over ' ' and '\t'.
				1819	*/
				1820	char_u *
				1821	skipwhite(p)
				1822	char_u *p;
				1823	{
				1824	while (vim_iswhite(p)) / skip to next non-white */
				1825	++p;
				1826	return p;
				1827	}
				1828
				1829	/*
				1830	* skipdigits: skip over digits;
				1831	*/
				1832	char_u *
				1833	skipdigits(p)
				1834	char_u *p;
				1835	{
				1836	while (VIM_ISDIGIT(p)) / skip to next non-digit */
				1837	++p;
				1838	return p;
				1839	}
				1840
				1841	/*
				1842	* Variant of isdigit() that can handle characters > 0x100.
				1843	* We don't use isdigit() here, because on some systems it also considers
				1844	* superscript 1 to be a digit.
				1845	* Use the VIM_ISDIGIT() macro for simple arguments.
				1846	*/
				1847	int
				1848	vim_isdigit(c)
				1849	int c;
				1850	{
				1851	return (c >= '0' && c <= '9');
				1852	}
				1853
				1854	/*
				1855	* Variant of isxdigit() that can handle characters > 0x100.
				1856	* We don't use isxdigit() here, because on some systems it also considers
				1857	* superscript 1 to be a digit.
				1858	*/
				1859	int
				1860	vim_isxdigit(c)
				1861	int c;
				1862	{
				1863	return (c >= '0' && c <= '9')
				1864	\|\| (c >= 'a' && c <= 'f')
				1865	\|\| (c >= 'A' && c <= 'F');
				1866	}
				1867
				1868	/*
				1869	* skiptowhite: skip over text until ' ' or '\t' or NUL.
				1870	*/
				1871	char_u *
				1872	skiptowhite(p)
				1873	char_u *p;
				1874	{
				1875	while (p != ' ' && p != '\t' && *p != NUL)
				1876	++p;
				1877	return p;
				1878	}
				1879
				1880	#if defined(FEAT_LISTCMDS) \|\| defined(FEAT_SIGNS) \|\| defined(FEAT_SNIFF) \
				1881	\|\| defined(PROTO)
				1882	/*
				1883	* skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
				1884	*/
				1885	char_u *
				1886	skiptowhite_esc(p)
				1887	char_u *p;
				1888	{
				1889	while (p != ' ' && p != '\t' && *p != NUL)
				1890	{
				1891	if ((p == '\\' \|\| p == Ctrl_V) && *(p + 1) != NUL)
				1892	++p;
				1893	++p;
				1894	}
				1895	return p;
				1896	}
				1897	#endif
				1898
				1899	/*
				1900	* Getdigits: Get a number from a string and skip over it.
				1901	* Note: the argument is a pointer to a char_u pointer!
				1902	*/
				1903	long
				1904	getdigits(pp)
				1905	char_u **pp;
				1906	{
				1907	char_u *p;
				1908	long retval;
				1909
				1910	p = *pp;
				1911	retval = atol((char *)p);
				1912	if (p == '-') / skip negative sign */
				1913	++p;
				1914	p = skipdigits(p); /* skip to next non-digit */
				1915	*pp = p;
				1916	return retval;
				1917	}
				1918
				1919	/*
				1920	* Return TRUE if "lbuf" is empty or only contains blanks.
				1921	*/
				1922	int
				1923	vim_isblankline(lbuf)
				1924	char_u *lbuf;
				1925	{
				1926	char_u *p;
				1927
				1928	p = skipwhite(lbuf);
				1929	return (p == NUL \|\| p == '\r' \|\| *p == '\n');
				1930	}
				1931
				1932	/*
				1933	* Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	1934	* hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1935	* If "hexp" is not NULL, returns a flag to indicate the type of the number:
				1936	* 0 decimal
				1937	* '0' octal
				1938	* 'X' hex
				1939	* 'x' hex
				1940	* If "len" is not NULL, the length of the number in characters is returned.
				1941	* If "nptr" is not NULL, the signed result is returned in it.
				1942	* If "unptr" is not NULL, the unsigned result is returned in it.
				1943	*/
				1944	void
				1945	vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
				1946	char_u *start;
				1947	int hexp; / return: type of number 0 = decimal, 'x'
				1948	or 'X' is hex, '0' = octal */
				1949	int len; / return: detected length of number */
				1950	int dooct; /* recognize octal number */
				1951	int dohex; /* recognize hex number */
				1952	long nptr; / return: signed result */
				1953	unsigned long unptr; / return: unsigned result */
				1954	{
				1955	char_u *ptr = start;
				1956	int hex = 0; /* default is decimal */
				1957	int negative = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1958	unsigned long un = 0;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1959	int n;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1960
				1961	if (ptr[0] == '-')
				1962	{
				1963	negative = TRUE;
				1964	++ptr;
				1965	}
				1966
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1967	/* Recognize hex and octal. */
				1968	if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1969	{
				1970	hex = ptr[1];
				1971	if (dohex && (hex == 'X' \|\| hex == 'x') && vim_isxdigit(ptr[2]))
				1972	ptr += 2; /* hexadecimal */
				1973	else
				1974	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	1975	hex = 0; /* default is decimal */
				1976	if (dooct)
				1977	{
				1978	/* Don't interpret "0", "08" or "0129" as octal. */
				1979	for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
				1980	{
				1981	if (ptr[n] > '7')
				1982	{
				1983	hex = 0; /* can't be octal */
				1984	break;
				1985	}
				1986	if (ptr[n] > '0')
				1987	hex = '0'; /* assume octal */
				1988	}
				1989	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1990	}
				1991	}
				1992
				1993	/*
				1994	* Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
				1995	*/
				1996	if (hex)
				1997	{
				1998	if (hex == '0')
				1999	{
				2000	/* octal */
				2001	while ('0' <= ptr && ptr <= '7')
				2002	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2003	un = 8 * un + (unsigned long)(*ptr - '0');
				2004	++ptr;
				2005	}
				2006	}
				2007	else
				2008	{
				2009	/* hex */
				2010	while (vim_isxdigit(*ptr))
				2011	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2012	un = 16 * un + (unsigned long)hex2nr(*ptr);
				2013	++ptr;
				2014	}
				2015	}
				2016	}
				2017	else
				2018	{
				2019	/* decimal */
				2020	while (VIM_ISDIGIT(*ptr))
				2021	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2022	un = 10 * un + (unsigned long)(*ptr - '0');
				2023	++ptr;
				2024	}
				2025	}
				2026
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2027	if (hexp != NULL)
				2028	*hexp = hex;
				2029	if (len != NULL)
				2030	*len = (int)(ptr - start);
				2031	if (nptr != NULL)
Bram Moolenaar	2df6dcc	2004-07-12 15:53:54 +0000	[diff] [blame]	2032	{
				2033	if (negative) /* account for leading '-' for decimal numbers */
				2034	*nptr = -(long)un;
				2035	else
				2036	*nptr = (long)un;
				2037	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2038	if (unptr != NULL)
				2039	*unptr = un;
				2040	}
				2041
				2042	/*
				2043	* Return the value of a single hex character.
				2044	* Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
				2045	*/
				2046	int
				2047	hex2nr(c)
				2048	int c;
				2049	{
				2050	if (c >= 'a' && c <= 'f')
				2051	return c - 'a' + 10;
				2052	if (c >= 'A' && c <= 'F')
				2053	return c - 'A' + 10;
				2054	return c - '0';
				2055	}
				2056
				2057	#if defined(FEAT_TERMRESPONSE) \
				2058	\|\| (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) \|\| defined(PROTO)
				2059	/*
				2060	* Convert two hex characters to a byte.
				2061	* Return -1 if one of the characters is not hex.
				2062	*/
				2063	int
				2064	hexhex2nr(p)
				2065	char_u *p;
				2066	{
				2067	if (!vim_isxdigit(p[0]) \|\| !vim_isxdigit(p[1]))
				2068	return -1;
				2069	return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
				2070	}
				2071	#endif
				2072
				2073	/*
				2074	* Return TRUE if "str" starts with a backslash that should be removed.
				2075	* For MS-DOS, WIN32 and OS/2 this is only done when the character after the
				2076	* backslash is not a normal file name character.
				2077	* '$' is a valid file name character, we don't remove the backslash before
				2078	* it. This means it is not possible to use an environment variable after a
				2079	* backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
				2080	* Although "\ name" is valid, the backslash in "Program\ files" must be
				2081	* removed. Assume a file name doesn't start with a space.
				2082	* For multi-byte names, never remove a backslash before a non-ascii
				2083	* character, assume that all multi-byte characters are valid file name
				2084	* characters.
				2085	*/
				2086	int
				2087	rem_backslash(str)
				2088	char_u *str;
				2089	{
				2090	#ifdef BACKSLASH_IN_FILENAME
				2091	return (str[0] == '\\'
				2092	# ifdef FEAT_MBYTE
				2093	&& str[1] < 0x80
				2094	# endif
				2095	&& (str[1] == ' '
				2096	\|\| (str[1] != NUL
				2097	&& str[1] != '*'
				2098	&& str[1] != '?'
				2099	&& !vim_isfilec(str[1]))));
				2100	#else
				2101	return (str[0] == '\\' && str[1] != NUL);
				2102	#endif
				2103	}
				2104
				2105	/*
				2106	* Halve the number of backslashes in a file name argument.
				2107	* For MS-DOS we only do this if the character after the backslash
				2108	* is not a normal file character.
				2109	*/
				2110	void
				2111	backslash_halve(p)
				2112	char_u *p;
				2113	{
				2114	for ( ; *p; ++p)
				2115	if (rem_backslash(p))
				2116	STRCPY(p, p + 1);
				2117	}
				2118
				2119	/*
				2120	* backslash_halve() plus save the result in allocated memory.
				2121	*/
				2122	char_u *
				2123	backslash_halve_save(p)
				2124	char_u *p;
				2125	{
				2126	char_u *res;
				2127
				2128	res = vim_strsave(p);
				2129	if (res == NULL)
				2130	return p;
				2131	backslash_halve(res);
				2132	return res;
				2133	}
				2134
				2135	#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) \|\| defined(PROTO)
				2136	/*
				2137	* Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
				2138	* The first 64 entries have been added to map control characters defined in
				2139	* ascii.h
				2140	*/
				2141	static char_u ebcdic2ascii_tab[256] =
				2142	{
				2143	0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
				2144	0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
				2145	0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
				2146	0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
				2147	0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
				2148	0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
				2149	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				2150	0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
				2151	0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
				2152	0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
				2153	0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
				2154	0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
				2155	0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
				2156	0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
				2157	0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
				2158	0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
				2159	0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
				2160	0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
				2161	0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
				2162	0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
				2163	0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
				2164	0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
				2165	0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
				2166	0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
				2167	0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
				2168	0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
				2169	0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
				2170	0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
				2171	0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
				2172	0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
				2173	0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
				2174	0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
				2175	};
				2176
				2177	/*
				2178	* Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
				2179	* wanting 7-bit ASCII characters out the other end.
				2180	*/
				2181	void
				2182	ebcdic2ascii(buffer, len)
				2183	char_u *buffer;
				2184	int len;
				2185	{
				2186	int i;
				2187
				2188	for (i = 0; i < len; i++)
				2189	buffer[i] = ebcdic2ascii_tab[buffer[i]];
				2190	}
				2191	#endif