/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
21/* b_chartab[] is an array of 32 bytes, each bit representing one of the
22 * characters 0-255. */
23#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
38/*
39 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000040 * characters for current buffer.
41 *
42 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
43 * 'isprint' and 'encoding'.
44 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010045 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * - For non-multi-byte index with the byte (same as the character).
47 * - For DBCS index with the first byte.
48 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
49 * the same as the character, if the first byte is 0x80 and above it depends
50 * on further bytes).
51 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010052 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000053 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
54 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
55 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
56 * translate the character before displaying it). Note that only DBCS
57 * characters can have 2 display cells and still be printable.
58 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
59 * - CT_ID_CHAR bit is set when the character can be in an identifier.
60 *
61 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
62 * error, OK otherwise.
63 */
64 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010065init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000066{
67 return buf_init_chartab(curbuf, TRUE);
68}
69
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071buf_init_chartab(
72 buf_T *buf,
73 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000074{
75 int c;
76 int c2;
77 char_u *p;
78 int i;
79 int tilde;
80 int do_isalpha;
81
82 if (global)
83 {
84 /*
85 * Set the default size for printable characters:
86 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
87 * This also inits all 'isident' and 'isfname' flags to FALSE.
88 *
89 * EBCDIC: all chars below ' ' are not printable, all others are
90 * printable.
91 */
92 c = 0;
93 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010094 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000095#ifdef EBCDIC
96 while (c < 255)
97#else
98 while (c <= '~')
99#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101#ifdef FEAT_FKMAP
102 if (p_altkeymap)
103 {
104 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100105 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 }
107#endif
108 while (c < 256)
109 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000110 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
111 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100112 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 /* euc-jp characters starting with 0x8e are single width */
114 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100115 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116 /* other double-byte chars can be printable AND double-width */
117 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100118 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100121 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 }
123
Bram Moolenaar071d4272004-06-13 20:20:40 +0000124 /* Assume that every multi-byte char is a filename character. */
125 for (c = 1; c < 256; ++c)
126 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
127 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
128 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100129 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000130 }
131
132 /*
133 * Init word char flags all to FALSE
134 */
135 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000136 if (enc_dbcs != 0)
137 for (c = 0; c < 256; ++c)
138 {
139 /* double-byte characters are probably word characters */
140 if (MB_BYTE2LEN(c) == 2)
141 SET_CHARTAB(buf, c);
142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143
144#ifdef FEAT_LISP
145 /*
146 * In lisp mode the '-' character is included in keywords.
147 */
148 if (buf->b_p_lisp)
149 SET_CHARTAB(buf, '-');
150#endif
151
152 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
153 * options Each option is a list of characters, character numbers or
154 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
155 */
156 for (i = global ? 0 : 3; i <= 3; ++i)
157 {
158 if (i == 0)
159 p = p_isi; /* first round: 'isident' */
160 else if (i == 1)
161 p = p_isp; /* second round: 'isprint' */
162 else if (i == 2)
163 p = p_isf; /* third round: 'isfname' */
164 else /* i == 3 */
165 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
166
167 while (*p)
168 {
169 tilde = FALSE;
170 do_isalpha = FALSE;
171 if (*p == '^' && p[1] != NUL)
172 {
173 tilde = TRUE;
174 ++p;
175 }
176 if (VIM_ISDIGIT(*p))
177 c = getdigits(&p);
178 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000179 if (has_mbyte)
180 c = mb_ptr2char_adv(&p);
181 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000182 c = *p++;
183 c2 = -1;
184 if (*p == '-' && p[1] != NUL)
185 {
186 ++p;
187 if (VIM_ISDIGIT(*p))
188 c2 = getdigits(&p);
189 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (has_mbyte)
191 c2 = mb_ptr2char_adv(&p);
192 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000193 c2 = *p++;
194 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000195 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000196 || !(*p == NUL || *p == ','))
197 return FAIL;
198
199 if (c2 == -1) /* not a range */
200 {
201 /*
202 * A single '@' (not "@-@"):
203 * Decide on letters being ID/printable/keyword chars with
204 * standard function isalpha(). This takes care of locale for
205 * single-byte characters).
206 */
207 if (c == '@')
208 {
209 do_isalpha = TRUE;
210 c = 1;
211 c2 = 255;
212 }
213 else
214 c2 = c;
215 }
216 while (c <= c2)
217 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000218 /* Use the MB_ functions here, because isalpha() doesn't
219 * work properly when 'encoding' is "latin1" and the locale is
220 * "C". */
221 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222#ifdef FEAT_FKMAP
223 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
224#endif
225 )
226 {
227 if (i == 0) /* (re)set ID flag */
228 {
229 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100230 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100232 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233 }
234 else if (i == 1) /* (re)set printable */
235 {
236 if ((c < ' '
237#ifndef EBCDIC
238 || c > '~'
239#endif
240#ifdef FEAT_FKMAP
241 || (p_altkeymap
242 && (F_isalpha(c) || F_isdigit(c)))
243#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100244 // For double-byte we keep the cell width, so
245 // that we can detect it from the first byte.
246 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000247 {
248 if (tilde)
249 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100252 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 }
254 else
255 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100256 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
257 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 }
259 }
260 }
261 else if (i == 2) /* (re)set fname flag */
262 {
263 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100264 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100266 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 }
268 else /* i == 3 */ /* (re)set keyword flag */
269 {
270 if (tilde)
271 RESET_CHARTAB(buf, c);
272 else
273 SET_CHARTAB(buf, c);
274 }
275 }
276 ++c;
277 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100278
279 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000280 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100281 if (c == ',' && *p == NUL)
282 /* Trailing comma is not allowed. */
283 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284 }
285 }
286 chartab_initialized = TRUE;
287 return OK;
288}
289
290/*
291 * Translate any special characters in buf[bufsize] in-place.
292 * The result is a string with only printable characters, but if there is not
293 * enough room, not all characters will be translated.
294 */
295 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100296trans_characters(
297 char_u *buf,
298 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299{
300 int len; /* length of string needing translation */
301 int room; /* room in buffer after string */
302 char_u *trs; /* translated character */
303 int trs_len; /* length of trs[] */
304
305 len = (int)STRLEN(buf);
306 room = bufsize - len;
307 while (*buf != 0)
308 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000309 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000310 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000311 len -= trs_len;
312 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000313 {
314 trs = transchar_byte(*buf);
315 trs_len = (int)STRLEN(trs);
316 if (trs_len > 1)
317 {
318 room -= trs_len - 1;
319 if (room <= 0)
320 return;
321 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
322 }
323 mch_memmove(buf, trs, (size_t)trs_len);
324 --len;
325 }
326 buf += trs_len;
327 }
328}
329
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * The result is built through a running write pointer, so that appending
 * does not rescan what was already written.
 */
    char_u *
transstr(char_u *s)
{
    char_u      *res;
    char_u      *p;
    char_u      *dst;           /* where the next piece is appended in "res" */
    int         l, len, c;
    char_u      hexbuf[11];

    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
        res = alloc((unsigned)(vim_strsize(s) + 1));

    if (res == NULL)
        return NULL;

    dst = res;
    *dst = NUL;
    p = s;
    while (*p != NUL)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(dst, p, l);     /* append printable multi-byte char */
            else
                transchar_hex(dst, c);
            p += l;
        }
        else
            STRCAT(dst, transchar_byte(*p++));

        /* "dst" pointed at the terminating NUL, thus the piece was written
         * right there; advance to the new terminating NUL. */
        dst += STRLEN(dst);
    }
    return res;
}
#endif
399
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]" and returns "buf"; the text is
 * truncated to "buflen" - 1 bytes, thus "buflen" must be at least 1.
 */
    char_u *
str_foldcase(
    char_u      *str,
    int         orglen,
    char_u      *buf,
    int         buflen)
{
    garray_T    ga;
    int         i;
    int         len = orglen;

/* Access the text being converted, which lives either in "ga" (when "buf" is
 * NULL) or in "buf". */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[(i)]
#define GA_PTR(i)   ((char_u *)ga.ga_data + (i))
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[(i)])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + (i))

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));
                int     olen = utf_ptr2len(STR_PTR(i));
                int     lc = utf_tolower(c);

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || olen > 1) && c != lc)
                {
                    int     nlen = utf_char2len(lc);

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (olen != nlen)
                    {
                        if (nlen > olen)
                        {
                            if (buf == NULL
                                    ? ga_grow(&ga, nlen - olen + 1) == FAIL
                                    : len + nlen - olen >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nlen = olen;
                            }
                        }
                        if (olen != nlen)
                        {
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
                                ga.ga_len += nlen - olen;
                            }
                            else
                            {
                                STRMOVE(buf + i + nlen, buf + i + olen);
                                len += nlen - olen;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
512
513/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100514 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100516 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 * Does NOT work for multi-byte characters, c must be <= 255.
518 * Also doesn't work for the first byte of a multi-byte, "c" must be a
519 * character!
520 */
521static char_u transchar_buf[7];
522
523 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100524transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525{
526 int i;
527
528 i = 0;
529 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
530 {
531 transchar_buf[0] = '~';
532 transchar_buf[1] = '@';
533 i = 2;
534 c = K_SECOND(c);
535 }
536
537 if ((!chartab_initialized && (
538#ifdef EBCDIC
539 (c >= 64 && c < 255)
540#else
541 (c >= ' ' && c <= '~')
542#endif
543#ifdef FEAT_FKMAP
Bram Moolenaaree2615a2016-07-02 18:25:34 +0200544 || (p_altkeymap && F_ischar(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545#endif
546 )) || (c < 256 && vim_isprintc_strict(c)))
547 {
548 /* printable character */
549 transchar_buf[i] = c;
550 transchar_buf[i + 1] = NUL;
551 }
552 else
553 transchar_nonprint(transchar_buf + i, c);
554 return transchar_buf;
555}
556
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557/*
558 * Like transchar(), but called with a byte instead of a character. Checks
559 * for an illegal UTF-8 byte.
560 */
561 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100562transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (enc_utf8 && c >= 0x80)
565 {
566 transchar_nonprint(transchar_buf, c);
567 return transchar_buf;
568 }
569 return transchar(c);
570}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572/*
573 * Convert non-printable character to two or more printable characters in
574 * "buf[]". "buf" needs to be able to hold five bytes.
575 * Does NOT work for multi-byte characters, c must be <= 255.
576 */
577 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100578transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000579{
580 if (c == NL)
581 c = NUL; /* we use newline in place of a NUL */
582 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
583 c = NL; /* we use CR in place of NL in this case */
584
585 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
586 transchar_hex(buf, c);
587
588#ifdef EBCDIC
589 /* For EBCDIC only the characters 0-63 and 255 are not printable */
590 else if (CtrlChar(c) != 0 || c == DEL)
591#else
592 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
593#endif
594 {
595 buf[0] = '^';
596#ifdef EBCDIC
597 if (c == DEL)
598 buf[1] = '?'; /* DEL displayed as ^? */
599 else
600 buf[1] = CtrlChar(c);
601#else
602 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
603#endif
604
605 buf[2] = NUL;
606 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000607 else if (enc_utf8 && c >= 0x80)
608 {
609 transchar_hex(buf, c);
610 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611#ifndef EBCDIC
612 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
613 {
614 buf[0] = '|';
615 buf[1] = c - 0x80;
616 buf[2] = NUL;
617 }
618#else
619 else if (c < 64)
620 {
621 buf[0] = '~';
622 buf[1] = MetaChar(c);
623 buf[2] = NUL;
624 }
625#endif
626 else /* 0x80 - 0x9f and 0xff */
627 {
628 /*
629 * TODO: EBCDIC I don't know what to do with this chars, so I display
630 * them as '~?' for now
631 */
632 buf[0] = '~';
633#ifdef EBCDIC
634 buf[1] = '?'; /* 0xff displayed as ~? */
635#else
636 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
637#endif
638 buf[2] = NUL;
639 }
640}
641
642 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100643transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000644{
645 int i = 0;
646
647 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000648 if (c > 255)
649 {
650 buf[++i] = nr2hex((unsigned)c >> 12);
651 buf[++i] = nr2hex((unsigned)c >> 8);
652 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000653 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000654 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000655 buf[++i] = '>';
656 buf[++i] = NUL;
657}
658
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned    nibble = c & 0xf;

    if (nibble <= 9)
        return nibble + '0';
    return nibble - 10 + 'a';
}
671
672/*
673 * Return number of display cells occupied by byte "b".
674 * Caller must make sure 0 <= b <= 255.
675 * For multi-byte mode "b" must be the first byte of a character.
676 * A TAB is counted as two cells: "^I".
677 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
678 * cells depends on further bytes.
679 */
680 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100681byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8 && b >= 0x80)
684 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100685 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000686}
687
688/*
689 * Return number of display cells occupied by character "c".
690 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
691 * A TAB is counted as two cells: "^I" or four: "<09>".
692 */
693 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100694char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695{
696 if (IS_SPECIAL(c))
697 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000698 if (c >= 0x80)
699 {
700 /* UTF-8: above 0x80 need to check the value */
701 if (enc_utf8)
702 return utf_char2cells(c);
703 /* DBCS: double-byte means double-width, except for euc-jp with first
704 * byte 0x8e */
705 if (enc_dbcs != 0 && c >= 0x100)
706 {
707 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
708 return 1;
709 return 2;
710 }
711 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100712 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713}
714
715/*
716 * Return number of display cells occupied by character at "*p".
717 * A TAB is counted as two cells: "^I" or four: "<09>".
718 */
719 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100720ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
723 if (enc_utf8 && *p >= 0x80)
724 return utf_ptr2cells(p);
725 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100726 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727}
728
729/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100730 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 * counting TABs as two characters: "^I".
732 */
733 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100734vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000735{
736 return vim_strnsize(s, (int)MAXCOL);
737}
738
739/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100740 * Return the number of character cells string "s[len]" will take on the
741 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 */
743 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100744vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000745{
746 int size = 0;
747
748 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749 if (has_mbyte)
750 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000751 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000752
753 size += ptr2cells(s);
754 s += l;
755 len -= l - 1;
756 }
757 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000758 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100759
Bram Moolenaar071d4272004-06-13 20:20:40 +0000760 return size;
761}
762
763/*
764 * Return the number of characters 'c' will take on the screen, taking
765 * into account the size of a tab.
766 * Use a define to make it fast, this is used very often!!!
767 * Also see getvcol() below.
768 */
769
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200770#ifdef FEAT_VARTABS
771# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
772 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
773 { \
774 return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
775 } \
776 else \
777 return ptr2cells(p);
778#else
779# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
Bram Moolenaar071d4272004-06-13 20:20:40 +0000780 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
781 { \
782 int ts; \
783 ts = (buf)->b_p_ts; \
784 return (int)(ts - (col % ts)); \
785 } \
786 else \
787 return ptr2cells(p);
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200788#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
793 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
794}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000795
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use window "wp" and its buffer instead of the
 * current window and buffer.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
803
804/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200805 * Return the number of characters the string 's' will take on the screen,
806 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 */
808 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100809linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810{
Bram Moolenaardc536092010-07-18 15:45:49 +0200811 return linetabsize_col(0, s);
812}
813
814/*
815 * Like linetabsize(), but starting at column "startcol".
816 */
817 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100818linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200819{
820 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822
823 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200824 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 return (int)col;
826}
827
828/*
829 * Like linetabsize(), but for a given window instead of the current one.
830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
834 colnr_T col = 0;
835 char_u *s;
836
Bram Moolenaar597a4222014-06-25 14:39:50 +0200837 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100838 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200839 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 return (int)col;
841}
842
843/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000844 * Return TRUE if 'c' is a normal identifier character:
845 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 */
847 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100848vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100850 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851}
852
853/*
854 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100855 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 * For multi-byte characters mb_get_class() is used (builtin rules).
857 */
858 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100859vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100861 return vim_iswordc_buf(c, curbuf);
862}
863
864 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100865vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100866{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867 if (c >= 0x100)
868 {
869 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000870 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100872 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100873 return FALSE;
874 }
875 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000876}
877
878/*
879 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
880 */
881 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100882vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000883{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885}
886
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100888vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100890 int c = *p;
891
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100892 if (has_mbyte && MB_BYTE2LEN(c) > 1)
893 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100894 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896
897/*
898 * return TRUE if 'c' is a valid file-name character
899 * Assume characters above 0x100 are valid (multi-byte).
900 */
901 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100902vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100904 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000905}
906
907/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000908 * return TRUE if 'c' is a valid file-name character or a wildcard character
909 * Assume characters above 0x100 are valid (multi-byte).
910 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
911 * returns false.
912 */
913 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100914vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000915{
916 char_u buf[2];
917
918 buf[0] = (char_u)c;
919 buf[1] = NUL;
920 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
921}
922
923/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200924 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 * Assume characters above 0x100 are printable (multi-byte), except for
926 * Unicode.
927 */
928 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100929vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000931 if (enc_utf8 && c >= 0x100)
932 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100933 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934}
935
936/*
937 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
938 * byte of a double-byte character.
939 */
940 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100941vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
944 return FALSE;
945 if (enc_utf8 && c >= 0x100)
946 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100947 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948}
949
950/*
951 * like chartabsize(), but also check for line breaks on the screen
952 */
953 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100954lbr_chartabsize(
955 char_u *line UNUSED, /* start of the line */
956 unsigned char *s,
957 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958{
959#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200960 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000961 {
962#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000963 if (curwin->w_p_wrap)
964 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
966#ifdef FEAT_LINEBREAK
967 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200968 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000969#endif
970}
971
972/*
973 * Call lbr_chartabsize() and advance the pointer.
974 */
975 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100976lbr_chartabsize_adv(
977 char_u *line, /* start of the line */
978 char_u **s,
979 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000980{
981 int retval;
982
Bram Moolenaar597a4222014-06-25 14:39:50 +0200983 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100984 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000985 return retval;
986}
987
988/*
989 * This function is used very often, keep it fast!!!!
990 *
991 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
992 * string at start of line. Warning: *headp is only set if it's a non-zero
993 * value, init to 0 before calling.
994 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100996win_lbr_chartabsize(
997 win_T *wp,
998 char_u *line UNUSED, /* start of the line */
999 char_u *s,
1000 colnr_T col,
1001 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001002{
1003#ifdef FEAT_LINEBREAK
1004 int c;
1005 int size;
1006 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001007 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001008 colnr_T colmax;
1009 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001010 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011 int numberextra;
1012 char_u *ps;
1013 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001014 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001015
1016 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001017 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001019 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001020#endif
1021 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001022 if (wp->w_p_wrap)
1023 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1025 }
1026
1027#ifdef FEAT_LINEBREAK
1028 /*
1029 * First get normal size, without 'linebreak'
1030 */
1031 size = win_chartabsize(wp, s, col);
1032 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001033 if (tab_corr)
1034 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001035
1036 /*
1037 * If 'linebreak' set check at a blank before a non-blank if the line
1038 * needs a break here
1039 */
1040 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001041 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001042 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001043 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001044 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001045 {
1046 /*
1047 * Count all characters from first non-blank after a blank up to next
1048 * non-blank after a blank.
1049 */
1050 numberextra = win_col_off(wp);
1051 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001052 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001053 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001054 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001055 colmax += col_adj;
1056 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001057 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001058 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001059 }
1060
Bram Moolenaar071d4272004-06-13 20:20:40 +00001061 for (;;)
1062 {
1063 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001064 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001065 c = *s;
1066 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001067 && (VIM_ISBREAK(c)
1068 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001069 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 break;
1071
1072 col2 += win_chartabsize(wp, s, col2);
1073 if (col2 >= colmax) /* doesn't fit */
1074 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001075 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 tab_corr = FALSE;
1077 break;
1078 }
1079 }
1080 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1082 && wp->w_p_wrap && in_win_border(wp, col))
1083 {
1084 ++size; /* Count the ">" in the last column. */
1085 mb_added = 1;
1086 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001087
1088 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001089 * May have to add something for 'breakindent' and/or 'showbreak'
1090 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001091 * Set *headp to the size of what we add.
1092 */
1093 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001094 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001096 colnr_T sbrlen = 0;
1097 int numberwidth = win_col_off(wp);
1098
1099 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001100 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001101 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001103 col -= wp->w_width;
1104 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001105 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001106 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001107 if (*p_sbr != NUL)
1108 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001110 if (col >= sbrlen)
1111 col -= sbrlen;
1112 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001113 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001114 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001115 else if (col > 0 && numberextra > 0)
1116 col += numberwidth - win_col_off2(wp);
1117
1118 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001119 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001120 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001121 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001122 added = 0;
1123 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001124 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001125 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001126 {
1127 /* calculate effective window width */
Bram Moolenaar02631462017-09-22 15:20:32 +02001128 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
1129 int prev_width = col ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001130 if (width == 0)
Bram Moolenaar02631462017-09-22 15:20:32 +02001131 width = (colnr_T)wp->w_width;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001132 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1133 if ((size - prev_width) % width)
1134 /* wrapped, add another length of 'sbr' */
1135 added += vim_strsize(p_sbr);
1136 }
1137 else
1138 added += vim_strsize(p_sbr);
1139 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001140 if (wp->w_p_bri)
1141 added += get_breakindent_win(wp, line);
1142
Bram Moolenaar95765082014-08-24 21:19:25 +02001143 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001144 if (col != 0)
1145 added = 0;
1146 }
1147 }
1148 if (headp != NULL)
1149 *headp = added + mb_added;
1150 return size;
1151#endif
1152}
1153
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154/*
1155 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1156 * 'wrap' is on. This means we need to check for a double-byte character that
1157 * doesn't fit at the end of the screen line.
1158 */
1159 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001160win_nolbr_chartabsize(
1161 win_T *wp,
1162 char_u *s,
1163 colnr_T col,
1164 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165{
1166 int n;
1167
1168 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1169 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001170# ifdef FEAT_VARTABS
1171 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1172 wp->w_buffer->b_p_vts_array);
1173# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174 n = wp->w_buffer->b_p_ts;
1175 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001176# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 }
1178 n = ptr2cells(s);
1179 /* Add one cell for a double-width character in the last column of the
1180 * window, displayed with a ">". */
1181 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1182 {
1183 if (headp != NULL)
1184 *headp = 1;
1185 return 3;
1186 }
1187 return n;
1188}
1189
1190/*
1191 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1192 * "wp".
1193 */
1194 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001195in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001196{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001197 int width1; /* width of first line (after line number) */
1198 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 if (wp->w_width == 0) /* there is no border */
1201 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001202 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001203 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001205 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return TRUE;
1207 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001208 if (width2 <= 0)
1209 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210 return ((vcol - width1) % width2 == width2 - 1);
1211}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001212
1213/*
1214 * Get virtual column number of pos.
1215 * start: on the first position of this character (TAB, ctrl)
1216 * cursor: where the cursor is on this character (first char, except for TAB)
1217 * end: on the last position of this character (TAB, ctrl)
1218 *
1219 * This is used very often, keep it fast!
1220 */
1221 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001222getvcol(
1223 win_T *wp,
1224 pos_T *pos,
1225 colnr_T *start,
1226 colnr_T *cursor,
1227 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001228{
1229 colnr_T vcol;
1230 char_u *ptr; /* points to current char */
1231 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001232 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001233 int incr;
1234 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001235#ifdef FEAT_VARTABS
1236 int *vts = wp->w_buffer->b_p_vts_array;
1237#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238 int ts = wp->w_buffer->b_p_ts;
1239 int c;
1240
1241 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001242 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001243 if (pos->col == MAXCOL)
1244 posptr = NULL; /* continue until the NUL */
1245 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001246 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001247 /* Special check for an empty line, which can happen on exit, when
1248 * ml_get_buf() always returns an empty string. */
1249 if (*ptr == NUL)
1250 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001251 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001252 if (has_mbyte)
1253 /* always start on the first byte */
1254 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001255 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001256
1257 /*
1258 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001259 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1260 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001261 * Also use this when 'list' is set but tabs take their normal size.
1262 */
1263 if ((!wp->w_p_list || lcs_tab1 != NUL)
1264#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001265 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001266#endif
1267 )
1268 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 for (;;)
1271 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001272 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 c = *ptr;
1274 /* make sure we don't go past the end of the line */
1275 if (c == NUL)
1276 {
1277 incr = 1; /* NUL at end of line only takes one column */
1278 break;
1279 }
1280 /* A tab gets expanded, depending on the current column */
1281 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001282#ifdef FEAT_VARTABS
1283 incr = tabstop_padding(vcol, ts, vts);
1284#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001286#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001287 else
1288 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 if (has_mbyte)
1290 {
1291 /* For utf-8, if the byte is >= 0x80, need to look at
1292 * further bytes to find the cell width. */
1293 if (enc_utf8 && c >= 0x80)
1294 incr = utf_ptr2cells(ptr);
1295 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001296 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297
1298 /* If a double-cell char doesn't fit at the end of a line
1299 * it wraps to the next line, it's like this char is three
1300 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001301 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1302 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303 {
1304 ++incr;
1305 head = 1;
1306 }
1307 }
1308 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001309 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 }
1311
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001312 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313 break;
1314
1315 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001316 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001317 }
1318 }
1319 else
1320 {
1321 for (;;)
1322 {
1323 /* A tab gets expanded, depending on the current column */
1324 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001325 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001326 /* make sure we don't go past the end of the line */
1327 if (*ptr == NUL)
1328 {
1329 incr = 1; /* NUL at end of line only takes one column */
1330 break;
1331 }
1332
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001333 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 break;
1335
1336 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001337 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001338 }
1339 }
1340 if (start != NULL)
1341 *start = vcol + head;
1342 if (end != NULL)
1343 *end = vcol + incr - 1;
1344 if (cursor != NULL)
1345 {
1346 if (*ptr == TAB
1347 && (State & NORMAL)
1348 && !wp->w_p_list
1349 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001350 && !(VIsual_active
1351 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001352 )
1353 *cursor = vcol + incr - 1; /* cursor at end */
1354 else
1355 *cursor = vcol + head; /* cursor at start */
1356 }
1357}
1358
1359/*
1360 * Get virtual cursor column in the current window, pretending 'list' is off.
1361 */
1362 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001363getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364{
1365 int list_save = curwin->w_p_list;
1366 colnr_T vcol;
1367
1368 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001369#ifdef FEAT_VIRTUALEDIT
1370 if (posp->coladd)
1371 getvvcol(curwin, posp, NULL, &vcol, NULL);
1372 else
1373#endif
1374 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001375 curwin->w_p_list = list_save;
1376 return vcol;
1377}
1378
1379#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1380/*
1381 * Get virtual column in virtual mode.
1382 */
1383 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001384getvvcol(
1385 win_T *wp,
1386 pos_T *pos,
1387 colnr_T *start,
1388 colnr_T *cursor,
1389 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390{
1391 colnr_T col;
1392 colnr_T coladd;
1393 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001394 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001395
1396 if (virtual_active())
1397 {
1398 /* For virtual mode, only want one value */
1399 getvcol(wp, pos, &col, NULL, NULL);
1400
1401 coladd = pos->coladd;
1402 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001403 /* Cannot put the cursor on part of a wide character. */
1404 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001405 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001406 {
1407 int c = (*mb_ptr2char)(ptr + pos->col);
1408
1409 if (c != TAB && vim_isprintc(c))
1410 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001411 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001412 if (coladd > endadd) /* past end of line */
1413 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001414 else
1415 coladd = 0;
1416 }
1417 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001418 col += coladd;
1419 if (start != NULL)
1420 *start = col;
1421 if (cursor != NULL)
1422 *cursor = col;
1423 if (end != NULL)
1424 *end = col + endadd;
1425 }
1426 else
1427 getvcol(wp, pos, start, cursor, end);
1428}
1429#endif
1430
Bram Moolenaar071d4272004-06-13 20:20:40 +00001431/*
1432 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1433 * Used for Visual block mode.
1434 */
1435 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001436getvcols(
1437 win_T *wp,
1438 pos_T *pos1,
1439 pos_T *pos2,
1440 colnr_T *left,
1441 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001442{
1443 colnr_T from1, from2, to1, to2;
1444
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001445 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001446 {
1447 getvvcol(wp, pos1, &from1, NULL, &to1);
1448 getvvcol(wp, pos2, &from2, NULL, &to2);
1449 }
1450 else
1451 {
1452 getvvcol(wp, pos2, &from1, NULL, &to1);
1453 getvvcol(wp, pos1, &from2, NULL, &to2);
1454 }
1455 if (from2 < from1)
1456 *left = from2;
1457 else
1458 *left = from1;
1459 if (to2 > to1)
1460 {
1461 if (*p_sel == 'e' && from2 - 1 >= to1)
1462 *right = from2 - 1;
1463 else
1464 *right = to2;
1465 }
1466 else
1467 *right = to1;
1468}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001469
1470/*
1471 * skipwhite: skip over ' ' and '\t'.
1472 */
1473 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001474skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001475{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001476 char_u *p = q;
1477
Bram Moolenaar1c465442017-03-12 20:10:05 +01001478 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479 ++p;
1480 return p;
1481}
1482
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1492
1493 int
1494getwhitecols(char_u *p)
1495{
1496 return skipwhite(p) - p;
1497}
1498
1499/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001500 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001501 */
1502 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001503skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001504{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001505 char_u *p = q;
1506
Bram Moolenaar071d4272004-06-13 20:20:40 +00001507 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1508 ++p;
1509 return p;
1510}
1511
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001512#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001513/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001514 * skip over binary digits
1515 */
1516 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001517skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001518{
1519 char_u *p = q;
1520
1521 while (vim_isbdigit(*p)) /* skip to next non-digit */
1522 ++p;
1523 return p;
1524}
1525
1526/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001527 * skip over digits and hex characters
1528 */
1529 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001530skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001531{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001532 char_u *p = q;
1533
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001534 while (vim_isxdigit(*p)) /* skip to next non-digit */
1535 ++p;
1536 return p;
1537}
1538#endif
1539
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001540/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001541 * skip to bin digit (or NUL after the string)
1542 */
1543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001544skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001545{
1546 char_u *p = q;
1547
1548 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1549 ++p;
1550 return p;
1551}
1552
1553/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001554 * skip to digit (or NUL after the string)
1555 */
1556 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001557skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001558{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001559 char_u *p = q;
1560
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001561 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1562 ++p;
1563 return p;
1564}
1565
1566/*
1567 * skip to hex character (or NUL after the string)
1568 */
1569 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001570skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001571{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001572 char_u *p = q;
1573
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001574 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1575 ++p;
1576 return p;
1577}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001578
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 *
 * Returns non-zero only for the ASCII digits '0' to '9'.
 */
    int
vim_isdigit(int c)
{
    return (c >= '0' && c <= '9');
}
1590
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 *
 * Returns non-zero only for '0'-'9', 'a'-'f' and 'A'-'F'.
 */
    int
vim_isxdigit(int c)
{
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}
1603
/*
 * Binary-digit counterpart of vim_isdigit() and vim_isxdigit() that can
 * handle characters > 0x100.
 *
 * Returns non-zero only for '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return (c == '0' || c == '1');
}
1613
Bram Moolenaar78622822005-08-23 21:00:13 +00001614/*
1615 * Vim's own character class functions. These exist because many library
1616 * islower()/toupper() etc. do not work properly: they crash when used with
1617 * invalid values or can't handle latin1 when the locale is C.
1618 * Speed is most important here.
1619 */
1620#define LATIN1LOWER 'l'
1621#define LATIN1UPPER 'U'
1622
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001623static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001624static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1625static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001626
1627 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001628vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001629{
1630 if (c <= '@')
1631 return FALSE;
1632 if (c >= 0x80)
1633 {
1634 if (enc_utf8)
1635 return utf_islower(c);
1636 if (c >= 0x100)
1637 {
1638#ifdef HAVE_ISWLOWER
1639 if (has_mbyte)
1640 return iswlower(c);
1641#endif
1642 /* islower() can't handle these chars and may crash */
1643 return FALSE;
1644 }
1645 if (enc_latin1like)
1646 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1647 }
1648 return islower(c);
1649}
1650
1651 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001652vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001653{
1654 if (c <= '@')
1655 return FALSE;
1656 if (c >= 0x80)
1657 {
1658 if (enc_utf8)
1659 return utf_isupper(c);
1660 if (c >= 0x100)
1661 {
1662#ifdef HAVE_ISWUPPER
1663 if (has_mbyte)
1664 return iswupper(c);
1665#endif
1666 /* islower() can't handle these chars and may crash */
1667 return FALSE;
1668 }
1669 if (enc_latin1like)
1670 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1671 }
1672 return isupper(c);
1673}
1674
1675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001676vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001677{
1678 if (c <= '@')
1679 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001680 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001681 {
1682 if (enc_utf8)
1683 return utf_toupper(c);
1684 if (c >= 0x100)
1685 {
1686#ifdef HAVE_TOWUPPER
1687 if (has_mbyte)
1688 return towupper(c);
1689#endif
1690 /* toupper() can't handle these chars and may crash */
1691 return c;
1692 }
1693 if (enc_latin1like)
1694 return latin1upper[c];
1695 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001696 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1697 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001698 return TOUPPER_LOC(c);
1699}
1700
1701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001702vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001703{
1704 if (c <= '@')
1705 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001706 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001707 {
1708 if (enc_utf8)
1709 return utf_tolower(c);
1710 if (c >= 0x100)
1711 {
1712#ifdef HAVE_TOWLOWER
1713 if (has_mbyte)
1714 return towlower(c);
1715#endif
1716 /* tolower() can't handle these chars and may crash */
1717 return c;
1718 }
1719 if (enc_latin1like)
1720 return latin1lower[c];
1721 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001722 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1723 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001724 return TOLOWER_LOC(c);
1725}
Bram Moolenaar78622822005-08-23 21:00:13 +00001726
Bram Moolenaar071d4272004-06-13 20:20:40 +00001727/*
1728 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1729 */
1730 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001731skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732{
1733 while (*p != ' ' && *p != '\t' && *p != NUL)
1734 ++p;
1735 return p;
1736}
1737
Bram Moolenaar071d4272004-06-13 20:20:40 +00001738/*
1739 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1740 */
1741 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001742skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001743{
1744 while (*p != ' ' && *p != '\t' && *p != NUL)
1745 {
1746 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1747 ++p;
1748 ++p;
1749 }
1750 return p;
1751}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001752
1753/*
1754 * Getdigits: Get a number from a string and skip over it.
1755 * Note: the argument is a pointer to a char_u pointer!
1756 */
1757 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001758getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001759{
1760 char_u *p;
1761 long retval;
1762
1763 p = *pp;
1764 retval = atol((char *)p);
1765 if (*p == '-') /* skip negative sign */
1766 ++p;
1767 p = skipdigits(p); /* skip to next non-digit */
1768 *pp = p;
1769 return retval;
1770}
1771
1772/*
1773 * Return TRUE if "lbuf" is empty or only contains blanks.
1774 */
1775 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001776vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777{
1778 char_u *p;
1779
1780 p = skipwhite(lbuf);
1781 return (*p == NUL || *p == '\r' || *p == '\n');
1782}
1783
1784/*
1785 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001786 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1787 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 * 0 decimal
1789 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001790 * 'B' bin
1791 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001792 * 'X' hex
1793 * 'x' hex
1794 * If "len" is not NULL, the length of the number in characters is returned.
1795 * If "nptr" is not NULL, the signed result is returned in it.
1796 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001797 * If "what" contains STR2NR_BIN recognize binary numbers
1798 * If "what" contains STR2NR_OCT recognize octal numbers
1799 * If "what" contains STR2NR_HEX recognize hex numbers
1800 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001801 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001802 */
1803 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001804vim_str2nr(
1805 char_u *start,
1806 int *prep, /* return: type of number 0 = decimal, 'x'
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001807 or 'X' is hex, '0' = octal, 'b' or 'B'
1808 is bin */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001809 int *len, /* return: detected length of number */
1810 int what, /* what numbers to recognize */
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001811 varnumber_T *nptr, /* return: signed result */
1812 uvarnumber_T *unptr, /* return: unsigned result */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001813 int maxlen) /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001814{
1815 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001816 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001817 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001818 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001819 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001820
1821 if (ptr[0] == '-')
1822 {
1823 negative = TRUE;
1824 ++ptr;
1825 }
1826
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001827 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001828 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1829 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001831 pre = ptr[1];
1832 if ((what & STR2NR_HEX)
1833 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1834 && (maxlen == 0 || maxlen > 2))
1835 /* hexadecimal */
1836 ptr += 2;
1837 else if ((what & STR2NR_BIN)
1838 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1839 && (maxlen == 0 || maxlen > 2))
1840 /* binary */
1841 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001842 else
1843 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001844 /* decimal or octal, default is decimal */
1845 pre = 0;
1846 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001847 {
1848 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001849 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001850 {
1851 if (ptr[n] > '7')
1852 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001853 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001854 break;
1855 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001856 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001857 }
1858 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001859 }
1860 }
1861
1862 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001863 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1864 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001865 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001866 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1867 {
1868 /* bin */
1869 if (pre != 0)
1870 n += 2; /* skip over "0b" */
1871 while ('0' <= *ptr && *ptr <= '1')
1872 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001873 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001874 if (un <= UVARNUM_MAX / 2)
1875 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001876 else
1877 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001878 ++ptr;
1879 if (n++ == maxlen)
1880 break;
1881 }
1882 }
1883 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001884 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001885 /* octal */
1886 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001888 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001889 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001890 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1891 else
1892 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001893 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001894 if (n++ == maxlen)
1895 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001896 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001897 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001898 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001899 {
1900 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001901 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001902 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001903 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001904 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001905 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001906 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001907 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1908 else
1909 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001910 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001911 if (n++ == maxlen)
1912 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001913 }
1914 }
1915 else
1916 {
1917 /* decimal */
1918 while (VIM_ISDIGIT(*ptr))
1919 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001920 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1921
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001922 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001923 if (un < UVARNUM_MAX / 10
1924 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1925 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001926 else
1927 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001928 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001929 if (n++ == maxlen)
1930 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 }
1932 }
1933
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001934 if (prep != NULL)
1935 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936 if (len != NULL)
1937 *len = (int)(ptr - start);
1938 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001939 {
1940 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001941 {
1942 /* avoid ubsan error for overflow */
1943 if (un > VARNUM_MAX)
1944 *nptr = VARNUM_MIN;
1945 else
1946 *nptr = -(varnumber_T)un;
1947 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001948 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001949 {
1950 if (un > VARNUM_MAX)
1951 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001952 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001953 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001954 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001955 if (unptr != NULL)
1956 *unptr = un;
1957}
1958
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other argument is handled like a decimal digit: the result is the
 * distance from '0'.
 */
    int
hex2nr(int c)
{
    int	    first = '0';	/* first character of the range "c" is in */
    int	    value = 0;		/* value of that first character */

    if (c >= 'a' && c <= 'f')
    {
	first = 'a';
	value = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
	first = 'A';
	value = 10;
    }
    return c - first + value;
}
1972
Bram Moolenaar4033c552017-09-16 20:54:51 +02001973#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001974/*
1975 * Convert two hex characters to a byte.
1976 * Return -1 if one of the characters is not hex.
1977 */
1978 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001979hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001980{
1981 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1982 return -1;
1983 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1984}
1985#endif
1986
1987/*
1988 * Return TRUE if "str" starts with a backslash that should be removed.
1989 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1990 * backslash is not a normal file name character.
1991 * '$' is a valid file name character, we don't remove the backslash before
1992 * it. This means it is not possible to use an environment variable after a
1993 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1994 * Although "\ name" is valid, the backslash in "Program\ files" must be
1995 * removed. Assume a file name doesn't start with a space.
1996 * For multi-byte names, never remove a backslash before a non-ascii
1997 * character, assume that all multi-byte characters are valid file name
1998 * characters.
1999 */
2000 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002001rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002002{
2003#ifdef BACKSLASH_IN_FILENAME
2004 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002005 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002006 && (str[1] == ' '
2007 || (str[1] != NUL
2008 && str[1] != '*'
2009 && str[1] != '?'
2010 && !vim_isfilec(str[1]))));
2011#else
2012 return (str[0] == '\\' && str[1] != NUL);
2013#endif
2014}
2015
2016/*
2017 * Halve the number of backslashes in a file name argument.
2018 * For MS-DOS we only do this if the character after the backslash
2019 * is not a normal file character.
2020 */
2021 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002022backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002023{
2024 for ( ; *p; ++p)
2025 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002026 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002027}
2028
2029/*
2030 * backslash_halve() plus save the result in allocated memory.
2031 */
2032 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002033backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002034{
2035 char_u *res;
2036
2037 res = vim_strsave(p);
2038 if (res == NULL)
2039 return p;
2040 backslash_halve(res);
2041 return res;
2042}
2043
2044#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2045/*
2046 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2047 * The first 64 entries have been added to map control characters defined in
2048 * ascii.h
2049 */
2050static char_u ebcdic2ascii_tab[256] =
2051{
2052 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2053 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2054 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2055 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2056 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2057 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2058 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2059 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2060 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2061 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2062 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2063 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2064 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2065 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2066 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2067 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2068 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2069 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2070 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2071 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2072 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2073 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2074 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2075 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2076 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2077 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2078 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2079 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2080 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2081 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2082 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2083 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2084};
2085
2086/*
2087 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2088 * wanting 7-bit ASCII characters out the other end.
2089 */
2090 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002091ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002092{
2093 int i;
2094
2095 for (i = 0; i < len; i++)
2096 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2097}
2098#endif