/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
#ifdef FEAT_LINEBREAK
/* Per-window variant of chartabsize(), defined further down in this file. */
static int win_chartabsize(win_T *wp, char_u *p, colnr_T col);
#endif

#ifdef FEAT_MBYTE
# if defined(HAVE_WCHAR_H)
# include <wchar.h> /* for towupper() and towlower() */
# endif
static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
#endif

/* Converts the low four bits of a number to a lower case hex digit. */
static unsigned nr2hex(unsigned c);

/* Becomes TRUE once buf_init_chartab() has completed successfully; until
 * then transchar() must not rely on g_chartab[]. */
static int chartab_initialized = FALSE;
26
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the byte index is "c >> 3", the bit index is "c & 7".
 *
 * SET_CHARTAB() sets the bit for "c", RESET_CHARTAB() clears it and
 * GET_CHARTAB() yields non-zero when it is set.
 * Every expansion is a single parenthesized expression, so that the macros
 * keep their meaning when used inside a larger expression.
 * "c" is evaluated twice: do not pass an argument with side effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
32
/* table used below, see init_chartab() for an explanation */
static char_u g_chartab[256];

/*
 * Flags for g_chartab[].
 * The low three bits hold the cell count, the other bits are independent
 * flags that are set and cleared by buf_init_chartab().
 */
#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
43
44/*
45 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * characters for current buffer.
47 *
48 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
49 * 'isprint' and 'encoding'.
50 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010051 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000052 * - For non-multi-byte index with the byte (same as the character).
53 * - For DBCS index with the first byte.
54 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
55 * the same as the character, if the first byte is 0x80 and above it depends
56 * on further bytes).
57 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010058 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000059 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
60 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
61 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
62 * translate the character before displaying it). Note that only DBCS
63 * characters can have 2 display cells and still be printable.
64 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
65 * - CT_ID_CHAR bit is set when the character can be in an identifier.
66 *
67 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
68 * error, OK otherwise.
69 */
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000072{
73 return buf_init_chartab(curbuf, TRUE);
74}
75
76 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010077buf_init_chartab(
78 buf_T *buf,
79 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000080{
81 int c;
82 int c2;
83 char_u *p;
84 int i;
85 int tilde;
86 int do_isalpha;
87
88 if (global)
89 {
90 /*
91 * Set the default size for printable characters:
92 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
93 * This also inits all 'isident' and 'isfname' flags to FALSE.
94 *
95 * EBCDIC: all chars below ' ' are not printable, all others are
96 * printable.
97 */
98 c = 0;
99 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101#ifdef EBCDIC
102 while (c < 255)
103#else
104 while (c <= '~')
105#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100106 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000107#ifdef FEAT_FKMAP
108 if (p_altkeymap)
109 {
110 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100111 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 }
113#endif
114 while (c < 256)
115 {
116#ifdef FEAT_MBYTE
117 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
118 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100119 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 /* euc-jp characters starting with 0x8e are single width */
121 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100122 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000123 /* other double-byte chars can be printable AND double-width */
124 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100125 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000126 else
127#endif
128 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100129 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000130 }
131
132#ifdef FEAT_MBYTE
133 /* Assume that every multi-byte char is a filename character. */
134 for (c = 1; c < 256; ++c)
135 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
136 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
137 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100138 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000139#endif
140 }
141
142 /*
143 * Init word char flags all to FALSE
144 */
145 vim_memset(buf->b_chartab, 0, (size_t)32);
146#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000147 if (enc_dbcs != 0)
148 for (c = 0; c < 256; ++c)
149 {
150 /* double-byte characters are probably word characters */
151 if (MB_BYTE2LEN(c) == 2)
152 SET_CHARTAB(buf, c);
153 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154#endif
155
156#ifdef FEAT_LISP
157 /*
158 * In lisp mode the '-' character is included in keywords.
159 */
160 if (buf->b_p_lisp)
161 SET_CHARTAB(buf, '-');
162#endif
163
164 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
165 * options Each option is a list of characters, character numbers or
166 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
167 */
168 for (i = global ? 0 : 3; i <= 3; ++i)
169 {
170 if (i == 0)
171 p = p_isi; /* first round: 'isident' */
172 else if (i == 1)
173 p = p_isp; /* second round: 'isprint' */
174 else if (i == 2)
175 p = p_isf; /* third round: 'isfname' */
176 else /* i == 3 */
177 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
178
179 while (*p)
180 {
181 tilde = FALSE;
182 do_isalpha = FALSE;
183 if (*p == '^' && p[1] != NUL)
184 {
185 tilde = TRUE;
186 ++p;
187 }
188 if (VIM_ISDIGIT(*p))
189 c = getdigits(&p);
190 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000191#ifdef FEAT_MBYTE
192 if (has_mbyte)
193 c = mb_ptr2char_adv(&p);
194 else
195#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000196 c = *p++;
197 c2 = -1;
198 if (*p == '-' && p[1] != NUL)
199 {
200 ++p;
201 if (VIM_ISDIGIT(*p))
202 c2 = getdigits(&p);
203 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000204#ifdef FEAT_MBYTE
205 if (has_mbyte)
206 c2 = mb_ptr2char_adv(&p);
207 else
208#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000209 c2 = *p++;
210 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000211 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000212 || !(*p == NUL || *p == ','))
213 return FAIL;
214
215 if (c2 == -1) /* not a range */
216 {
217 /*
218 * A single '@' (not "@-@"):
219 * Decide on letters being ID/printable/keyword chars with
220 * standard function isalpha(). This takes care of locale for
221 * single-byte characters).
222 */
223 if (c == '@')
224 {
225 do_isalpha = TRUE;
226 c = 1;
227 c2 = 255;
228 }
229 else
230 c2 = c;
231 }
232 while (c <= c2)
233 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000234 /* Use the MB_ functions here, because isalpha() doesn't
235 * work properly when 'encoding' is "latin1" and the locale is
236 * "C". */
237 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238#ifdef FEAT_FKMAP
239 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
240#endif
241 )
242 {
243 if (i == 0) /* (re)set ID flag */
244 {
245 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100246 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000247 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 }
250 else if (i == 1) /* (re)set printable */
251 {
252 if ((c < ' '
253#ifndef EBCDIC
254 || c > '~'
255#endif
256#ifdef FEAT_FKMAP
257 || (p_altkeymap
258 && (F_isalpha(c) || F_isdigit(c)))
259#endif
260 )
261#ifdef FEAT_MBYTE
262 /* For double-byte we keep the cell width, so
263 * that we can detect it from the first byte. */
264 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
265#endif
266 )
267 {
268 if (tilde)
269 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100270 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100272 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000273 }
274 else
275 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100276 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
277 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000278 }
279 }
280 }
281 else if (i == 2) /* (re)set fname flag */
282 {
283 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100284 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000285 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100286 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000287 }
288 else /* i == 3 */ /* (re)set keyword flag */
289 {
290 if (tilde)
291 RESET_CHARTAB(buf, c);
292 else
293 SET_CHARTAB(buf, c);
294 }
295 }
296 ++c;
297 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100298
299 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100301 if (c == ',' && *p == NUL)
302 /* Trailing comma is not allowed. */
303 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304 }
305 }
306 chartab_initialized = TRUE;
307 return OK;
308}
309
310/*
311 * Translate any special characters in buf[bufsize] in-place.
312 * The result is a string with only printable characters, but if there is not
313 * enough room, not all characters will be translated.
314 */
315 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100316trans_characters(
317 char_u *buf,
318 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000319{
320 int len; /* length of string needing translation */
321 int room; /* room in buffer after string */
322 char_u *trs; /* translated character */
323 int trs_len; /* length of trs[] */
324
325 len = (int)STRLEN(buf);
326 room = bufsize - len;
327 while (*buf != 0)
328 {
329# ifdef FEAT_MBYTE
330 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000331 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000332 len -= trs_len;
333 else
334# endif
335 {
336 trs = transchar_byte(*buf);
337 trs_len = (int)STRLEN(trs);
338 if (trs_len > 1)
339 {
340 room -= trs_len - 1;
341 if (room <= 0)
342 return;
343 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
344 }
345 mch_memmove(buf, trs, (size_t)trs_len);
346 --len;
347 }
348 buf += trs_len;
349 }
350}
351
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 * The result is built in two passes: the first computes the size of the
 * translated text, the second produces it.
 */
    char_u *
transstr(char_u *s)
{
    char_u	*res;
    char_u	*p;
#ifdef FEAT_MBYTE
    int		l, len, c;
    char_u	hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        for (p = s; *p != NUL; )
        {
            l = (*mb_ptr2len)(p);
            if (l <= 1)
            {
                /* Single byte: the cell count equals the translated size. */
                l = byte2cells(*p++);
                len += (l > 0) ? l : 4;	    /* 4: illegal byte sequence */
                continue;
            }

            c = (*mb_ptr2char)(p);
            p += l;
            if (vim_isprintc(c))
                len += l;
            else
            {
                transchar_hex(hexbuf, c);
                len += (int)STRLEN(hexbuf);
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));

    if (res == NULL)
        return res;

    *res = NUL;
    for (p = s; *p != NUL; )
    {
#ifdef FEAT_MBYTE
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(res, p, l);	/* append printable multi-byte char */
            else
                transchar_hex(res + STRLEN(res), c);
            p += l;
            continue;
        }
#endif
        STRCAT(res, transchar_byte(*p++));
    }
    return res;
}
#endif
427
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the text is truncated when it
 * does not fit.
 */
    char_u *
str_foldcase(
    char_u	*str,
    int		orglen,
    char_u	*buf,
    int		buflen)
{
    garray_T	ga;
    int		i;
    int		len = orglen;

#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
#define GA_PTR(i) ((char_u *)ga.ga_data + i)
#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf != NULL)
    {
        if (len >= buflen)	    /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }
    else
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }

    /* Make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int	c = utf_ptr2char(STR_PTR(i));
                int	olen = utf_ptr2len(STR_PTR(i));
                int	lc = utf_tolower(c);

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || olen > 1) && c != lc)
                {
                    int	    nlen = utf_char2len(lc);

                    /* When the lower case character needs more bytes, check
                     * that there is room for them. */
                    if (nlen > olen
                            && (buf == NULL
                                ? ga_grow(&ga, nlen - olen + 1) == FAIL
                                : len + nlen - olen >= buflen))
                    {
                        /* out of memory, keep old char */
                        lc = c;
                        nlen = olen;
                    }

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (nlen != olen)
                    {
                        if (buf != NULL)
                        {
                            STRMOVE(buf + i + nlen, buf + i + olen);
                            len += nlen - olen;
                        }
                        else
                        {
                            STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
                            ga.ga_len += nlen - olen;
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf != NULL)
                buf[i] = TOLOWER_LOC(buf[i]);
            else
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            ++i;
        }
    }

    return buf == NULL ? (char_u *)ga.ga_data : buf;
}
#endif
542
543/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100544 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100546 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547 * Does NOT work for multi-byte characters, c must be <= 255.
548 * Also doesn't work for the first byte of a multi-byte, "c" must be a
549 * character!
550 */
551static char_u transchar_buf[7];
552
553 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100554transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000555{
556 int i;
557
558 i = 0;
559 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
560 {
561 transchar_buf[0] = '~';
562 transchar_buf[1] = '@';
563 i = 2;
564 c = K_SECOND(c);
565 }
566
567 if ((!chartab_initialized && (
568#ifdef EBCDIC
569 (c >= 64 && c < 255)
570#else
571 (c >= ' ' && c <= '~')
572#endif
573#ifdef FEAT_FKMAP
Bram Moolenaaree2615a2016-07-02 18:25:34 +0200574 || (p_altkeymap && F_ischar(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575#endif
576 )) || (c < 256 && vim_isprintc_strict(c)))
577 {
578 /* printable character */
579 transchar_buf[i] = c;
580 transchar_buf[i + 1] = NUL;
581 }
582 else
583 transchar_nonprint(transchar_buf + i, c);
584 return transchar_buf;
585}
586
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but called with a byte instead of a character.  Checks
 * for an illegal UTF-8 byte: with UTF-8 encoding a byte of 0x80 or above is
 * always translated as a non-printable character.
 * The result is in the same static buffer that transchar() uses.
 */
    char_u *
transchar_byte(int c)
{
    if (!enc_utf8 || c < 0x80)
        return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
603
604/*
605 * Convert non-printable character to two or more printable characters in
606 * "buf[]". "buf" needs to be able to hold five bytes.
607 * Does NOT work for multi-byte characters, c must be <= 255.
608 */
609 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100610transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611{
612 if (c == NL)
613 c = NUL; /* we use newline in place of a NUL */
614 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
615 c = NL; /* we use CR in place of NL in this case */
616
617 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
618 transchar_hex(buf, c);
619
620#ifdef EBCDIC
621 /* For EBCDIC only the characters 0-63 and 255 are not printable */
622 else if (CtrlChar(c) != 0 || c == DEL)
623#else
624 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
625#endif
626 {
627 buf[0] = '^';
628#ifdef EBCDIC
629 if (c == DEL)
630 buf[1] = '?'; /* DEL displayed as ^? */
631 else
632 buf[1] = CtrlChar(c);
633#else
634 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
635#endif
636
637 buf[2] = NUL;
638 }
639#ifdef FEAT_MBYTE
640 else if (enc_utf8 && c >= 0x80)
641 {
642 transchar_hex(buf, c);
643 }
644#endif
645#ifndef EBCDIC
646 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
647 {
648 buf[0] = '|';
649 buf[1] = c - 0x80;
650 buf[2] = NUL;
651 }
652#else
653 else if (c < 64)
654 {
655 buf[0] = '~';
656 buf[1] = MetaChar(c);
657 buf[2] = NUL;
658 }
659#endif
660 else /* 0x80 - 0x9f and 0xff */
661 {
662 /*
663 * TODO: EBCDIC I don't know what to do with this chars, so I display
664 * them as '~?' for now
665 */
666 buf[0] = '~';
667#ifdef EBCDIC
668 buf[1] = '?'; /* 0xff displayed as ~? */
669#else
670 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
671#endif
672 buf[2] = NUL;
673 }
674}
675
676 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100677transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000678{
679 int i = 0;
680
681 buf[0] = '<';
682#ifdef FEAT_MBYTE
683 if (c > 255)
684 {
685 buf[++i] = nr2hex((unsigned)c >> 12);
686 buf[++i] = nr2hex((unsigned)c >> 8);
687 }
688#endif
689 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000690 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000691 buf[++i] = '>';
692 buf[++i] = NUL;
693}
694
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    static const char	hexdigits[] = "0123456789abcdef";

    return (unsigned)hexdigits[c & 0xf];
}
707
708/*
709 * Return number of display cells occupied by byte "b".
710 * Caller must make sure 0 <= b <= 255.
711 * For multi-byte mode "b" must be the first byte of a character.
712 * A TAB is counted as two cells: "^I".
713 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
714 * cells depends on further bytes.
715 */
716 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100717byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000718{
719#ifdef FEAT_MBYTE
720 if (enc_utf8 && b >= 0x80)
721 return 0;
722#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100723 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724}
725
726/*
727 * Return number of display cells occupied by character "c".
728 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
729 * A TAB is counted as two cells: "^I" or four: "<09>".
730 */
731 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100732char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733{
734 if (IS_SPECIAL(c))
735 return char2cells(K_SECOND(c)) + 2;
736#ifdef FEAT_MBYTE
737 if (c >= 0x80)
738 {
739 /* UTF-8: above 0x80 need to check the value */
740 if (enc_utf8)
741 return utf_char2cells(c);
742 /* DBCS: double-byte means double-width, except for euc-jp with first
743 * byte 0x8e */
744 if (enc_dbcs != 0 && c >= 0x100)
745 {
746 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
747 return 1;
748 return 2;
749 }
750 }
751#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100752 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000753}
754
755/*
756 * Return number of display cells occupied by character at "*p".
757 * A TAB is counted as two cells: "^I" or four: "<09>".
758 */
759 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100760ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000761{
762#ifdef FEAT_MBYTE
763 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
764 if (enc_utf8 && *p >= 0x80)
765 return utf_ptr2cells(p);
766 /* For DBCS we can tell the cell count from the first byte. */
767#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100768 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000769}
770
771/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100772 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773 * counting TABs as two characters: "^I".
774 */
775 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100776vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777{
778 return vim_strnsize(s, (int)MAXCOL);
779}
780
781/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100782 * Return the number of character cells string "s[len]" will take on the
783 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000784 */
785 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100786vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000787{
788 int size = 0;
789
790 while (*s != NUL && --len >= 0)
791 {
792#ifdef FEAT_MBYTE
793 if (has_mbyte)
794 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000795 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000796
797 size += ptr2cells(s);
798 s += l;
799 len -= l - 1;
800 }
801 else
802#endif
803 size += byte2cells(*s++);
804 }
805 return size;
806}
807
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * RET_WIN_BUF_CHARTABSIZE() expands to a complete if/else in which both
 * branches return from the calling function; it must be the last thing in the
 * function body and is used without a trailing semicolon.
 * "wp" supplies the 'list' setting, "buf" the 'tabstop' setting, "p" points to
 * the character and "col" is the column where it starts.
 * A TAB takes up the cells to the next tab stop when 'list' is off or
 * lcs_tab1 is set, otherwise it is counted like any other character.
 * "p" is evaluated more than once, don't pass an expression with side
 * effects.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000834
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100836chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000837{
838 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
839}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use the 'list' setting of window "wp" and the
 * 'tabstop' setting of the buffer in that window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    if (*p != TAB || (wp->w_p_list && !lcs_tab1))
        return ptr2cells(p);
# ifdef FEAT_VARTABS
    return tabstop_padding(col, wp->w_buffer->b_p_ts,
                                              wp->w_buffer->b_p_vts_array);
# else
    {
        int	ts;

        ts = wp->w_buffer->b_p_ts;
        return (int)(ts - (col % ts));
    }
# endif
}
#endif
848
849/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200850 * Return the number of characters the string 's' will take on the screen,
851 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852 */
853 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100854linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855{
Bram Moolenaardc536092010-07-18 15:45:49 +0200856 return linetabsize_col(0, s);
857}
858
859/*
860 * Like linetabsize(), but starting at column "startcol".
861 */
862 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100863linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200864{
865 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200866 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867
868 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200869 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000870 return (int)col;
871}
872
873/*
874 * Like linetabsize(), but for a given window instead of the current one.
875 */
876 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100877win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000878{
879 colnr_T col = 0;
880 char_u *s;
881
Bram Moolenaar597a4222014-06-25 14:39:50 +0200882 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100883 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200884 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885 return (int)col;
886}
887
888/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000889 * Return TRUE if 'c' is a normal identifier character:
890 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000891 */
892 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100893vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000894{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100895 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896}
897
898/*
899 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100900 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000901 * For multi-byte characters mb_get_class() is used (builtin rules).
902 */
903 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100904vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000905{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100906 return vim_iswordc_buf(c, curbuf);
907}
908
909 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100910vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100911{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000912 if (c >= 0x100)
913 {
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100914#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000916 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000917 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100918 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000919#endif
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100920 return FALSE;
921 }
922 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923}
924
925/*
926 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
927 */
928 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100929vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100931 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932}
933
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100935vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000936{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100937 int c = *p;
938
Bram Moolenaara50e5862013-01-30 17:30:17 +0100939#ifdef FEAT_MBYTE
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100940 if (has_mbyte && MB_BYTE2LEN(c) > 1)
941 c = (*mb_ptr2char)(p);
Bram Moolenaara50e5862013-01-30 17:30:17 +0100942#endif
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100943 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000944}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000945
946/*
947 * return TRUE if 'c' is a valid file-name character
948 * Assume characters above 0x100 are valid (multi-byte).
949 */
950 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100951vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000952{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100953 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954}
955
956/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000957 * return TRUE if 'c' is a valid file-name character or a wildcard character
958 * Assume characters above 0x100 are valid (multi-byte).
959 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
960 * returns false.
961 */
962 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100963vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000964{
965 char_u buf[2];
966
967 buf[0] = (char_u)c;
968 buf[1] = NUL;
969 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
970}
971
972/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200973 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000974 * Assume characters above 0x100 are printable (multi-byte), except for
975 * Unicode.
976 */
977 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100978vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000979{
980#ifdef FEAT_MBYTE
981 if (enc_utf8 && c >= 0x100)
982 return utf_printable(c);
983#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100984 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000985}
986
987/*
988 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
989 * byte of a double-byte character.
990 */
991 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100992vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000993{
994#ifdef FEAT_MBYTE
995 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
996 return FALSE;
997 if (enc_utf8 && c >= 0x100)
998 return utf_printable(c);
999#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001000 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001}
1002
1003/*
1004 * like chartabsize(), but also check for line breaks on the screen
1005 */
1006 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001007lbr_chartabsize(
1008 char_u *line UNUSED, /* start of the line */
1009 unsigned char *s,
1010 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011{
1012#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001013 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014 {
1015#endif
1016#ifdef FEAT_MBYTE
1017 if (curwin->w_p_wrap)
1018 return win_nolbr_chartabsize(curwin, s, col, NULL);
1019#endif
1020 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1021#ifdef FEAT_LINEBREAK
1022 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001023 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024#endif
1025}
1026
1027/*
1028 * Call lbr_chartabsize() and advance the pointer.
1029 */
1030 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001031lbr_chartabsize_adv(
1032 char_u *line, /* start of the line */
1033 char_u **s,
1034 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001035{
1036 int retval;
1037
Bram Moolenaar597a4222014-06-25 14:39:50 +02001038 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001039 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001040 return retval;
1041}
1042
1043/*
 * Return the size of the character at "s" in window "wp" when it starts at
 * virtual column "col", taking 'linebreak', 'showbreak' and 'breakindent'
 * into account.
 *
 * This function is used very often, keep it fast!!!!
 *
 * "line" is the start of the line; in this function it is only passed on to
 * get_breakindent_win().
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: on the quick-return path *headp is only
 * set if it's a non-zero value, init to 0 before calling.
 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001051win_lbr_chartabsize(
1052 win_T *wp,
1053 char_u *line UNUSED, /* start of the line */
1054 char_u *s,
1055 colnr_T col,
1056 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057{
1058#ifdef FEAT_LINEBREAK
1059 int c;
1060 int size;
1061 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001062 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001063 colnr_T colmax;
1064 int added;
1065# ifdef FEAT_MBYTE
1066 int mb_added = 0;
1067# else
1068# define mb_added 0
1069# endif
1070 int numberextra;
1071 char_u *ps;
1072 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001073 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074
1075 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001076 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001078 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001079#endif
/* Without FEAT_LINEBREAK the block below is the whole function body. */
1080 {
1081#ifdef FEAT_MBYTE
1082 if (wp->w_p_wrap)
1083 return win_nolbr_chartabsize(wp, s, col, headp);
1084#endif
1085 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1086 }
1087
1088#ifdef FEAT_LINEBREAK
1089 /*
1090 * First get normal size, without 'linebreak'
1091 */
1092 size = win_chartabsize(wp, s, col);
1093 c = *s;
/* For a TAB remember how many cells it takes beyond the first one. */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001094 if (tab_corr)
1095 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096
1097 /*
1098 * If 'linebreak' set check at a blank before a non-blank if the line
1099 * needs a break here
1100 */
1101 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001102 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001103 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001104 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001105 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001106 {
1107 /*
1108 * Count all characters from first non-blank after a blank up to next
1109 * non-blank after a blank.
1110 */
1111 numberextra = win_col_off(wp);
1112 col2 = col;
/* colmax: window width minus win_col_off() and the TAB adjustment. */
Bram Moolenaar02631462017-09-22 15:20:32 +02001113 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001114 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001115 {
/* "col" is at or past colmax: move colmax forward by a whole number of
 * steps of size "n", where "n" is colmax plus win_col_off2(). */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001116 colmax += col_adj;
1117 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001118 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001119 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001120 }
1121
Bram Moolenaar071d4272004-06-13 20:20:40 +00001122 for (;;)
1123 {
/* "ps" keeps pointing at the character before "s". */
1124 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001125 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001126 c = *s;
1127 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001128 && (VIM_ISBREAK(c)
1129 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001130 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001131 break;
1132
1133 col2 += win_chartabsize(wp, s, col2);
1134 if (col2 >= colmax) /* doesn't fit */
1135 {
/* The character takes up everything up to colmax. */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001136 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137 tab_corr = FALSE;
1138 break;
1139 }
1140 }
1141 }
1142# ifdef FEAT_MBYTE
1143 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1144 && wp->w_p_wrap && in_win_border(wp, col))
1145 {
1146 ++size; /* Count the ">" in the last column. */
1147 mb_added = 1;
1148 }
1149# endif
1150
1151 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001152 * May have to add something for 'breakindent' and/or 'showbreak'
1153 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154 * Set *headp to the size of what we add.
1155 */
1156 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001157 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001158 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001159 colnr_T sbrlen = 0;
1160 int numberwidth = win_col_off(wp);
1161
1162 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001163 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001164 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 {
/* Past the first screen line: reduce "col" to a column relative to the
 * screen line it is on.  From here on "numberextra" holds the window width
 * minus (win_col_off() - win_col_off2()). */
Bram Moolenaar02631462017-09-22 15:20:32 +02001166 col -= wp->w_width;
1167 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001168 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001169 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001170 if (*p_sbr != NUL)
1171 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001172 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001173 if (col >= sbrlen)
1174 col -= sbrlen;
1175 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001176 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001178 else if (col > 0 && numberextra > 0)
1179 col += numberwidth - win_col_off2(wp);
1180
1181 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182 }
/* Add something when at the start of a screen line, or when the character
 * does not fit in the rest of it. */
Bram Moolenaar02631462017-09-22 15:20:32 +02001183 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001185 added = 0;
1186 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001187 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001188 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001189 {
1190 /* calculate effective window width */
Bram Moolenaar02631462017-09-22 15:20:32 +02001191 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
1192 int prev_width = col ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
/* NOTE(review): only a zero "width" is replaced here; a negative value
 * would still be used as divisor below - confirm it cannot occur. */
Bram Moolenaard574ea22015-01-14 19:35:14 +01001193 if (width == 0)
Bram Moolenaar02631462017-09-22 15:20:32 +02001194 width = (colnr_T)wp->w_width;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001195 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1196 if ((size - prev_width) % width)
1197 /* wrapped, add another length of 'sbr' */
1198 added += vim_strsize(p_sbr);
1199 }
1200 else
1201 added += vim_strsize(p_sbr);
1202 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001203 if (wp->w_p_bri)
1204 added += get_breakindent_win(wp, line);
1205
/* "size" always includes the addition, but it is only reported through
 * "headp" when "col" is zero. */
Bram Moolenaar95765082014-08-24 21:19:25 +02001206 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001207 if (col != 0)
1208 added = 0;
1209 }
1210 }
1211 if (headp != NULL)
1212 *headp = added + mb_added;
1213 return size;
1214#endif
1215}
1216
1217#if defined(FEAT_MBYTE) || defined(PROTO)
1218/*
1219 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1220 * 'wrap' is on. This means we need to check for a double-byte character that
1221 * doesn't fit at the end of the screen line.
1222 */
1223 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001224win_nolbr_chartabsize(
1225 win_T *wp,
1226 char_u *s,
1227 colnr_T col,
1228 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229{
1230 int n;
1231
1232 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1233 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001234# ifdef FEAT_VARTABS
1235 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1236 wp->w_buffer->b_p_vts_array);
1237# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238 n = wp->w_buffer->b_p_ts;
1239 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001240# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001241 }
1242 n = ptr2cells(s);
1243 /* Add one cell for a double-width character in the last column of the
1244 * window, displayed with a ">". */
1245 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1246 {
1247 if (headp != NULL)
1248 *headp = 1;
1249 return 3;
1250 }
1251 return n;
1252}
1253
1254/*
1255 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1256 * "wp".
1257 */
1258 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001259in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001260{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001261 int width1; /* width of first line (after line number) */
1262 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001263
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 if (wp->w_width == 0) /* there is no border */
1265 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001266 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001267 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001268 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001269 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 return TRUE;
1271 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001272 if (width2 <= 0)
1273 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001274 return ((vcol - width1) % width2 == width2 - 1);
1275}
1276#endif /* FEAT_MBYTE */
1277
1278/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL when that value is not
 * wanted.
 * When pos->col is MAXCOL the line is scanned until the NUL.  Otherwise
 * pos->col is set to zero when the line is empty.
 *
 * This is used very often, keep it fast!
 */
1286 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001287getvcol(
1288 win_T *wp,
1289 pos_T *pos,
1290 colnr_T *start,
1291 colnr_T *cursor,
1292 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293{
1294 colnr_T vcol;
1295 char_u *ptr; /* points to current char */
1296 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001297 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001298 int incr;
1299 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001300#ifdef FEAT_VARTABS
1301 int *vts = wp->w_buffer->b_p_vts_array;
1302#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303 int ts = wp->w_buffer->b_p_ts;
1304 int c;
1305
1306 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001307 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001308 if (pos->col == MAXCOL)
1309 posptr = NULL; /* continue until the NUL */
1310 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001311 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001312 /* Special check for an empty line, which can happen on exit, when
1313 * ml_get_buf() always returns an empty string. */
1314 if (*ptr == NUL)
1315 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001316 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001317#ifdef FEAT_MBYTE
1318 if (has_mbyte)
1319 /* always start on the first byte */
1320 posptr -= (*mb_head_off)(line, posptr);
1321#endif
1322 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001323
1324 /*
1325 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001326 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1327 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001328 * Also use this when 'list' is set but tabs take their normal size.
1329 */
1330 if ((!wp->w_p_list || lcs_tab1 != NUL)
1331#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001332 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001333#endif
1334 )
1335 {
/* Without FEAT_MBYTE "head" never changes, set it once before the loop. */
1336#ifndef FEAT_MBYTE
1337 head = 0;
1338#endif
1339 for (;;)
1340 {
1341#ifdef FEAT_MBYTE
1342 head = 0;
1343#endif
1344 c = *ptr;
1345 /* make sure we don't go past the end of the line */
1346 if (c == NUL)
1347 {
1348 incr = 1; /* NUL at end of line only takes one column */
1349 break;
1350 }
1351 /* A tab gets expanded, depending on the current column */
1352 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001353#ifdef FEAT_VARTABS
1354 incr = tabstop_padding(vcol, ts, vts);
1355#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001357#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358 else
1359 {
1360#ifdef FEAT_MBYTE
1361 if (has_mbyte)
1362 {
1363 /* For utf-8, if the byte is >= 0x80, need to look at
1364 * further bytes to find the cell width. */
1365 if (enc_utf8 && c >= 0x80)
1366 incr = utf_ptr2cells(ptr);
1367 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001368 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369
1370 /* If a double-cell char doesn't fit at the end of a line
1371 * it wraps to the next line, it's like this char is three
1372 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001373 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1374 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001375 {
/* "head" is the one extra cell in front of the character. */
1376 ++incr;
1377 head = 1;
1378 }
1379 }
1380 else
1381#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001382 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001383 }
1384
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001385 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386 break;
1387
1388 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001389 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 }
1391 }
1392 else
1393 {
/* Slow path: let win_lbr_chartabsize() compute the size of each character,
 * it also sets "head". */
1394 for (;;)
1395 {
1396 /* A tab gets expanded, depending on the current column */
1397 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001398 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001399 /* make sure we don't go past the end of the line */
1400 if (*ptr == NUL)
1401 {
1402 incr = 1; /* NUL at end of line only takes one column */
1403 break;
1404 }
1405
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001406 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001407 break;
1408
1409 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001410 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001411 }
1412 }
/* Here "vcol" is the column where the character starts, "incr" its size and
 * "head" the part of "incr" that comes before the character itself. */
1413 if (start != NULL)
1414 *start = vcol + head;
1415 if (end != NULL)
1416 *end = vcol + incr - 1;
1417 if (cursor != NULL)
1418 {
/* The cursor goes on the last cell of a TAB only when all the conditions
 * below hold, otherwise on the first cell of the character. */
1419 if (*ptr == TAB
1420 && (State & NORMAL)
1421 && !wp->w_p_list
1422 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001423 && !(VIsual_active
1424 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001425 )
1426 *cursor = vcol + incr - 1; /* cursor at end */
1427 else
1428 *cursor = vcol + head; /* cursor at start */
1429 }
1430}
1431
1432/*
1433 * Get virtual cursor column in the current window, pretending 'list' is off.
1434 */
1435 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001436getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437{
1438 int list_save = curwin->w_p_list;
1439 colnr_T vcol;
1440
1441 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001442#ifdef FEAT_VIRTUALEDIT
1443 if (posp->coladd)
1444 getvvcol(curwin, posp, NULL, &vcol, NULL);
1445 else
1446#endif
1447 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001448 curwin->w_p_list = list_save;
1449 return vcol;
1450}
1451
1452#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1453/*
1454 * Get virtual column in virtual mode.
1455 */
1456 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001457getvvcol(
1458 win_T *wp,
1459 pos_T *pos,
1460 colnr_T *start,
1461 colnr_T *cursor,
1462 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001463{
1464 colnr_T col;
1465 colnr_T coladd;
1466 colnr_T endadd;
1467# ifdef FEAT_MBYTE
1468 char_u *ptr;
1469# endif
1470
1471 if (virtual_active())
1472 {
1473 /* For virtual mode, only want one value */
1474 getvcol(wp, pos, &col, NULL, NULL);
1475
1476 coladd = pos->coladd;
1477 endadd = 0;
1478# ifdef FEAT_MBYTE
1479 /* Cannot put the cursor on part of a wide character. */
1480 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001481 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001482 {
1483 int c = (*mb_ptr2char)(ptr + pos->col);
1484
1485 if (c != TAB && vim_isprintc(c))
1486 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001487 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001488 if (coladd > endadd) /* past end of line */
1489 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001490 else
1491 coladd = 0;
1492 }
1493 }
1494# endif
1495 col += coladd;
1496 if (start != NULL)
1497 *start = col;
1498 if (cursor != NULL)
1499 *cursor = col;
1500 if (end != NULL)
1501 *end = col + endadd;
1502 }
1503 else
1504 getvcol(wp, pos, start, cursor, end);
1505}
1506#endif
1507
Bram Moolenaar071d4272004-06-13 20:20:40 +00001508/*
1509 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1510 * Used for Visual block mode.
1511 */
1512 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001513getvcols(
1514 win_T *wp,
1515 pos_T *pos1,
1516 pos_T *pos2,
1517 colnr_T *left,
1518 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001519{
1520 colnr_T from1, from2, to1, to2;
1521
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001522 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001523 {
1524 getvvcol(wp, pos1, &from1, NULL, &to1);
1525 getvvcol(wp, pos2, &from2, NULL, &to2);
1526 }
1527 else
1528 {
1529 getvvcol(wp, pos2, &from1, NULL, &to1);
1530 getvvcol(wp, pos1, &from2, NULL, &to2);
1531 }
1532 if (from2 < from1)
1533 *left = from2;
1534 else
1535 *left = from1;
1536 if (to2 > to1)
1537 {
1538 if (*p_sel == 'e' && from2 - 1 >= to1)
1539 *right = from2 - 1;
1540 else
1541 *right = to2;
1542 }
1543 else
1544 *right = to1;
1545}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001546
1547/*
1548 * skipwhite: skip over ' ' and '\t'.
1549 */
1550 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001551skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001552{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001553 char_u *p = q;
1554
Bram Moolenaar1c465442017-03-12 20:10:05 +01001555 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001556 ++p;
1557 return p;
1558}
1559
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 * Takes no arguments; declared with "(void)" so that the definition is a
 * proper prototype.
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1569
1570 int
1571getwhitecols(char_u *p)
1572{
1573 return skipwhite(p) - p;
1574}
1575
1576/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001577 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578 */
1579 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001580skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001581{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001582 char_u *p = q;
1583
Bram Moolenaar071d4272004-06-13 20:20:40 +00001584 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1585 ++p;
1586 return p;
1587}
1588
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001589#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001590/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001591 * skip over binary digits
1592 */
1593 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001594skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001595{
1596 char_u *p = q;
1597
1598 while (vim_isbdigit(*p)) /* skip to next non-digit */
1599 ++p;
1600 return p;
1601}
1602
1603/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001604 * skip over digits and hex characters
1605 */
1606 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001607skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001608{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001609 char_u *p = q;
1610
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001611 while (vim_isxdigit(*p)) /* skip to next non-digit */
1612 ++p;
1613 return p;
1614}
1615#endif
1616
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001617/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001618 * skip to bin digit (or NUL after the string)
1619 */
1620 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001621skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001622{
1623 char_u *p = q;
1624
1625 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1626 ++p;
1627 return p;
1628}
1629
1630/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001631 * skip to digit (or NUL after the string)
1632 */
1633 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001634skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001635{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001636 char_u *p = q;
1637
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001638 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1639 ++p;
1640 return p;
1641}
1642
1643/*
1644 * skip to hex character (or NUL after the string)
1645 */
1646 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001647skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001648{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001649 char_u *p = q;
1650
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001651 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1652 ++p;
1653 return p;
1654}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001655
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' - '9' and 0 for anything else.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1667
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' - '9', 'a' - 'f' and 'A' - 'F', 0 for anything else.
 */
    int
vim_isxdigit(int c)
{
    int         is_decimal = (c >= '0' && c <= '9');
    int         is_lower_hex = (c >= 'a' && c <= 'f');
    int         is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1680
/*
 * Binary-digit counterpart of vim_isdigit() and vim_isxdigit() that can
 * handle characters > 0x100.
 * Returns 1 for '0' and '1', 0 for anything else.
 */
    int
vim_isbdigit(int c)
{
    return !(c != '0' && c != '1');
}
1690
Bram Moolenaar78622822005-08-23 21:00:13 +00001691#if defined(FEAT_MBYTE) || defined(PROTO)
1692/*
1693 * Vim's own character class functions. These exist because many library
1694 * islower()/toupper() etc. do not work properly: they crash when used with
1695 * invalid values or can't handle latin1 when the locale is C.
1696 * Speed is most important here.
1697 */
1698#define LATIN1LOWER 'l'
1699#define LATIN1UPPER 'U'
1700
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001701static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001702static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1703static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001704
1705 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001706vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001707{
1708 if (c <= '@')
1709 return FALSE;
1710 if (c >= 0x80)
1711 {
1712 if (enc_utf8)
1713 return utf_islower(c);
1714 if (c >= 0x100)
1715 {
1716#ifdef HAVE_ISWLOWER
1717 if (has_mbyte)
1718 return iswlower(c);
1719#endif
1720 /* islower() can't handle these chars and may crash */
1721 return FALSE;
1722 }
1723 if (enc_latin1like)
1724 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1725 }
1726 return islower(c);
1727}
1728
1729 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001730vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001731{
1732 if (c <= '@')
1733 return FALSE;
1734 if (c >= 0x80)
1735 {
1736 if (enc_utf8)
1737 return utf_isupper(c);
1738 if (c >= 0x100)
1739 {
1740#ifdef HAVE_ISWUPPER
1741 if (has_mbyte)
1742 return iswupper(c);
1743#endif
1744 /* islower() can't handle these chars and may crash */
1745 return FALSE;
1746 }
1747 if (enc_latin1like)
1748 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1749 }
1750 return isupper(c);
1751}
1752
1753 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001754vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001755{
1756 if (c <= '@')
1757 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001758 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001759 {
1760 if (enc_utf8)
1761 return utf_toupper(c);
1762 if (c >= 0x100)
1763 {
1764#ifdef HAVE_TOWUPPER
1765 if (has_mbyte)
1766 return towupper(c);
1767#endif
1768 /* toupper() can't handle these chars and may crash */
1769 return c;
1770 }
1771 if (enc_latin1like)
1772 return latin1upper[c];
1773 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001774 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1775 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001776 return TOUPPER_LOC(c);
1777}
1778
1779 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001780vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001781{
1782 if (c <= '@')
1783 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001784 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001785 {
1786 if (enc_utf8)
1787 return utf_tolower(c);
1788 if (c >= 0x100)
1789 {
1790#ifdef HAVE_TOWLOWER
1791 if (has_mbyte)
1792 return towlower(c);
1793#endif
1794 /* tolower() can't handle these chars and may crash */
1795 return c;
1796 }
1797 if (enc_latin1like)
1798 return latin1lower[c];
1799 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001800 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1801 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001802 return TOLOWER_LOC(c);
1803}
1804#endif
1805
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806/*
1807 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1808 */
1809 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001810skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811{
1812 while (*p != ' ' && *p != '\t' && *p != NUL)
1813 ++p;
1814 return p;
1815}
1816
Bram Moolenaar071d4272004-06-13 20:20:40 +00001817/*
1818 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1819 */
1820 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001821skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822{
1823 while (*p != ' ' && *p != '\t' && *p != NUL)
1824 {
1825 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1826 ++p;
1827 ++p;
1828 }
1829 return p;
1830}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001831
1832/*
1833 * Getdigits: Get a number from a string and skip over it.
1834 * Note: the argument is a pointer to a char_u pointer!
1835 */
1836 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001837getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001838{
1839 char_u *p;
1840 long retval;
1841
1842 p = *pp;
1843 retval = atol((char *)p);
1844 if (*p == '-') /* skip negative sign */
1845 ++p;
1846 p = skipdigits(p); /* skip to next non-digit */
1847 *pp = p;
1848 return retval;
1849}
1850
1851/*
1852 * Return TRUE if "lbuf" is empty or only contains blanks.
1853 */
1854 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001855vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001856{
1857 char_u *p;
1858
1859 p = skipwhite(lbuf);
1860 return (*p == NUL || *p == '\r' || *p == '\n');
1861}
1862
1863/*
1864 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001865 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1866 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867 * 0 decimal
1868 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001869 * 'B' bin
1870 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 * 'X' hex
1872 * 'x' hex
1873 * If "len" is not NULL, the length of the number in characters is returned.
1874 * If "nptr" is not NULL, the signed result is returned in it.
1875 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001876 * If "what" contains STR2NR_BIN recognize binary numbers
1877 * If "what" contains STR2NR_OCT recognize octal numbers
1878 * If "what" contains STR2NR_HEX recognize hex numbers
1879 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001880 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881 */
1882 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001883vim_str2nr(
1884 char_u *start,
1885 int *prep, /* return: type of number 0 = decimal, 'x'
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001886 or 'X' is hex, '0' = octal, 'b' or 'B'
1887 is bin */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001888 int *len, /* return: detected length of number */
1889 int what, /* what numbers to recognize */
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001890 varnumber_T *nptr, /* return: signed result */
1891 uvarnumber_T *unptr, /* return: unsigned result */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001892 int maxlen) /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001893{
1894 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001895 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001896 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001897 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001898 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001899
1900 if (ptr[0] == '-')
1901 {
1902 negative = TRUE;
1903 ++ptr;
1904 }
1905
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001906 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001907 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1908 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001910 pre = ptr[1];
1911 if ((what & STR2NR_HEX)
1912 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1913 && (maxlen == 0 || maxlen > 2))
1914 /* hexadecimal */
1915 ptr += 2;
1916 else if ((what & STR2NR_BIN)
1917 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1918 && (maxlen == 0 || maxlen > 2))
1919 /* binary */
1920 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921 else
1922 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001923 /* decimal or octal, default is decimal */
1924 pre = 0;
1925 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001926 {
1927 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001928 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001929 {
1930 if (ptr[n] > '7')
1931 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001932 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001933 break;
1934 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001935 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001936 }
1937 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001938 }
1939 }
1940
1941 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001942 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1943 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001944 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001945 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1946 {
1947 /* bin */
1948 if (pre != 0)
1949 n += 2; /* skip over "0b" */
1950 while ('0' <= *ptr && *ptr <= '1')
1951 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001952 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001953 if (un <= UVARNUM_MAX / 2)
1954 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001955 else
1956 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001957 ++ptr;
1958 if (n++ == maxlen)
1959 break;
1960 }
1961 }
1962 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001963 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001964 /* octal */
1965 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001966 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001967 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001968 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001969 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1970 else
1971 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001972 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001973 if (n++ == maxlen)
1974 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001975 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001976 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001977 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001978 {
1979 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001980 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001981 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001982 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001983 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001984 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001985 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001986 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1987 else
1988 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001989 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001990 if (n++ == maxlen)
1991 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001992 }
1993 }
1994 else
1995 {
1996 /* decimal */
1997 while (VIM_ISDIGIT(*ptr))
1998 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001999 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
2000
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002001 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002002 if (un < UVARNUM_MAX / 10
2003 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
2004 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002005 else
2006 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002007 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002008 if (n++ == maxlen)
2009 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002010 }
2011 }
2012
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002013 if (prep != NULL)
2014 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002015 if (len != NULL)
2016 *len = (int)(ptr - start);
2017 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002018 {
2019 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002020 {
2021 /* avoid ubsan error for overflow */
2022 if (un > VARNUM_MAX)
2023 *nptr = VARNUM_MIN;
2024 else
2025 *nptr = -(varnumber_T)un;
2026 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002027 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002028 {
2029 if (un > VARNUM_MAX)
2030 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02002031 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002032 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002033 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002034 if (unptr != NULL)
2035 *unptr = un;
2036}
2037
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other character is treated like a decimal digit and gives "c - '0'".
 */
    int
hex2nr(int c)
{
    int	    zero;	/* character value that maps to zero */

    if ('a' <= c && c <= 'f')
	zero = 'a' - 10;
    else if ('A' <= c && c <= 'F')
	zero = 'A' - 10;
    else
	zero = '0';
    return c - zero;
}
2051
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value, the first
 * character being the high nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(char_u *p)
{
    /* p[1] is only looked at when p[0] is a hex digit */
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
	return hex2nr(p[0]) * 16 + hex2nr(p[1]);
    return -1;
}
#endif
2065
2066/*
2067 * Return TRUE if "str" starts with a backslash that should be removed.
2068 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
2069 * backslash is not a normal file name character.
2070 * '$' is a valid file name character, we don't remove the backslash before
2071 * it. This means it is not possible to use an environment variable after a
2072 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2073 * Although "\ name" is valid, the backslash in "Program\ files" must be
2074 * removed. Assume a file name doesn't start with a space.
2075 * For multi-byte names, never remove a backslash before a non-ascii
2076 * character, assume that all multi-byte characters are valid file name
2077 * characters.
2078 */
2079 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002080rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002081{
2082#ifdef BACKSLASH_IN_FILENAME
2083 return (str[0] == '\\'
2084# ifdef FEAT_MBYTE
2085 && str[1] < 0x80
2086# endif
2087 && (str[1] == ' '
2088 || (str[1] != NUL
2089 && str[1] != '*'
2090 && str[1] != '?'
2091 && !vim_isfilec(str[1]))));
2092#else
2093 return (str[0] == '\\' && str[1] != NUL);
2094#endif
2095}
2096
2097/*
2098 * Halve the number of backslashes in a file name argument.
2099 * For MS-DOS we only do this if the character after the backslash
2100 * is not a normal file character.
2101 */
2102 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002103backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104{
2105 for ( ; *p; ++p)
2106 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002107 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002108}
2109
2110/*
2111 * backslash_halve() plus save the result in allocated memory.
2112 */
2113 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002114backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002115{
2116 char_u *res;
2117
2118 res = vim_strsave(p);
2119 if (res == NULL)
2120 return p;
2121 backslash_halve(res);
2122 return res;
2123}
2124
2125#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * There are 256 entries, one per byte value, eight per row.  The table is
 * indexed with the EBCDIC byte and gives the converted byte.  All values
 * are written in octal.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
2166
2167/*
2168 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2169 * wanting 7-bit ASCII characters out the other end.
2170 */
2171 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002172ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002173{
2174 int i;
2175
2176 for (i = 0; i < len; i++)
2177 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2178}
2179#endif