/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
21/* b_chartab[] is an array of 32 bytes, each bit representing one of the
22 * characters 0-255. */
23#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
75 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000108 /* euc-jp characters starting with 0x8e are single width */
109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111 /* other double-byte chars can be printable AND double-width */
112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000115 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
130 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
134 /* double-byte characters are probably word characters */
135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
147 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 * options Each option is a list of characters, character numbers or
149 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
150 */
151 for (i = global ? 0 : 3; i <= 3; ++i)
152 {
153 if (i == 0)
154 p = p_isi; /* first round: 'isident' */
155 else if (i == 1)
156 p = p_isp; /* second round: 'isprint' */
157 else if (i == 2)
158 p = p_isf; /* third round: 'isfname' */
159 else /* i == 3 */
160 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
161
162 while (*p)
163 {
164 tilde = FALSE;
165 do_isalpha = FALSE;
166 if (*p == '^' && p[1] != NUL)
167 {
168 tilde = TRUE;
169 ++p;
170 }
171 if (VIM_ISDIGIT(*p))
172 c = getdigits(&p);
173 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000174 if (has_mbyte)
175 c = mb_ptr2char_adv(&p);
176 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000185 if (has_mbyte)
186 c2 = mb_ptr2char_adv(&p);
187 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 c2 = *p++;
189 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000191 || !(*p == NUL || *p == ','))
192 return FAIL;
193
194 if (c2 == -1) /* not a range */
195 {
196 /*
197 * A single '@' (not "@-@"):
198 * Decide on letters being ID/printable/keyword chars with
199 * standard function isalpha(). This takes care of locale for
200 * single-byte characters).
201 */
202 if (c == '@')
203 {
204 do_isalpha = TRUE;
205 c = 1;
206 c2 = 255;
207 }
208 else
209 c2 = c;
210 }
211 while (c <= c2)
212 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000213 /* Use the MB_ functions here, because isalpha() doesn't
214 * work properly when 'encoding' is "latin1" and the locale is
215 * "C". */
Bram Moolenaar14184a32019-02-16 15:10:30 +0100216 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000217 {
218 if (i == 0) /* (re)set ID flag */
219 {
220 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100221 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100223 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000224 }
225 else if (i == 1) /* (re)set printable */
226 {
227 if ((c < ' '
228#ifndef EBCDIC
229 || c > '~'
230#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100231 // For double-byte we keep the cell width, so
232 // that we can detect it from the first byte.
233 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000234 {
235 if (tilde)
236 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100237 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100239 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000240 }
241 else
242 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100243 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
244 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000245 }
246 }
247 }
248 else if (i == 2) /* (re)set fname flag */
249 {
250 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100251 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100253 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 }
255 else /* i == 3 */ /* (re)set keyword flag */
256 {
257 if (tilde)
258 RESET_CHARTAB(buf, c);
259 else
260 SET_CHARTAB(buf, c);
261 }
262 }
263 ++c;
264 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265
266 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100268 if (c == ',' && *p == NUL)
269 /* Trailing comma is not allowed. */
270 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 }
272 }
273 chartab_initialized = TRUE;
274 return OK;
275}
276
277/*
278 * Translate any special characters in buf[bufsize] in-place.
279 * The result is a string with only printable characters, but if there is not
280 * enough room, not all characters will be translated.
281 */
282 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100283trans_characters(
284 char_u *buf,
285 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286{
287 int len; /* length of string needing translation */
288 int room; /* room in buffer after string */
289 char_u *trs; /* translated character */
290 int trs_len; /* length of trs[] */
291
292 len = (int)STRLEN(buf);
293 room = bufsize - len;
294 while (*buf != 0)
295 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000297 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 len -= trs_len;
299 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 {
301 trs = transchar_byte(*buf);
302 trs_len = (int)STRLEN(trs);
303 if (trs_len > 1)
304 {
305 room -= trs_len - 1;
306 if (room <= 0)
307 return;
308 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
309 }
310 mch_memmove(buf, trs, (size_t)trs_len);
311 --len;
312 }
313 buf += trs_len;
314 }
315}
316
Bram Moolenaar071d4272004-06-13 20:20:40 +0000317/*
318 * Translate a string into allocated memory, replacing special chars with
319 * printable chars. Returns NULL when out of memory.
320 */
321 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100322transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323{
324 char_u *res;
325 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 int l, len, c;
327 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328
Bram Moolenaar071d4272004-06-13 20:20:40 +0000329 if (has_mbyte)
330 {
331 /* Compute the length of the result, taking account of unprintable
332 * multi-byte characters. */
333 len = 0;
334 p = s;
335 while (*p != NUL)
336 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000337 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000338 {
339 c = (*mb_ptr2char)(p);
340 p += l;
341 if (vim_isprintc(c))
342 len += l;
343 else
344 {
345 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000346 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347 }
348 }
349 else
350 {
351 l = byte2cells(*p++);
352 if (l > 0)
353 len += l;
354 else
355 len += 4; /* illegal byte sequence */
356 }
357 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 }
360 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200361 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000362 if (res != NULL)
363 {
364 *res = NUL;
365 p = s;
366 while (*p != NUL)
367 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000368 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000369 {
370 c = (*mb_ptr2char)(p);
371 if (vim_isprintc(c))
372 STRNCAT(res, p, l); /* append printable multi-byte char */
373 else
374 transchar_hex(res + STRLEN(res), c);
375 p += l;
376 }
377 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000378 STRCAT(res, transchar_byte(*p++));
379 }
380 }
381 return res;
382}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000383
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000385 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
386 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000387 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
388 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000389 */
390 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100391str_foldcase(
392 char_u *str,
393 int orglen,
394 char_u *buf,
395 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000396{
397 garray_T ga;
398 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000399 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000400
401#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
402#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000403#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
404#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000405
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 /* Copy "str" into "buf" or allocated memory, unmodified. */
407 if (buf == NULL)
408 {
409 ga_init2(&ga, 1, 10);
410 if (ga_grow(&ga, len + 1) == FAIL)
411 return NULL;
412 mch_memmove(ga.ga_data, str, (size_t)len);
413 ga.ga_len = len;
414 }
415 else
416 {
417 if (len >= buflen) /* Ugly! */
418 len = buflen - 1;
419 mch_memmove(buf, str, (size_t)len);
420 }
421 if (buf == NULL)
422 GA_CHAR(len) = NUL;
423 else
424 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425
426 /* Make each character lower case. */
427 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000428 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000429 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000430 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431 {
432 if (enc_utf8)
433 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000434 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100435 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000436 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437
Bram Moolenaarb9839212008-06-28 11:03:50 +0000438 /* Only replace the character when it is not an invalid
439 * sequence (ASCII character or more than one byte) and
440 * utf_tolower() doesn't return the original character. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100441 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100443 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000444
445 /* If the byte length changes need to shift the following
446 * characters forward or backward. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100447 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000448 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100449 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000450 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100451 if (buf == NULL
452 ? ga_grow(&ga, nlen - olen + 1) == FAIL
453 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000454 {
455 /* out of memory, keep old char */
456 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100460 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000462 if (buf == NULL)
463 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100464 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
465 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000466 }
467 else
468 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100469 STRMOVE(buf + i + nlen, buf + i + olen);
470 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000474 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000475 }
476 }
477 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000478 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000479 }
480 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000481 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000482 if (buf == NULL)
483 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
484 else
485 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000486 ++i;
487 }
488 }
489
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000490 if (buf == NULL)
491 return (char_u *)ga.ga_data;
492 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494
495/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100496 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000497 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100498 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000499 * Does NOT work for multi-byte characters, c must be <= 255.
500 * Also doesn't work for the first byte of a multi-byte, "c" must be a
501 * character!
502 */
503static char_u transchar_buf[7];
504
505 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100506transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507{
508 int i;
509
510 i = 0;
511 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
512 {
513 transchar_buf[0] = '~';
514 transchar_buf[1] = '@';
515 i = 2;
516 c = K_SECOND(c);
517 }
518
519 if ((!chartab_initialized && (
520#ifdef EBCDIC
521 (c >= 64 && c < 255)
522#else
523 (c >= ' ' && c <= '~')
524#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525 )) || (c < 256 && vim_isprintc_strict(c)))
526 {
527 /* printable character */
528 transchar_buf[i] = c;
529 transchar_buf[i + 1] = NUL;
530 }
531 else
532 transchar_nonprint(transchar_buf + i, c);
533 return transchar_buf;
534}
535
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536/*
537 * Like transchar(), but called with a byte instead of a character. Checks
538 * for an illegal UTF-8 byte.
539 */
540 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100541transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000542{
543 if (enc_utf8 && c >= 0x80)
544 {
545 transchar_nonprint(transchar_buf, c);
546 return transchar_buf;
547 }
548 return transchar(c);
549}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550
551/*
552 * Convert non-printable character to two or more printable characters in
553 * "buf[]". "buf" needs to be able to hold five bytes.
554 * Does NOT work for multi-byte characters, c must be <= 255.
555 */
556 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100557transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000558{
559 if (c == NL)
560 c = NUL; /* we use newline in place of a NUL */
561 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
562 c = NL; /* we use CR in place of NL in this case */
563
564 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
565 transchar_hex(buf, c);
566
567#ifdef EBCDIC
568 /* For EBCDIC only the characters 0-63 and 255 are not printable */
569 else if (CtrlChar(c) != 0 || c == DEL)
570#else
571 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
572#endif
573 {
574 buf[0] = '^';
575#ifdef EBCDIC
576 if (c == DEL)
577 buf[1] = '?'; /* DEL displayed as ^? */
578 else
579 buf[1] = CtrlChar(c);
580#else
581 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
582#endif
583
584 buf[2] = NUL;
585 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000586 else if (enc_utf8 && c >= 0x80)
587 {
588 transchar_hex(buf, c);
589 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590#ifndef EBCDIC
591 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
592 {
593 buf[0] = '|';
594 buf[1] = c - 0x80;
595 buf[2] = NUL;
596 }
597#else
598 else if (c < 64)
599 {
600 buf[0] = '~';
601 buf[1] = MetaChar(c);
602 buf[2] = NUL;
603 }
604#endif
605 else /* 0x80 - 0x9f and 0xff */
606 {
607 /*
608 * TODO: EBCDIC I don't know what to do with this chars, so I display
609 * them as '~?' for now
610 */
611 buf[0] = '~';
612#ifdef EBCDIC
613 buf[1] = '?'; /* 0xff displayed as ~? */
614#else
615 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
616#endif
617 buf[2] = NUL;
618 }
619}
620
621 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100622transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000623{
624 int i = 0;
625
626 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000627 if (c > 255)
628 {
629 buf[++i] = nr2hex((unsigned)c >> 12);
630 buf[++i] = nr2hex((unsigned)c >> 8);
631 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000633 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000634 buf[++i] = '>';
635 buf[++i] = NUL;
636}
637
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    static const char digits[] = "0123456789abcdef";

    return (unsigned)digits[c & 0xf];
}

651/*
652 * Return number of display cells occupied by byte "b".
653 * Caller must make sure 0 <= b <= 255.
654 * For multi-byte mode "b" must be the first byte of a character.
655 * A TAB is counted as two cells: "^I".
656 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
657 * cells depends on further bytes.
658 */
659 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100660byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 if (enc_utf8 && b >= 0x80)
663 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100664 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665}
666
667/*
668 * Return number of display cells occupied by character "c".
669 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
670 * A TAB is counted as two cells: "^I" or four: "<09>".
671 */
672 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100673char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000674{
675 if (IS_SPECIAL(c))
676 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677 if (c >= 0x80)
678 {
679 /* UTF-8: above 0x80 need to check the value */
680 if (enc_utf8)
681 return utf_char2cells(c);
682 /* DBCS: double-byte means double-width, except for euc-jp with first
683 * byte 0x8e */
684 if (enc_dbcs != 0 && c >= 0x100)
685 {
686 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
687 return 1;
688 return 2;
689 }
690 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100691 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000692}
693
694/*
695 * Return number of display cells occupied by character at "*p".
696 * A TAB is counted as two cells: "^I" or four: "<09>".
697 */
698 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100699ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000700{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000701 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
702 if (enc_utf8 && *p >= 0x80)
703 return utf_ptr2cells(p);
704 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100705 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000706}
707
708/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100709 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710 * counting TABs as two characters: "^I".
711 */
712 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100713vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714{
715 return vim_strnsize(s, (int)MAXCOL);
716}
717
718/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100719 * Return the number of character cells string "s[len]" will take on the
720 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721 */
722 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100723vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724{
725 int size = 0;
726
727 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000728 if (has_mbyte)
729 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000730 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731
732 size += ptr2cells(s);
733 s += l;
734 len -= l - 1;
735 }
736 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100738
Bram Moolenaar071d4272004-06-13 20:20:40 +0000739 return size;
740}
741
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return, so
 * it must be the last statement of the function using it.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif

Bram Moolenaar071d4272004-06-13 20:20:40 +0000769 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100770chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771{
772 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
773}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif

783/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200784 * Return the number of characters the string 's' will take on the screen,
785 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000786 */
787 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100788linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789{
Bram Moolenaardc536092010-07-18 15:45:49 +0200790 return linetabsize_col(0, s);
791}
792
793/*
794 * Like linetabsize(), but starting at column "startcol".
795 */
796 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100797linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200798{
799 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200800 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000801
802 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200803 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804 return (int)col;
805}
806
807/*
808 * Like linetabsize(), but for a given window instead of the current one.
809 */
810 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100811win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000812{
813 colnr_T col = 0;
814 char_u *s;
815
Bram Moolenaar597a4222014-06-25 14:39:50 +0200816 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100817 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200818 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000819 return (int)col;
820}
821
822/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000823 * Return TRUE if 'c' is a normal identifier character:
824 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 */
826 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100827vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100829 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830}
831
832/*
833 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100834 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835 * For multi-byte characters mb_get_class() is used (builtin rules).
836 */
837 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100838vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000839{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100840 return vim_iswordc_buf(c, curbuf);
841}
842
843 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100844vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100845{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 if (c >= 0x100)
847 {
848 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000849 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000850 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100851 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100852 return FALSE;
853 }
854 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855}
856
857/*
858 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
859 */
860 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100861vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100863 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000864}
865
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100867vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100869 int c = *p;
870
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100871 if (has_mbyte && MB_BYTE2LEN(c) > 1)
872 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100873 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000874}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875
876/*
877 * return TRUE if 'c' is a valid file-name character
878 * Assume characters above 0x100 are valid (multi-byte).
879 */
880 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100881vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000882{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100883 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000884}
885
886/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000887 * return TRUE if 'c' is a valid file-name character or a wildcard character
888 * Assume characters above 0x100 are valid (multi-byte).
889 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
890 * returns false.
891 */
892 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100893vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000894{
895 char_u buf[2];
896
897 buf[0] = (char_u)c;
898 buf[1] = NUL;
899 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
900}
901
902/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200903 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000904 * Assume characters above 0x100 are printable (multi-byte), except for
905 * Unicode.
906 */
907 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100908vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000910 if (enc_utf8 && c >= 0x100)
911 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100912 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913}
914
915/*
916 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
917 * byte of a double-byte character.
918 */
919 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100920vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000921{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
923 return FALSE;
924 if (enc_utf8 && c >= 0x100)
925 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100926 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927}
928
929/*
930 * like chartabsize(), but also check for line breaks on the screen
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933lbr_chartabsize(
934 char_u *line UNUSED, /* start of the line */
935 unsigned char *s,
936 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000937{
938#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100939 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
940 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000941 {
942#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 if (curwin->w_p_wrap)
944 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000945 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
946#ifdef FEAT_LINEBREAK
947 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200948 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000949#endif
950}
951
952/*
953 * Call lbr_chartabsize() and advance the pointer.
954 */
955 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100956lbr_chartabsize_adv(
957 char_u *line, /* start of the line */
958 char_u **s,
959 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000960{
961 int retval;
962
Bram Moolenaar597a4222014-06-25 14:39:50 +0200963 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100964 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965 return retval;
966}
967
968/*
969 * This function is used very often, keep it fast!!!!
970 *
971 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
972 * string at start of line. Warning: *headp is only set if it's a non-zero
973 * value, init to 0 before calling.
974 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000975 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100976win_lbr_chartabsize(
977 win_T *wp,
978 char_u *line UNUSED, /* start of the line */
979 char_u *s,
980 colnr_T col,
981 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000982{
983#ifdef FEAT_LINEBREAK
984 int c;
985 int size;
986 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +0200987 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 colnr_T colmax;
989 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000991 int numberextra;
992 char_u *ps;
993 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000994 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +0100995 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000996
997 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +0200998 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000999 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001000 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001#endif
1002 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 if (wp->w_p_wrap)
1004 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1006 }
1007
1008#ifdef FEAT_LINEBREAK
1009 /*
1010 * First get normal size, without 'linebreak'
1011 */
1012 size = win_chartabsize(wp, s, col);
1013 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001014 if (tab_corr)
1015 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016
1017 /*
1018 * If 'linebreak' set check at a blank before a non-blank if the line
1019 * needs a break here
1020 */
1021 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001022 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001023 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001025 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001026 {
1027 /*
1028 * Count all characters from first non-blank after a blank up to next
1029 * non-blank after a blank.
1030 */
1031 numberextra = win_col_off(wp);
1032 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001033 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001034 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001035 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001036 colmax += col_adj;
1037 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001038 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001039 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001040 }
1041
Bram Moolenaar071d4272004-06-13 20:20:40 +00001042 for (;;)
1043 {
1044 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001045 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001046 c = *s;
1047 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001048 && (VIM_ISBREAK(c)
1049 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001050 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001051 break;
1052
1053 col2 += win_chartabsize(wp, s, col2);
1054 if (col2 >= colmax) /* doesn't fit */
1055 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001056 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 break;
1058 }
1059 }
1060 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001061 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1062 && wp->w_p_wrap && in_win_border(wp, col))
1063 {
1064 ++size; /* Count the ">" in the last column. */
1065 mb_added = 1;
1066 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001067
1068 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001069 * May have to add something for 'breakindent' and/or 'showbreak'
1070 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001071 * Set *headp to the size of what we add.
1072 */
1073 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001074 sbr = get_showbreak_value(wp);
1075 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001077 colnr_T sbrlen = 0;
1078 int numberwidth = win_col_off(wp);
1079
1080 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001082 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001083 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001084 col -= wp->w_width;
1085 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001086 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001087 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001088 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001089 {
Bram Moolenaaree857022019-11-09 23:26:40 +01001090 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001091 if (col >= sbrlen)
1092 col -= sbrlen;
1093 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001094 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001096 else if (col > 0 && numberextra > 0)
1097 col += numberwidth - win_col_off2(wp);
1098
1099 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001100 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001101 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001103 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001104 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001105 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001106 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001108 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001109 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001110 int prev_width = col
1111 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001112
1113 if (width <= 0)
1114 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001115 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001116 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001117 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001118 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001119 }
1120 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001121 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001122 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001123 if (wp->w_p_bri)
1124 added += get_breakindent_win(wp, line);
1125
Bram Moolenaar95765082014-08-24 21:19:25 +02001126 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001127 if (col != 0)
1128 added = 0;
1129 }
1130 }
1131 if (headp != NULL)
1132 *headp = added + mb_added;
1133 return size;
1134#endif
1135}
1136
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137/*
1138 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1139 * 'wrap' is on. This means we need to check for a double-byte character that
1140 * doesn't fit at the end of the screen line.
1141 */
1142 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001143win_nolbr_chartabsize(
1144 win_T *wp,
1145 char_u *s,
1146 colnr_T col,
1147 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148{
1149 int n;
1150
1151 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1152 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001153# ifdef FEAT_VARTABS
1154 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1155 wp->w_buffer->b_p_vts_array);
1156# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 n = wp->w_buffer->b_p_ts;
1158 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001159# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001160 }
1161 n = ptr2cells(s);
1162 /* Add one cell for a double-width character in the last column of the
1163 * window, displayed with a ">". */
1164 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1165 {
1166 if (headp != NULL)
1167 *headp = 1;
1168 return 3;
1169 }
1170 return n;
1171}
1172
1173/*
1174 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1175 * "wp".
1176 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001177 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001178in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001179{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001180 int width1; /* width of first line (after line number) */
1181 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182
Bram Moolenaar071d4272004-06-13 20:20:40 +00001183 if (wp->w_width == 0) /* there is no border */
1184 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001185 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001186 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001188 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189 return TRUE;
1190 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001191 if (width2 <= 0)
1192 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001193 return ((vcol - width1) % width2 == width2 - 1);
1194}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
1196/*
1197 * Get virtual column number of pos.
1198 * start: on the first position of this character (TAB, ctrl)
1199 * cursor: where the cursor is on this character (first char, except for TAB)
1200 * end: on the last position of this character (TAB, ctrl)
1201 *
1202 * This is used very often, keep it fast!
1203 */
1204 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001205getvcol(
1206 win_T *wp,
1207 pos_T *pos,
1208 colnr_T *start,
1209 colnr_T *cursor,
1210 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001211{
1212 colnr_T vcol;
1213 char_u *ptr; /* points to current char */
1214 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001215 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001216 int incr;
1217 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001218#ifdef FEAT_VARTABS
1219 int *vts = wp->w_buffer->b_p_vts_array;
1220#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001221 int ts = wp->w_buffer->b_p_ts;
1222 int c;
1223
1224 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001225 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001226 if (pos->col == MAXCOL)
1227 posptr = NULL; /* continue until the NUL */
1228 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001229 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001230 /* Special check for an empty line, which can happen on exit, when
1231 * ml_get_buf() always returns an empty string. */
1232 if (*ptr == NUL)
1233 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001234 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001235 if (has_mbyte)
1236 /* always start on the first byte */
1237 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001238 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001239
1240 /*
1241 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001242 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1243 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001244 * Also use this when 'list' is set but tabs take their normal size.
1245 */
1246 if ((!wp->w_p_list || lcs_tab1 != NUL)
1247#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001248 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001249#endif
1250 )
1251 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252 for (;;)
1253 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001255 c = *ptr;
1256 /* make sure we don't go past the end of the line */
1257 if (c == NUL)
1258 {
1259 incr = 1; /* NUL at end of line only takes one column */
1260 break;
1261 }
1262 /* A tab gets expanded, depending on the current column */
1263 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001264#ifdef FEAT_VARTABS
1265 incr = tabstop_padding(vcol, ts, vts);
1266#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001268#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 else
1270 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 if (has_mbyte)
1272 {
1273 /* For utf-8, if the byte is >= 0x80, need to look at
1274 * further bytes to find the cell width. */
1275 if (enc_utf8 && c >= 0x80)
1276 incr = utf_ptr2cells(ptr);
1277 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001278 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001279
1280 /* If a double-cell char doesn't fit at the end of a line
1281 * it wraps to the next line, it's like this char is three
1282 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001283 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1284 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 {
1286 ++incr;
1287 head = 1;
1288 }
1289 }
1290 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001291 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292 }
1293
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001294 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001295 break;
1296
1297 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001298 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001299 }
1300 }
1301 else
1302 {
1303 for (;;)
1304 {
1305 /* A tab gets expanded, depending on the current column */
1306 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001307 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 /* make sure we don't go past the end of the line */
1309 if (*ptr == NUL)
1310 {
1311 incr = 1; /* NUL at end of line only takes one column */
1312 break;
1313 }
1314
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001315 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001316 break;
1317
1318 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001319 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320 }
1321 }
1322 if (start != NULL)
1323 *start = vcol + head;
1324 if (end != NULL)
1325 *end = vcol + incr - 1;
1326 if (cursor != NULL)
1327 {
1328 if (*ptr == TAB
1329 && (State & NORMAL)
1330 && !wp->w_p_list
1331 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001332 && !(VIsual_active
1333 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 )
1335 *cursor = vcol + incr - 1; /* cursor at end */
1336 else
1337 *cursor = vcol + head; /* cursor at start */
1338 }
1339}
1340
1341/*
1342 * Get virtual cursor column in the current window, pretending 'list' is off.
1343 */
1344 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001345getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001346{
1347 int list_save = curwin->w_p_list;
1348 colnr_T vcol;
1349
1350 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001351 if (posp->coladd)
1352 getvvcol(curwin, posp, NULL, &vcol, NULL);
1353 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001354 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 curwin->w_p_list = list_save;
1356 return vcol;
1357}
1358
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359/*
1360 * Get virtual column in virtual mode.
1361 */
1362 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001363getvvcol(
1364 win_T *wp,
1365 pos_T *pos,
1366 colnr_T *start,
1367 colnr_T *cursor,
1368 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369{
1370 colnr_T col;
1371 colnr_T coladd;
1372 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374
1375 if (virtual_active())
1376 {
1377 /* For virtual mode, only want one value */
1378 getvcol(wp, pos, &col, NULL, NULL);
1379
1380 coladd = pos->coladd;
1381 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001382 /* Cannot put the cursor on part of a wide character. */
1383 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001384 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001385 {
1386 int c = (*mb_ptr2char)(ptr + pos->col);
1387
1388 if (c != TAB && vim_isprintc(c))
1389 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001390 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001391 if (coladd > endadd) /* past end of line */
1392 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001393 else
1394 coladd = 0;
1395 }
1396 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001397 col += coladd;
1398 if (start != NULL)
1399 *start = col;
1400 if (cursor != NULL)
1401 *cursor = col;
1402 if (end != NULL)
1403 *end = col + endadd;
1404 }
1405 else
1406 getvcol(wp, pos, start, cursor, end);
1407}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408
Bram Moolenaar071d4272004-06-13 20:20:40 +00001409/*
1410 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1411 * Used for Visual block mode.
1412 */
1413 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001414getvcols(
1415 win_T *wp,
1416 pos_T *pos1,
1417 pos_T *pos2,
1418 colnr_T *left,
1419 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001420{
1421 colnr_T from1, from2, to1, to2;
1422
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001423 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001424 {
1425 getvvcol(wp, pos1, &from1, NULL, &to1);
1426 getvvcol(wp, pos2, &from2, NULL, &to2);
1427 }
1428 else
1429 {
1430 getvvcol(wp, pos2, &from1, NULL, &to1);
1431 getvvcol(wp, pos1, &from2, NULL, &to2);
1432 }
1433 if (from2 < from1)
1434 *left = from2;
1435 else
1436 *left = from1;
1437 if (to2 > to1)
1438 {
1439 if (*p_sel == 'e' && from2 - 1 >= to1)
1440 *right = from2 - 1;
1441 else
1442 *right = to2;
1443 }
1444 else
1445 *right = to1;
1446}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001447
1448/*
1449 * skipwhite: skip over ' ' and '\t'.
1450 */
1451 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001452skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001453{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001454 char_u *p = q;
1455
Bram Moolenaar1c465442017-03-12 20:10:05 +01001456 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001457 ++p;
1458 return p;
1459}
1460
1461/*
Bram Moolenaare2e69e42017-09-02 20:30:35 +02001462 * getwhitecols: return the number of whitespace
1463 * columns (bytes) at the start of a given line
1464 */
1465 int
1466getwhitecols_curline()
1467{
1468 return getwhitecols(ml_get_curline());
1469}
1470
1471 int
1472getwhitecols(char_u *p)
1473{
1474 return skipwhite(p) - p;
1475}
1476
1477/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001478 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479 */
1480 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001481skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001482{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001483 char_u *p = q;
1484
Bram Moolenaar071d4272004-06-13 20:20:40 +00001485 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1486 ++p;
1487 return p;
1488}
1489
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Skip over binary digits: return a pointer to the first character at or
 * after "q" that is not accepted by vim_isbdigit().
 */
    char_u *
skipbin(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isbdigit(*p); ++p)
	;
    return p;
}

/*
 * Skip over digits and hex characters: return a pointer to the first
 * character at or after "q" that is not accepted by vim_isxdigit().
 */
    char_u *
skiphex(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isxdigit(*p); ++p)
	;
    return p;
}
#endif
1517
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001518/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001519 * skip to bin digit (or NUL after the string)
1520 */
1521 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001522skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001523{
1524 char_u *p = q;
1525
1526 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1527 ++p;
1528 return p;
1529}
1530
1531/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001532 * skip to digit (or NUL after the string)
1533 */
1534 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001535skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001536{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001537 char_u *p = q;
1538
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001539 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1540 ++p;
1541 return p;
1542}
1543
1544/*
1545 * skip to hex character (or NUL after the string)
1546 */
1547 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001548skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001550 char_u *p = q;
1551
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001552 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1553 ++p;
1554 return p;
1555}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001556
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    /* The unsigned subtraction wraps around for anything below '0', thus a
     * single comparison covers both ends of the range. */
    return ((unsigned)c - '0') < 10u;
}
1568
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(int c)
{
    int		is_decimal = (c >= '0' && c <= '9');
    int		is_lower_hex = (c >= 'a' && c <= 'f');
    int		is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1581
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits: the
 * result is non-zero for '0' and '1' only.  Can handle characters > 0x100.
 */
    int
vim_isbdigit(int c)
{
    return (c >= '0' && c <= '1');
}
1591
Bram Moolenaar78622822005-08-23 21:00:13 +00001592/*
1593 * Vim's own character class functions. These exist because many library
1594 * islower()/toupper() etc. do not work properly: they crash when used with
1595 * invalid values or can't handle latin1 when the locale is C.
1596 * Speed is most important here.
1597 */
1598#define LATIN1LOWER 'l'
1599#define LATIN1UPPER 'U'
1600
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001601static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001602static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1603static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001604
1605 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001606vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001607{
1608 if (c <= '@')
1609 return FALSE;
1610 if (c >= 0x80)
1611 {
1612 if (enc_utf8)
1613 return utf_islower(c);
1614 if (c >= 0x100)
1615 {
1616#ifdef HAVE_ISWLOWER
1617 if (has_mbyte)
1618 return iswlower(c);
1619#endif
1620 /* islower() can't handle these chars and may crash */
1621 return FALSE;
1622 }
1623 if (enc_latin1like)
1624 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1625 }
1626 return islower(c);
1627}
1628
1629 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001630vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001631{
1632 if (c <= '@')
1633 return FALSE;
1634 if (c >= 0x80)
1635 {
1636 if (enc_utf8)
1637 return utf_isupper(c);
1638 if (c >= 0x100)
1639 {
1640#ifdef HAVE_ISWUPPER
1641 if (has_mbyte)
1642 return iswupper(c);
1643#endif
1644 /* islower() can't handle these chars and may crash */
1645 return FALSE;
1646 }
1647 if (enc_latin1like)
1648 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1649 }
1650 return isupper(c);
1651}
1652
1653 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001654vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001655{
1656 if (c <= '@')
1657 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001658 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001659 {
1660 if (enc_utf8)
1661 return utf_toupper(c);
1662 if (c >= 0x100)
1663 {
1664#ifdef HAVE_TOWUPPER
1665 if (has_mbyte)
1666 return towupper(c);
1667#endif
1668 /* toupper() can't handle these chars and may crash */
1669 return c;
1670 }
1671 if (enc_latin1like)
1672 return latin1upper[c];
1673 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001674 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1675 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001676 return TOUPPER_LOC(c);
1677}
1678
1679 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001680vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001681{
1682 if (c <= '@')
1683 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001684 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001685 {
1686 if (enc_utf8)
1687 return utf_tolower(c);
1688 if (c >= 0x100)
1689 {
1690#ifdef HAVE_TOWLOWER
1691 if (has_mbyte)
1692 return towlower(c);
1693#endif
1694 /* tolower() can't handle these chars and may crash */
1695 return c;
1696 }
1697 if (enc_latin1like)
1698 return latin1lower[c];
1699 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001700 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1701 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001702 return TOLOWER_LOC(c);
1703}
Bram Moolenaar78622822005-08-23 21:00:13 +00001704
Bram Moolenaar071d4272004-06-13 20:20:40 +00001705/*
1706 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1707 */
1708 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001709skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001710{
1711 while (*p != ' ' && *p != '\t' && *p != NUL)
1712 ++p;
1713 return p;
1714}
1715
Bram Moolenaar071d4272004-06-13 20:20:40 +00001716/*
1717 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1718 */
1719 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001720skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721{
1722 while (*p != ' ' && *p != '\t' && *p != NUL)
1723 {
1724 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1725 ++p;
1726 ++p;
1727 }
1728 return p;
1729}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001730
1731/*
1732 * Getdigits: Get a number from a string and skip over it.
1733 * Note: the argument is a pointer to a char_u pointer!
1734 */
1735 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001736getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737{
1738 char_u *p;
1739 long retval;
1740
1741 p = *pp;
1742 retval = atol((char *)p);
1743 if (*p == '-') /* skip negative sign */
1744 ++p;
1745 p = skipdigits(p); /* skip to next non-digit */
1746 *pp = p;
1747 return retval;
1748}
1749
1750/*
1751 * Return TRUE if "lbuf" is empty or only contains blanks.
1752 */
1753 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001754vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755{
1756 char_u *p;
1757
1758 p = skipwhite(lbuf);
1759 return (*p == NUL || *p == '\r' || *p == '\n');
1760}
1761
1762/*
1763 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001764 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1765 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 * 0 decimal
1767 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001768 * 'B' bin
1769 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 * 'X' hex
1771 * 'x' hex
1772 * If "len" is not NULL, the length of the number in characters is returned.
1773 * If "nptr" is not NULL, the signed result is returned in it.
1774 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001775 * If "what" contains STR2NR_BIN recognize binary numbers
1776 * If "what" contains STR2NR_OCT recognize octal numbers
1777 * If "what" contains STR2NR_HEX recognize hex numbers
1778 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001779 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001780 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001781 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001782 */
1783 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001784vim_str2nr(
1785 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001786 int *prep, // return: type of number 0 = decimal, 'x'
1787 // or 'X' is hex, '0' = octal, 'b' or 'B'
1788 // is bin
1789 int *len, // return: detected length of number
1790 int what, // what numbers to recognize
1791 varnumber_T *nptr, // return: signed result
1792 uvarnumber_T *unptr, // return: unsigned result
1793 int maxlen, // max length of string to check
1794 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001795{
1796 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001797 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001798 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001799 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001800 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001801
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001802 if (len != NULL)
1803 *len = 0;
1804
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 if (ptr[0] == '-')
1806 {
1807 negative = TRUE;
1808 ++ptr;
1809 }
1810
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001811 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001812 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1813 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001814 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001815 pre = ptr[1];
1816 if ((what & STR2NR_HEX)
1817 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1818 && (maxlen == 0 || maxlen > 2))
1819 /* hexadecimal */
1820 ptr += 2;
1821 else if ((what & STR2NR_BIN)
1822 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1823 && (maxlen == 0 || maxlen > 2))
1824 /* binary */
1825 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001826 else
1827 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001828 /* decimal or octal, default is decimal */
1829 pre = 0;
1830 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001831 {
1832 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001833 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001834 {
1835 if (ptr[n] > '7')
1836 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001837 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001838 break;
1839 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001840 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001841 }
1842 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001843 }
1844 }
1845
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001846 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001847 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001848 if (pre == 'B' || pre == 'b'
1849 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001850 {
1851 /* bin */
1852 if (pre != 0)
1853 n += 2; /* skip over "0b" */
1854 while ('0' <= *ptr && *ptr <= '1')
1855 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001856 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001857 if (un <= UVARNUM_MAX / 2)
1858 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001859 else
1860 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001861 ++ptr;
1862 if (n++ == maxlen)
1863 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001864 if ((what & STR2NR_QUOTE) && *ptr == '\''
1865 && '0' <= ptr[1] && ptr[1] <= '1')
1866 {
1867 ++ptr;
1868 if (n++ == maxlen)
1869 break;
1870 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001871 }
1872 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001873 else if (pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001874 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001875 /* octal */
1876 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001877 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001878 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001879 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001880 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1881 else
1882 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001883 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001884 if (n++ == maxlen)
1885 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001886 if ((what & STR2NR_QUOTE) && *ptr == '\''
1887 && '0' <= ptr[1] && ptr[1] <= '7')
1888 {
1889 ++ptr;
1890 if (n++ == maxlen)
1891 break;
1892 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001893 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001894 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001895 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001896 {
1897 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001898 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001899 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001900 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001901 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001902 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001903 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001904 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1905 else
1906 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001907 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001908 if (n++ == maxlen)
1909 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001910 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1911 {
1912 ++ptr;
1913 if (n++ == maxlen)
1914 break;
1915 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001916 }
1917 }
1918 else
1919 {
1920 /* decimal */
1921 while (VIM_ISDIGIT(*ptr))
1922 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001923 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1924
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001925 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001926 if (un < UVARNUM_MAX / 10
1927 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1928 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001929 else
1930 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001932 if (n++ == maxlen)
1933 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001934 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1935 {
1936 ++ptr;
1937 if (n++ == maxlen)
1938 break;
1939 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001940 }
1941 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001942
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001943 // Check for an alpha-numeric character immediately following, that is
1944 // most likely a typo.
1945 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1946 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001947
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001948 if (prep != NULL)
1949 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001950 if (len != NULL)
1951 *len = (int)(ptr - start);
1952 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001953 {
1954 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001955 {
1956 /* avoid ubsan error for overflow */
1957 if (un > VARNUM_MAX)
1958 *nptr = VARNUM_MIN;
1959 else
1960 *nptr = -(varnumber_T)un;
1961 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001962 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001963 {
1964 if (un > VARNUM_MAX)
1965 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001966 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001967 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001968 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001969 if (unptr != NULL)
1970 *unptr = un;
1971}
1972
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is treated like a decimal digit and yields "c - '0'".
 */
    int
hex2nr(int c)
{
    int	    zero;	// character code that corresponds to the value 0

    if ('a' <= c && c <= 'f')
	zero = 'a' - 10;
    else if ('A' <= c && c <= 'F')
	zero = 'A' - 10;
    else
	zero = '0';
    return c - zero;
}
1986
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value: p[0] supplies the
 * upper four bits and p[1] the lower four.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(char_u *p)
{
    int	    high;
    int	    low;

    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
    {
	high = hex2nr(p[0]);
	low = hex2nr(p[1]);
	return (high << 4) + low;
    }
    return -1;
}
#endif
2000
2001/*
2002 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002003 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002004 * backslash is not a normal file name character.
2005 * '$' is a valid file name character, we don't remove the backslash before
2006 * it. This means it is not possible to use an environment variable after a
2007 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2008 * Although "\ name" is valid, the backslash in "Program\ files" must be
2009 * removed. Assume a file name doesn't start with a space.
2010 * For multi-byte names, never remove a backslash before a non-ascii
2011 * character, assume that all multi-byte characters are valid file name
2012 * characters.
2013 */
2014 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002015rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016{
2017#ifdef BACKSLASH_IN_FILENAME
2018 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002019 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002020 && (str[1] == ' '
2021 || (str[1] != NUL
2022 && str[1] != '*'
2023 && str[1] != '?'
2024 && !vim_isfilec(str[1]))));
2025#else
2026 return (str[0] == '\\' && str[1] != NUL);
2027#endif
2028}
2029
2030/*
2031 * Halve the number of backslashes in a file name argument.
2032 * For MS-DOS we only do this if the character after the backslash
2033 * is not a normal file character.
2034 */
2035 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002036backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002037{
2038 for ( ; *p; ++p)
2039 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002040 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002041}
2042
2043/*
2044 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002045 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002046 */
2047 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002048backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002049{
2050 char_u *res;
2051
2052 res = vim_strsave(p);
2053 if (res == NULL)
2054 return p;
2055 backslash_halve(res);
2056 return res;
2057}
2058
2059#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2060/*
2061 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2062 * The first 64 entries have been added to map control characters defined in
2063 * ascii.h
2064 */
2065static char_u ebcdic2ascii_tab[256] =
2066{
2067 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2068 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2069 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2070 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2071 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2072 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2073 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2074 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2075 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2076 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2077 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2078 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2079 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2080 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2081 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2082 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2083 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2084 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2085 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2086 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2087 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2088 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2089 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2090 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2091 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2092 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2093 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2094 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2095 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2096 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2097 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2098 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2099};
2100
2101/*
2102 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2103 * wanting 7-bit ASCII characters out the other end.
2104 */
2105 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002106ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002107{
2108 int i;
2109
2110 for (i = 0; i < len; i++)
2111 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2112}
2113#endif