/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: character "c" lives in byte (c >> 3), bit (c & 7).
 * Every expansion is fully parenthesized so that a macro can be used as an
 * operand of a larger expression without precedence surprises.
 */
#define SET_CHARTAB(buf, c) \
	((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
	((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
	((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
38/*
39 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000040 * characters for current buffer.
41 *
42 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
43 * 'isprint' and 'encoding'.
44 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010045 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * - For non-multi-byte index with the byte (same as the character).
47 * - For DBCS index with the first byte.
48 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
49 * the same as the character, if the first byte is 0x80 and above it depends
50 * on further bytes).
51 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010052 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000053 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
54 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
55 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
56 * translate the character before displaying it). Note that only DBCS
57 * characters can have 2 display cells and still be printable.
58 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
59 * - CT_ID_CHAR bit is set when the character can be in an identifier.
60 *
61 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
62 * error, OK otherwise.
63 */
64 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010065init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000066{
67 return buf_init_chartab(curbuf, TRUE);
68}
69
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071buf_init_chartab(
72 buf_T *buf,
73 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000074{
75 int c;
76 int c2;
77 char_u *p;
78 int i;
79 int tilde;
80 int do_isalpha;
81
82 if (global)
83 {
84 /*
85 * Set the default size for printable characters:
86 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
87 * This also inits all 'isident' and 'isfname' flags to FALSE.
88 *
89 * EBCDIC: all chars below ' ' are not printable, all others are
90 * printable.
91 */
92 c = 0;
93 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010094 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000095#ifdef EBCDIC
96 while (c < 255)
97#else
98 while (c <= '~')
99#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101 while (c < 256)
102 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100105 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100108 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100111 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100114 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000115 }
116
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 /* Assume that every multi-byte char is a filename character. */
118 for (c = 1; c < 256; ++c)
119 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
120 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
121 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100122 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000123 }
124
125 /*
126 * Init word char flags all to FALSE
127 */
128 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000129 if (enc_dbcs != 0)
130 for (c = 0; c < 256; ++c)
131 {
132 /* double-byte characters are probably word characters */
133 if (MB_BYTE2LEN(c) == 2)
134 SET_CHARTAB(buf, c);
135 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000136
137#ifdef FEAT_LISP
138 /*
139 * In lisp mode the '-' character is included in keywords.
140 */
141 if (buf->b_p_lisp)
142 SET_CHARTAB(buf, '-');
143#endif
144
145 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
146 * options Each option is a list of characters, character numbers or
147 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
148 */
149 for (i = global ? 0 : 3; i <= 3; ++i)
150 {
151 if (i == 0)
152 p = p_isi; /* first round: 'isident' */
153 else if (i == 1)
154 p = p_isp; /* second round: 'isprint' */
155 else if (i == 2)
156 p = p_isf; /* third round: 'isfname' */
157 else /* i == 3 */
158 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
159
160 while (*p)
161 {
162 tilde = FALSE;
163 do_isalpha = FALSE;
164 if (*p == '^' && p[1] != NUL)
165 {
166 tilde = TRUE;
167 ++p;
168 }
169 if (VIM_ISDIGIT(*p))
170 c = getdigits(&p);
171 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000172 if (has_mbyte)
173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
182 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 if (has_mbyte)
184 c2 = mb_ptr2char_adv(&p);
185 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000186 c2 = *p++;
187 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000188 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000189 || !(*p == NUL || *p == ','))
190 return FAIL;
191
192 if (c2 == -1) /* not a range */
193 {
194 /*
195 * A single '@' (not "@-@"):
196 * Decide on letters being ID/printable/keyword chars with
197 * standard function isalpha(). This takes care of locale for
198 * single-byte characters).
199 */
200 if (c == '@')
201 {
202 do_isalpha = TRUE;
203 c = 1;
204 c2 = 255;
205 }
206 else
207 c2 = c;
208 }
209 while (c <= c2)
210 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000211 /* Use the MB_ functions here, because isalpha() doesn't
212 * work properly when 'encoding' is "latin1" and the locale is
213 * "C". */
Bram Moolenaar14184a32019-02-16 15:10:30 +0100214 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100219 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000220 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100221 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100229 // For double-byte we keep the cell width, so
230 // that we can detect it from the first byte.
231 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232 {
233 if (tilde)
234 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100235 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000236 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100237 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 }
239 else
240 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100241 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
242 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000243 }
244 }
245 }
246 else if (i == 2) /* (re)set fname flag */
247 {
248 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100249 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000250 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100251 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 }
253 else /* i == 3 */ /* (re)set keyword flag */
254 {
255 if (tilde)
256 RESET_CHARTAB(buf, c);
257 else
258 SET_CHARTAB(buf, c);
259 }
260 }
261 ++c;
262 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100263
264 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100266 if (c == ',' && *p == NUL)
267 /* Trailing comma is not allowed. */
268 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000269 }
270 }
271 chartab_initialized = TRUE;
272 return OK;
273}
274
275/*
276 * Translate any special characters in buf[bufsize] in-place.
277 * The result is a string with only printable characters, but if there is not
278 * enough room, not all characters will be translated.
279 */
280 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100281trans_characters(
282 char_u *buf,
283 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284{
285 int len; /* length of string needing translation */
286 int room; /* room in buffer after string */
287 char_u *trs; /* translated character */
288 int trs_len; /* length of trs[] */
289
290 len = (int)STRLEN(buf);
291 room = bufsize - len;
292 while (*buf != 0)
293 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000295 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 len -= trs_len;
297 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 {
299 trs = transchar_byte(*buf);
300 trs_len = (int)STRLEN(trs);
301 if (trs_len > 1)
302 {
303 room -= trs_len - 1;
304 if (room <= 0)
305 return;
306 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
307 }
308 mch_memmove(buf, trs, (size_t)trs_len);
309 --len;
310 }
311 buf += trs_len;
312 }
313}
314
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
	|| defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * The result is built with a running write pointer, so that the output is
 * not rescanned for every appended character.
 */
    char_u *
transstr(char_u *s)
{
    char_u	*res;
    char_u	*dst;		/* where the next output byte goes */
    char_u	*p;
    int		l, len, c;
    char_u	hexbuf[11];

    if (has_mbyte)
    {
	/* Compute the length of the result, taking account of unprintable
	 * multi-byte characters. */
	len = 0;
	p = s;
	while (*p != NUL)
	{
	    if ((l = (*mb_ptr2len)(p)) > 1)
	    {
		c = (*mb_ptr2char)(p);
		p += l;
		if (vim_isprintc(c))
		    len += l;
		else
		{
		    transchar_hex(hexbuf, c);
		    len += (int)STRLEN(hexbuf);
		}
	    }
	    else
	    {
		l = byte2cells(*p++);
		if (l > 0)
		    len += l;
		else
		    len += 4;	/* illegal byte sequence */
	    }
	}
	res = alloc(len + 1);
    }
    else
	res = alloc(vim_strsize(s) + 1);

    if (res == NULL)
	return NULL;

    dst = res;
    p = s;
    while (*p != NUL)
    {
	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
	{
	    c = (*mb_ptr2char)(p);
	    if (vim_isprintc(c))
	    {
		/* copy printable multi-byte char unmodified */
		mch_memmove(dst, p, (size_t)l);
		dst += l;
	    }
	    else
	    {
		transchar_hex(dst, c);
		dst += STRLEN(dst);
	    }
	    p += l;
	}
	else
	{
	    STRCPY(dst, transchar_byte(*p++));
	    dst += STRLEN(dst);
	}
    }
    *dst = NUL;
    return res;
}
#endif
384
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the text is truncated when it
 * does not fit.
 */
    char_u *
str_foldcase(
    char_u	*str,
    int		orglen,
    char_u	*buf,
    int		buflen)
{
    garray_T	ga;
    int		i;
    int		len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf == NULL)
    {
	ga_init2(&ga, 1, 10);
	if (ga_grow(&ga, len + 1) == FAIL)
	    return NULL;
	mch_memmove(ga.ga_data, str, (size_t)len);
	ga.ga_len = len;
	GA_CHAR(len) = NUL;
    }
    else
    {
	if (len >= buflen)	    /* Ugly! */
	    len = buflen - 1;
	mch_memmove(buf, str, (size_t)len);
	buf[len] = NUL;
    }

    /* Make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
	if (!enc_utf8 && !(has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
	{
	    /* Single byte: fold it using the locale. */
	    if (buf == NULL)
		GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
	    else
		buf[i] = TOLOWER_LOC(buf[i]);
	    ++i;
	    continue;
	}

	if (enc_utf8)
	{
	    int	c = utf_ptr2char(STR_PTR(i));
	    int	olen = utf_ptr2len(STR_PTR(i));
	    int	lc = utf_tolower(c);

	    /* Only replace the character when it is not an invalid
	     * sequence (ASCII character or more than one byte) and
	     * utf_tolower() doesn't return the original character. */
	    if ((c < 0x80 || olen > 1) && c != lc)
	    {
		int	nlen = utf_char2len(lc);

		/* The folded character is longer: when there is no room for
		 * the extra bytes keep the old character. */
		if (nlen > olen
			&& (buf == NULL
			    ? ga_grow(&ga, nlen - olen + 1) == FAIL
			    : len + nlen - olen >= buflen))
		{
		    lc = c;
		    nlen = olen;
		}

		/* If the byte length changes need to shift the following
		 * characters forward or backward. */
		if (olen != nlen)
		{
		    if (buf == NULL)
		    {
			STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
			ga.ga_len += nlen - olen;
		    }
		    else
		    {
			STRMOVE(buf + i + nlen, buf + i + olen);
			len += nlen - olen;
		    }
		}
		(void)utf_char2bytes(lc, STR_PTR(i));
	    }
	}
	/* skip to next multi-byte char */
	i += (*mb_ptr2len)(STR_PTR(i));
    }

    if (buf == NULL)
	return (char_u *)ga.ga_data;
    return buf;
}
#endif
497
498/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100499 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100501 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000502 * Does NOT work for multi-byte characters, c must be <= 255.
503 * Also doesn't work for the first byte of a multi-byte, "c" must be a
504 * character!
505 */
506static char_u transchar_buf[7];
507
508 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100509transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000510{
511 int i;
512
513 i = 0;
514 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
515 {
516 transchar_buf[0] = '~';
517 transchar_buf[1] = '@';
518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
530 /* printable character */
531 transchar_buf[i] = c;
532 transchar_buf[i + 1] = NUL;
533 }
534 else
535 transchar_nonprint(transchar_buf + i, c);
536 return transchar_buf;
537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
548 transchar_nonprint(transchar_buf, c);
549 return transchar_buf;
550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
556 * "buf[]". "buf" needs to be able to hold five bytes.
557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100560transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
563 c = NUL; /* we use newline in place of a NUL */
564 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
565 c = NL; /* we use CR in place of NL in this case */
566
567 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
568 transchar_hex(buf, c);
569
570#ifdef EBCDIC
571 /* For EBCDIC only the characters 0-63 and 255 are not printable */
572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
574 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
575#endif
576 {
577 buf[0] = '^';
578#ifdef EBCDIC
579 if (c == DEL)
580 buf[1] = '?'; /* DEL displayed as ^? */
581 else
582 buf[1] = CtrlChar(c);
583#else
584 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
585#endif
586
587 buf[2] = NUL;
588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
591 transchar_hex(buf, c);
592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
595 {
596 buf[0] = '|';
597 buf[1] = c - 0x80;
598 buf[2] = NUL;
599 }
600#else
601 else if (c < 64)
602 {
603 buf[0] = '~';
604 buf[1] = MetaChar(c);
605 buf[2] = NUL;
606 }
607#endif
608 else /* 0x80 - 0x9f and 0xff */
609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
614 buf[0] = '~';
615#ifdef EBCDIC
616 buf[1] = '?'; /* 0xff displayed as ~? */
617#else
618 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
619#endif
620 buf[2] = NUL;
621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    /* Indexing a literal avoids depending on how the digits and letters are
     * laid out in the execution character set. */
    static const char hexdigits[] = "0123456789abcdef";

    return (unsigned)hexdigits[c & 0xf];
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
682 /* UTF-8: above 0x80 need to check the value */
683 if (enc_utf8)
684 return utf_char2cells(c);
685 /* DBCS: double-byte means double-width, except for euc-jp with first
686 * byte 0x8e */
687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
707 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
745/*
746 * Return the number of characters 'c' will take on the screen, taking
747 * into account the size of a tab.
748 * Use a define to make it fast, this is used very often!!!
749 * Also see getvcol() below.
750 */
751
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200752#ifdef FEAT_VARTABS
753# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
754 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
755 { \
756 return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
757 } \
758 else \
759 return ptr2cells(p);
760#else
761# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
Bram Moolenaar071d4272004-06-13 20:20:40 +0000762 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
763 { \
764 int ts; \
765 ts = (buf)->b_p_ts; \
766 return (int)(ts - (col % ts)); \
767 } \
768 else \
769 return ptr2cells(p);
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200770#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use the options of window "wp" and its buffer
 * instead of those of the current window and buffer.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200803 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
836 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100837 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000838 * For multi-byte characters mb_get_class() is used (builtin rules).
839 */
840 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100841vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100843 return vim_iswordc_buf(c, curbuf);
844}
845
846 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100847vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100848{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849 if (c >= 0x100)
850 {
851 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000852 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000853 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100854 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100855 return FALSE;
856 }
857 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858}
859
860/*
861 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
862 */
863 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100864vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000865{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100866 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867}
868
Bram Moolenaar071d4272004-06-13 20:20:40 +0000869 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100870vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100872 int c = *p;
873
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100874 if (has_mbyte && MB_BYTE2LEN(c) > 1)
875 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000878
879/*
880 * return TRUE if 'c' is a valid file-name character
881 * Assume characters above 0x100 are valid (multi-byte).
882 */
883 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100884vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100886 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
888
889/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000890 * return TRUE if 'c' is a valid file-name character or a wildcard character
891 * Assume characters above 0x100 are valid (multi-byte).
892 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
893 * returns false.
894 */
895 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100896vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000897{
898 char_u buf[2];
899
900 buf[0] = (char_u)c;
901 buf[1] = NUL;
902 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
903}
904
905/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200906 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000907 * Assume characters above 0x100 are printable (multi-byte), except for
908 * Unicode.
909 */
910 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100911vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000912{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913 if (enc_utf8 && c >= 0x100)
914 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100915 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000916}
917
918/*
919 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
920 * byte of a double-byte character.
921 */
922 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100923vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000924{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
926 return FALSE;
927 if (enc_utf8 && c >= 0x100)
928 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100929 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930}
931
932/*
933 * like chartabsize(), but also check for line breaks on the screen
934 */
935 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100936lbr_chartabsize(
937 char_u *line UNUSED, /* start of the line */
938 unsigned char *s,
939 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940{
941#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200942 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 {
944#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000945 if (curwin->w_p_wrap)
946 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000947 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
948#ifdef FEAT_LINEBREAK
949 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200950 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000951#endif
952}
953
954/*
955 * Call lbr_chartabsize() and advance the pointer.
956 */
957 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100958lbr_chartabsize_adv(
959 char_u *line, /* start of the line */
960 char_u **s,
961 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962{
963 int retval;
964
Bram Moolenaar597a4222014-06-25 14:39:50 +0200965 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100966 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000967 return retval;
968}
969
970/*
971 * This function is used very often, keep it fast!!!!
972 *
973 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
974 * string at start of line. Warning: *headp is only set if it's a non-zero
975 * value, init to 0 before calling.
976 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000977 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100978win_lbr_chartabsize(
979 win_T *wp,
980 char_u *line UNUSED, /* start of the line */
981 char_u *s,
982 colnr_T col,
983 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000984{
985#ifdef FEAT_LINEBREAK
986 int c;
987 int size;
988 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +0200989 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 colnr_T colmax;
991 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000992 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000993 int numberextra;
994 char_u *ps;
995 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000996 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997
998 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +0200999 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001000 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001001 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001002#endif
1003 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 if (wp->w_p_wrap)
1005 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001006 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1007 }
1008
1009#ifdef FEAT_LINEBREAK
1010 /*
1011 * First get normal size, without 'linebreak'
1012 */
1013 size = win_chartabsize(wp, s, col);
1014 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001015 if (tab_corr)
1016 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001017
1018 /*
1019 * If 'linebreak' set check at a blank before a non-blank if the line
1020 * needs a break here
1021 */
1022 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001023 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001024 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001025 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001026 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001027 {
1028 /*
1029 * Count all characters from first non-blank after a blank up to next
1030 * non-blank after a blank.
1031 */
1032 numberextra = win_col_off(wp);
1033 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001034 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001035 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001036 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001037 colmax += col_adj;
1038 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001039 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001040 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001041 }
1042
Bram Moolenaar071d4272004-06-13 20:20:40 +00001043 for (;;)
1044 {
1045 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001046 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 c = *s;
1048 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001049 && (VIM_ISBREAK(c)
1050 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001051 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052 break;
1053
1054 col2 += win_chartabsize(wp, s, col2);
1055 if (col2 >= colmax) /* doesn't fit */
1056 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001057 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001058 break;
1059 }
1060 }
1061 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1063 && wp->w_p_wrap && in_win_border(wp, col))
1064 {
1065 ++size; /* Count the ">" in the last column. */
1066 mb_added = 1;
1067 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068
1069 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001070 * May have to add something for 'breakindent' and/or 'showbreak'
1071 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001072 * Set *headp to the size of what we add.
1073 */
1074 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001075 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001077 colnr_T sbrlen = 0;
1078 int numberwidth = win_col_off(wp);
1079
1080 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001082 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001083 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001084 col -= wp->w_width;
1085 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001086 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001087 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001088 if (*p_sbr != NUL)
1089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001091 if (col >= sbrlen)
1092 col -= sbrlen;
1093 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001094 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001096 else if (col > 0 && numberextra > 0)
1097 col += numberwidth - win_col_off2(wp);
1098
1099 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001100 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001101 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001103 added = 0;
1104 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001105 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001106 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001108 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001109 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001110 int prev_width = col
1111 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001112
1113 if (width <= 0)
1114 width = (colnr_T)1;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001115 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1116 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001117 // wrapped, add another length of 'sbr'
Bram Moolenaard574ea22015-01-14 19:35:14 +01001118 added += vim_strsize(p_sbr);
1119 }
1120 else
1121 added += vim_strsize(p_sbr);
1122 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001123 if (wp->w_p_bri)
1124 added += get_breakindent_win(wp, line);
1125
Bram Moolenaar95765082014-08-24 21:19:25 +02001126 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001127 if (col != 0)
1128 added = 0;
1129 }
1130 }
1131 if (headp != NULL)
1132 *headp = added + mb_added;
1133 return size;
1134#endif
1135}
1136
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137/*
1138 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1139 * 'wrap' is on. This means we need to check for a double-byte character that
1140 * doesn't fit at the end of the screen line.
1141 */
1142 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001143win_nolbr_chartabsize(
1144 win_T *wp,
1145 char_u *s,
1146 colnr_T col,
1147 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148{
1149 int n;
1150
1151 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1152 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001153# ifdef FEAT_VARTABS
1154 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1155 wp->w_buffer->b_p_vts_array);
1156# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 n = wp->w_buffer->b_p_ts;
1158 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001159# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001160 }
1161 n = ptr2cells(s);
1162 /* Add one cell for a double-width character in the last column of the
1163 * window, displayed with a ">". */
1164 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1165 {
1166 if (headp != NULL)
1167 *headp = 1;
1168 return 3;
1169 }
1170 return n;
1171}
1172
1173/*
1174 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1175 * "wp".
1176 */
1177 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001178in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001179{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001180 int width1; /* width of first line (after line number) */
1181 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182
Bram Moolenaar071d4272004-06-13 20:20:40 +00001183 if (wp->w_width == 0) /* there is no border */
1184 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001185 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001186 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001188 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189 return TRUE;
1190 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001191 if (width2 <= 0)
1192 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001193 return ((vcol - width1) % width2 == width2 - 1);
1194}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
1196/*
1197 * Get virtual column number of pos.
1198 * start: on the first position of this character (TAB, ctrl)
1199 * cursor: where the cursor is on this character (first char, except for TAB)
1200 * end: on the last position of this character (TAB, ctrl)
1201 *
1202 * This is used very often, keep it fast!
1203 */
1204 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001205getvcol(
1206 win_T *wp,
1207 pos_T *pos,
1208 colnr_T *start,
1209 colnr_T *cursor,
1210 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001211{
1212 colnr_T vcol;
1213 char_u *ptr; /* points to current char */
1214 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001215 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001216 int incr;
1217 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001218#ifdef FEAT_VARTABS
1219 int *vts = wp->w_buffer->b_p_vts_array;
1220#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001221 int ts = wp->w_buffer->b_p_ts;
1222 int c;
1223
1224 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001225 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001226 if (pos->col == MAXCOL)
1227 posptr = NULL; /* continue until the NUL */
1228 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001229 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001230 /* Special check for an empty line, which can happen on exit, when
1231 * ml_get_buf() always returns an empty string. */
1232 if (*ptr == NUL)
1233 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001234 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001235 if (has_mbyte)
1236 /* always start on the first byte */
1237 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001238 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001239
1240 /*
1241 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001242 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1243 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001244 * Also use this when 'list' is set but tabs take their normal size.
1245 */
1246 if ((!wp->w_p_list || lcs_tab1 != NUL)
1247#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001248 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001249#endif
1250 )
1251 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252 for (;;)
1253 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001255 c = *ptr;
1256 /* make sure we don't go past the end of the line */
1257 if (c == NUL)
1258 {
1259 incr = 1; /* NUL at end of line only takes one column */
1260 break;
1261 }
1262 /* A tab gets expanded, depending on the current column */
1263 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001264#ifdef FEAT_VARTABS
1265 incr = tabstop_padding(vcol, ts, vts);
1266#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001268#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 else
1270 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 if (has_mbyte)
1272 {
1273 /* For utf-8, if the byte is >= 0x80, need to look at
1274 * further bytes to find the cell width. */
1275 if (enc_utf8 && c >= 0x80)
1276 incr = utf_ptr2cells(ptr);
1277 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001278 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001279
1280 /* If a double-cell char doesn't fit at the end of a line
1281 * it wraps to the next line, it's like this char is three
1282 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001283 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1284 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 {
1286 ++incr;
1287 head = 1;
1288 }
1289 }
1290 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001291 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292 }
1293
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001294 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001295 break;
1296
1297 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001298 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001299 }
1300 }
1301 else
1302 {
1303 for (;;)
1304 {
1305 /* A tab gets expanded, depending on the current column */
1306 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001307 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 /* make sure we don't go past the end of the line */
1309 if (*ptr == NUL)
1310 {
1311 incr = 1; /* NUL at end of line only takes one column */
1312 break;
1313 }
1314
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001315 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001316 break;
1317
1318 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001319 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320 }
1321 }
1322 if (start != NULL)
1323 *start = vcol + head;
1324 if (end != NULL)
1325 *end = vcol + incr - 1;
1326 if (cursor != NULL)
1327 {
1328 if (*ptr == TAB
1329 && (State & NORMAL)
1330 && !wp->w_p_list
1331 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001332 && !(VIsual_active
1333 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 )
1335 *cursor = vcol + incr - 1; /* cursor at end */
1336 else
1337 *cursor = vcol + head; /* cursor at start */
1338 }
1339}
1340
1341/*
1342 * Get virtual cursor column in the current window, pretending 'list' is off.
1343 */
1344 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001345getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001346{
1347 int list_save = curwin->w_p_list;
1348 colnr_T vcol;
1349
1350 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001351 if (posp->coladd)
1352 getvvcol(curwin, posp, NULL, &vcol, NULL);
1353 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001354 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 curwin->w_p_list = list_save;
1356 return vcol;
1357}
1358
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359/*
1360 * Get virtual column in virtual mode.
1361 */
1362 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001363getvvcol(
1364 win_T *wp,
1365 pos_T *pos,
1366 colnr_T *start,
1367 colnr_T *cursor,
1368 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369{
1370 colnr_T col;
1371 colnr_T coladd;
1372 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374
1375 if (virtual_active())
1376 {
1377 /* For virtual mode, only want one value */
1378 getvcol(wp, pos, &col, NULL, NULL);
1379
1380 coladd = pos->coladd;
1381 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001382 /* Cannot put the cursor on part of a wide character. */
1383 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001384 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001385 {
1386 int c = (*mb_ptr2char)(ptr + pos->col);
1387
1388 if (c != TAB && vim_isprintc(c))
1389 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001390 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001391 if (coladd > endadd) /* past end of line */
1392 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001393 else
1394 coladd = 0;
1395 }
1396 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001397 col += coladd;
1398 if (start != NULL)
1399 *start = col;
1400 if (cursor != NULL)
1401 *cursor = col;
1402 if (end != NULL)
1403 *end = col + endadd;
1404 }
1405 else
1406 getvcol(wp, pos, start, cursor, end);
1407}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408
Bram Moolenaar071d4272004-06-13 20:20:40 +00001409/*
1410 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1411 * Used for Visual block mode.
1412 */
1413 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001414getvcols(
1415 win_T *wp,
1416 pos_T *pos1,
1417 pos_T *pos2,
1418 colnr_T *left,
1419 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001420{
1421 colnr_T from1, from2, to1, to2;
1422
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001423 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001424 {
1425 getvvcol(wp, pos1, &from1, NULL, &to1);
1426 getvvcol(wp, pos2, &from2, NULL, &to2);
1427 }
1428 else
1429 {
1430 getvvcol(wp, pos2, &from1, NULL, &to1);
1431 getvvcol(wp, pos1, &from2, NULL, &to2);
1432 }
1433 if (from2 < from1)
1434 *left = from2;
1435 else
1436 *left = from1;
1437 if (to2 > to1)
1438 {
1439 if (*p_sel == 'e' && from2 - 1 >= to1)
1440 *right = from2 - 1;
1441 else
1442 *right = to2;
1443 }
1444 else
1445 *right = to1;
1446}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001447
1448/*
1449 * skipwhite: skip over ' ' and '\t'.
1450 */
1451 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001452skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001453{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001454 char_u *p = q;
1455
Bram Moolenaar1c465442017-03-12 20:10:05 +01001456 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001457 ++p;
1458 return p;
1459}
1460
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1470
1471 int
1472getwhitecols(char_u *p)
1473{
1474 return skipwhite(p) - p;
1475}
1476
1477/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001478 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479 */
1480 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001481skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001482{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001483 char_u *p = q;
1484
Bram Moolenaar071d4272004-06-13 20:20:40 +00001485 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1486 ++p;
1487 return p;
1488}
1489
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * skip over binary digits
 * Returns a pointer to the first character in "q" that is not '0' or '1'.
 */
    char_u *
skipbin(char_u *q)
{
    char_u	*p = q;

    while (vim_isbdigit(*p))	/* skip to next non-binary-digit */
	++p;
    return p;
}

/*
 * skip over digits and hex characters
 * Returns a pointer to the first character in "q" that is not a hex digit.
 */
    char_u *
skiphex(char_u *q)
{
    char_u	*p = q;

    while (vim_isxdigit(*p))	/* skip to next non-hex-digit */
	++p;
    return p;
}
#endif
1517
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001518/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001519 * skip to bin digit (or NUL after the string)
1520 */
1521 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001522skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001523{
1524 char_u *p = q;
1525
1526 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1527 ++p;
1528 return p;
1529}
1530
1531/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001532 * skip to digit (or NUL after the string)
1533 */
1534 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001535skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001536{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001537 char_u *p = q;
1538
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001539 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1540 ++p;
1541 return p;
1542}
1543
1544/*
1545 * skip to hex character (or NUL after the string)
1546 */
1547 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001548skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001550 char_u *p = q;
1551
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001552 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1553 ++p;
1554 return p;
1555}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001556
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' to '9'.
 */
    int
vim_isdigit(int c)
{
    return (c >= '0' && c <= '9');
}
1568
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0' to '9', 'a' to 'f' and 'A' to 'F'.
 */
    int
vim_isxdigit(int c)
{
    return (c >= '0' && c <= '9')
	|| (c >= 'a' && c <= 'f')
	|| (c >= 'A' && c <= 'F');
}
1581
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits, can
 * handle characters > 0x100.
 * Returns non-zero only for '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return (c == '0' || c == '1');
}
1591
Bram Moolenaar78622822005-08-23 21:00:13 +00001592/*
1593 * Vim's own character class functions. These exist because many library
1594 * islower()/toupper() etc. do not work properly: they crash when used with
1595 * invalid values or can't handle latin1 when the locale is C.
1596 * Speed is most important here.
1597 */
1598#define LATIN1LOWER 'l'
1599#define LATIN1UPPER 'U'
1600
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001601static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001602static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1603static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001604
1605 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001606vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001607{
1608 if (c <= '@')
1609 return FALSE;
1610 if (c >= 0x80)
1611 {
1612 if (enc_utf8)
1613 return utf_islower(c);
1614 if (c >= 0x100)
1615 {
1616#ifdef HAVE_ISWLOWER
1617 if (has_mbyte)
1618 return iswlower(c);
1619#endif
1620 /* islower() can't handle these chars and may crash */
1621 return FALSE;
1622 }
1623 if (enc_latin1like)
1624 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1625 }
1626 return islower(c);
1627}
1628
1629 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001630vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001631{
1632 if (c <= '@')
1633 return FALSE;
1634 if (c >= 0x80)
1635 {
1636 if (enc_utf8)
1637 return utf_isupper(c);
1638 if (c >= 0x100)
1639 {
1640#ifdef HAVE_ISWUPPER
1641 if (has_mbyte)
1642 return iswupper(c);
1643#endif
1644 /* islower() can't handle these chars and may crash */
1645 return FALSE;
1646 }
1647 if (enc_latin1like)
1648 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1649 }
1650 return isupper(c);
1651}
1652
1653 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001654vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001655{
1656 if (c <= '@')
1657 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001658 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001659 {
1660 if (enc_utf8)
1661 return utf_toupper(c);
1662 if (c >= 0x100)
1663 {
1664#ifdef HAVE_TOWUPPER
1665 if (has_mbyte)
1666 return towupper(c);
1667#endif
1668 /* toupper() can't handle these chars and may crash */
1669 return c;
1670 }
1671 if (enc_latin1like)
1672 return latin1upper[c];
1673 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001674 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1675 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001676 return TOUPPER_LOC(c);
1677}
1678
1679 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001680vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001681{
1682 if (c <= '@')
1683 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001684 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001685 {
1686 if (enc_utf8)
1687 return utf_tolower(c);
1688 if (c >= 0x100)
1689 {
1690#ifdef HAVE_TOWLOWER
1691 if (has_mbyte)
1692 return towlower(c);
1693#endif
1694 /* tolower() can't handle these chars and may crash */
1695 return c;
1696 }
1697 if (enc_latin1like)
1698 return latin1lower[c];
1699 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001700 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1701 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001702 return TOLOWER_LOC(c);
1703}
Bram Moolenaar78622822005-08-23 21:00:13 +00001704
Bram Moolenaar071d4272004-06-13 20:20:40 +00001705/*
1706 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1707 */
1708 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001709skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001710{
1711 while (*p != ' ' && *p != '\t' && *p != NUL)
1712 ++p;
1713 return p;
1714}
1715
Bram Moolenaar071d4272004-06-13 20:20:40 +00001716/*
1717 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1718 */
1719 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001720skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721{
1722 while (*p != ' ' && *p != '\t' && *p != NUL)
1723 {
1724 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1725 ++p;
1726 ++p;
1727 }
1728 return p;
1729}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001730
1731/*
1732 * Getdigits: Get a number from a string and skip over it.
1733 * Note: the argument is a pointer to a char_u pointer!
1734 */
1735 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001736getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737{
1738 char_u *p;
1739 long retval;
1740
1741 p = *pp;
1742 retval = atol((char *)p);
1743 if (*p == '-') /* skip negative sign */
1744 ++p;
1745 p = skipdigits(p); /* skip to next non-digit */
1746 *pp = p;
1747 return retval;
1748}
1749
1750/*
1751 * Return TRUE if "lbuf" is empty or only contains blanks.
1752 */
1753 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001754vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755{
1756 char_u *p;
1757
1758 p = skipwhite(lbuf);
1759 return (*p == NUL || *p == '\r' || *p == '\n');
1760}
1761
1762/*
1763 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001764 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1765 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 * 0 decimal
1767 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001768 * 'B' bin
1769 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 * 'X' hex
1771 * 'x' hex
1772 * If "len" is not NULL, the length of the number in characters is returned.
1773 * If "nptr" is not NULL, the signed result is returned in it.
1774 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001775 * If "what" contains STR2NR_BIN recognize binary numbers
1776 * If "what" contains STR2NR_OCT recognize octal numbers
1777 * If "what" contains STR2NR_HEX recognize hex numbers
1778 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001779 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001780 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001781 */
1782 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001783vim_str2nr(
1784 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001785 int *prep, // return: type of number 0 = decimal, 'x'
1786 // or 'X' is hex, '0' = octal, 'b' or 'B'
1787 // is bin
1788 int *len, // return: detected length of number
1789 int what, // what numbers to recognize
1790 varnumber_T *nptr, // return: signed result
1791 uvarnumber_T *unptr, // return: unsigned result
1792 int maxlen, // max length of string to check
1793 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001794{
1795 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001796 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001797 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001798 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001799 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001801 if (len != NULL)
1802 *len = 0;
1803
Bram Moolenaar071d4272004-06-13 20:20:40 +00001804 if (ptr[0] == '-')
1805 {
1806 negative = TRUE;
1807 ++ptr;
1808 }
1809
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001810 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001811 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1812 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001813 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001814 pre = ptr[1];
1815 if ((what & STR2NR_HEX)
1816 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1817 && (maxlen == 0 || maxlen > 2))
1818 /* hexadecimal */
1819 ptr += 2;
1820 else if ((what & STR2NR_BIN)
1821 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1822 && (maxlen == 0 || maxlen > 2))
1823 /* binary */
1824 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001825 else
1826 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001827 /* decimal or octal, default is decimal */
1828 pre = 0;
1829 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001830 {
1831 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001832 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001833 {
1834 if (ptr[n] > '7')
1835 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001836 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001837 break;
1838 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001839 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001840 }
1841 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001842 }
1843 }
1844
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001845 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001846 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001847 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1848 {
1849 /* bin */
1850 if (pre != 0)
1851 n += 2; /* skip over "0b" */
1852 while ('0' <= *ptr && *ptr <= '1')
1853 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001854 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001855 if (un <= UVARNUM_MAX / 2)
1856 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001857 else
1858 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001859 ++ptr;
1860 if (n++ == maxlen)
1861 break;
1862 }
1863 }
1864 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001865 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001866 /* octal */
1867 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001868 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001869 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001870 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001871 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1872 else
1873 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001874 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001875 if (n++ == maxlen)
1876 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001877 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001878 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001879 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001880 {
1881 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001882 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001883 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001884 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001885 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001886 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001887 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001888 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1889 else
1890 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001891 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001892 if (n++ == maxlen)
1893 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001894 }
1895 }
1896 else
1897 {
1898 /* decimal */
1899 while (VIM_ISDIGIT(*ptr))
1900 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001901 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1902
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001903 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001904 if (un < UVARNUM_MAX / 10
1905 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1906 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001907 else
1908 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001910 if (n++ == maxlen)
1911 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001912 }
1913 }
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001914 // Check for an alpha-numeric character immediately following, that is
1915 // most likely a typo.
1916 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1917 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001918
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001919 if (prep != NULL)
1920 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921 if (len != NULL)
1922 *len = (int)(ptr - start);
1923 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001924 {
1925 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001926 {
1927 /* avoid ubsan error for overflow */
1928 if (un > VARNUM_MAX)
1929 *nptr = VARNUM_MIN;
1930 else
1931 *nptr = -(varnumber_T)un;
1932 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001933 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001934 {
1935 if (un > VARNUM_MAX)
1936 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001937 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001938 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001939 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001940 if (unptr != NULL)
1941 *unptr = un;
1942}
1943
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * There is no check for this: any character that is not a letter in one of
 * these ranges is handled like a decimal digit.
 */
    int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - '0';
}

#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * Return -1 if one of the characters is not hex.
 * p[0] supplies the upper four bits, p[1] the lower four bits.
 */
    int
hexhex2nr(char_u *p)
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif

1972/*
1973 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001974 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00001975 * backslash is not a normal file name character.
1976 * '$' is a valid file name character, we don't remove the backslash before
1977 * it. This means it is not possible to use an environment variable after a
1978 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1979 * Although "\ name" is valid, the backslash in "Program\ files" must be
1980 * removed. Assume a file name doesn't start with a space.
1981 * For multi-byte names, never remove a backslash before a non-ascii
1982 * character, assume that all multi-byte characters are valid file name
1983 * characters.
1984 */
1985 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001986rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001987{
1988#ifdef BACKSLASH_IN_FILENAME
1989 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001990 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00001991 && (str[1] == ' '
1992 || (str[1] != NUL
1993 && str[1] != '*'
1994 && str[1] != '?'
1995 && !vim_isfilec(str[1]))));
1996#else
1997 return (str[0] == '\\' && str[1] != NUL);
1998#endif
1999}
2000
2001/*
2002 * Halve the number of backslashes in a file name argument.
2003 * For MS-DOS we only do this if the character after the backslash
2004 * is not a normal file character.
2005 */
2006 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002007backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002008{
2009 for ( ; *p; ++p)
2010 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002011 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002012}
2013
2014/*
2015 * backslash_halve() plus save the result in allocated memory.
2016 */
2017 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002018backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002019{
2020 char_u *res;
2021
2022 res = vim_strsave(p);
2023 if (res == NULL)
2024 return p;
2025 backslash_halve(res);
2026 return res;
2027}
2028
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte value, the values are written in
 * octal.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place with their entry
 * in ebcdic2ascii_tab[].
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    int         i;

    for (i = 0; i < len; i++)
        buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif