/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
21/* b_chartab[] is an array of 32 bytes, each bit representing one of the
22 * characters 0-255. */
23#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
38/*
39 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000040 * characters for current buffer.
41 *
42 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
43 * 'isprint' and 'encoding'.
44 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010045 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * - For non-multi-byte index with the byte (same as the character).
47 * - For DBCS index with the first byte.
48 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
49 * the same as the character, if the first byte is 0x80 and above it depends
50 * on further bytes).
51 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010052 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000053 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
54 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
55 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
56 * translate the character before displaying it). Note that only DBCS
57 * characters can have 2 display cells and still be printable.
58 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
59 * - CT_ID_CHAR bit is set when the character can be in an identifier.
60 *
61 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
62 * error, OK otherwise.
63 */
64 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010065init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000066{
67 return buf_init_chartab(curbuf, TRUE);
68}
69
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071buf_init_chartab(
72 buf_T *buf,
73 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000074{
75 int c;
76 int c2;
77 char_u *p;
78 int i;
79 int tilde;
80 int do_isalpha;
81
82 if (global)
83 {
84 /*
85 * Set the default size for printable characters:
86 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
87 * This also inits all 'isident' and 'isfname' flags to FALSE.
88 *
89 * EBCDIC: all chars below ' ' are not printable, all others are
90 * printable.
91 */
92 c = 0;
93 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010094 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000095#ifdef EBCDIC
96 while (c < 255)
97#else
98 while (c <= '~')
99#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101 while (c < 256)
102 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100105 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100108 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100111 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100114 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000115 }
116
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 /* Assume that every multi-byte char is a filename character. */
118 for (c = 1; c < 256; ++c)
119 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
120 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
121 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100122 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000123 }
124
125 /*
126 * Init word char flags all to FALSE
127 */
128 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000129 if (enc_dbcs != 0)
130 for (c = 0; c < 256; ++c)
131 {
132 /* double-byte characters are probably word characters */
133 if (MB_BYTE2LEN(c) == 2)
134 SET_CHARTAB(buf, c);
135 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000136
137#ifdef FEAT_LISP
138 /*
139 * In lisp mode the '-' character is included in keywords.
140 */
141 if (buf->b_p_lisp)
142 SET_CHARTAB(buf, '-');
143#endif
144
145 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
146 * options Each option is a list of characters, character numbers or
147 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
148 */
149 for (i = global ? 0 : 3; i <= 3; ++i)
150 {
151 if (i == 0)
152 p = p_isi; /* first round: 'isident' */
153 else if (i == 1)
154 p = p_isp; /* second round: 'isprint' */
155 else if (i == 2)
156 p = p_isf; /* third round: 'isfname' */
157 else /* i == 3 */
158 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
159
160 while (*p)
161 {
162 tilde = FALSE;
163 do_isalpha = FALSE;
164 if (*p == '^' && p[1] != NUL)
165 {
166 tilde = TRUE;
167 ++p;
168 }
169 if (VIM_ISDIGIT(*p))
170 c = getdigits(&p);
171 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000172 if (has_mbyte)
173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
182 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 if (has_mbyte)
184 c2 = mb_ptr2char_adv(&p);
185 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000186 c2 = *p++;
187 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000188 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000189 || !(*p == NUL || *p == ','))
190 return FAIL;
191
192 if (c2 == -1) /* not a range */
193 {
194 /*
195 * A single '@' (not "@-@"):
196 * Decide on letters being ID/printable/keyword chars with
197 * standard function isalpha(). This takes care of locale for
198 * single-byte characters).
199 */
200 if (c == '@')
201 {
202 do_isalpha = TRUE;
203 c = 1;
204 c2 = 255;
205 }
206 else
207 c2 = c;
208 }
209 while (c <= c2)
210 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000211 /* Use the MB_ functions here, because isalpha() doesn't
212 * work properly when 'encoding' is "latin1" and the locale is
213 * "C". */
Bram Moolenaar14184a32019-02-16 15:10:30 +0100214 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100219 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000220 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100221 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100229 // For double-byte we keep the cell width, so
230 // that we can detect it from the first byte.
231 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232 {
233 if (tilde)
234 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100235 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000236 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100237 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 }
239 else
240 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100241 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
242 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000243 }
244 }
245 }
246 else if (i == 2) /* (re)set fname flag */
247 {
248 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100249 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000250 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100251 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 }
253 else /* i == 3 */ /* (re)set keyword flag */
254 {
255 if (tilde)
256 RESET_CHARTAB(buf, c);
257 else
258 SET_CHARTAB(buf, c);
259 }
260 }
261 ++c;
262 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100263
264 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100266 if (c == ',' && *p == NUL)
267 /* Trailing comma is not allowed. */
268 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000269 }
270 }
271 chartab_initialized = TRUE;
272 return OK;
273}
274
275/*
276 * Translate any special characters in buf[bufsize] in-place.
277 * The result is a string with only printable characters, but if there is not
278 * enough room, not all characters will be translated.
279 */
280 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100281trans_characters(
282 char_u *buf,
283 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284{
285 int len; /* length of string needing translation */
286 int room; /* room in buffer after string */
287 char_u *trs; /* translated character */
288 int trs_len; /* length of trs[] */
289
290 len = (int)STRLEN(buf);
291 room = bufsize - len;
292 while (*buf != 0)
293 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000295 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 len -= trs_len;
297 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 {
299 trs = transchar_byte(*buf);
300 trs_len = (int)STRLEN(trs);
301 if (trs_len > 1)
302 {
303 room -= trs_len - 1;
304 if (room <= 0)
305 return;
306 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
307 }
308 mch_memmove(buf, trs, (size_t)trs_len);
309 --len;
310 }
311 buf += trs_len;
312 }
313}
314
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * Works in two passes that apply exactly the same translation: the first
 * pass only adds up the byte length of every translated piece, the second
 * pass copies the pieces.  Measuring the actual translation (instead of the
 * display cell count) guarantees the allocation always matches what is
 * written, and appending through a cursor avoids rescanning the result for
 * every character.
 */
char_u *
transstr(char_u *s)
{
    char_u  *res;
    char_u  *dst;           /* where the next piece goes in "res" */
    char_u  *p;
    char_u  *trs;           /* translation of a single byte */
    int     l, len, c;
    char_u  hexbuf[11];

    /* Pass 1: compute the length of the result in bytes. */
    len = 0;
    p = s;
    while (*p != NUL)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            p += l;
            if (vim_isprintc(c))
                len += l;       /* copied unmodified */
            else
            {
                /* unprintable multi-byte character: shown as hex */
                transchar_hex(hexbuf, c);
                len += (int)STRLEN(hexbuf);
            }
        }
        else
            /* single byte (or illegal byte sequence) */
            len += (int)STRLEN(transchar_byte(*p++));
    }

    res = alloc((unsigned)(len + 1));
    if (res == NULL)
        return NULL;

    /* Pass 2: produce the translation. */
    dst = res;
    p = s;
    while (*p != NUL)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
            {
                /* append printable multi-byte char */
                mch_memmove(dst, p, (size_t)l);
                dst += l;
            }
            else
            {
                transchar_hex(dst, c);
                dst += STRLEN(dst);
            }
            p += l;
        }
        else
        {
            /* transchar_byte() returns a static buffer; copy it right
             * away. */
            trs = transchar_byte(*p++);
            l = (int)STRLEN(trs);
            mch_memmove(dst, trs, (size_t)l);
            dst += l;
        }
    }
    *dst = NUL;
    return res;
}
#endif
384
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]", truncating when it doesn't
 * fit.
 */
char_u *
str_foldcase(
    char_u  *str,
    int     orglen,
    char_u  *buf,
    int     buflen)
{
    garray_T    ga;
    int         idx = 0;
    int         len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf != NULL)
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }
    else
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }

    /* Make each character lower case. */
    while (STR_CHAR(idx) != NUL)
    {
        if (!enc_utf8 && !(has_mbyte && MB_BYTE2LEN(STR_CHAR(idx)) > 1))
        {
            /* Single-byte character: fold with the locale. */
            if (buf == NULL)
                GA_CHAR(idx) = TOLOWER_LOC(GA_CHAR(idx));
            else
                buf[idx] = TOLOWER_LOC(buf[idx]);
            ++idx;
            continue;
        }

        if (enc_utf8)
        {
            int c = utf_ptr2char(STR_PTR(idx));
            int olen = utf_ptr2len(STR_PTR(idx));
            int lc = utf_tolower(c);

            /* Only replace the character when it is not an invalid
             * sequence (ASCII character or more than one byte) and
             * utf_tolower() doesn't return the original character. */
            if ((c < 0x80 || olen > 1) && c != lc)
            {
                int nlen = utf_char2len(lc);

                /* A longer replacement needs extra room; when there is
                 * none keep the old char. */
                if (nlen > olen
                        && (buf == NULL
                            ? ga_grow(&ga, nlen - olen + 1) == FAIL
                            : len + nlen - olen >= buflen))
                {
                    lc = c;
                    nlen = olen;
                }

                /* If the byte length changes need to shift the following
                 * characters forward or backward. */
                if (olen != nlen)
                {
                    if (buf == NULL)
                    {
                        STRMOVE(GA_PTR(idx) + nlen, GA_PTR(idx) + olen);
                        ga.ga_len += nlen - olen;
                    }
                    else
                    {
                        STRMOVE(buf + idx + nlen, buf + idx + olen);
                        len += nlen - olen;
                    }
                }
                (void)utf_char2bytes(lc, STR_PTR(idx));
            }
        }

        /* skip to next multi-byte char */
        idx += (*mb_ptr2len)(STR_PTR(idx));
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
497
498/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100499 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100501 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000502 * Does NOT work for multi-byte characters, c must be <= 255.
503 * Also doesn't work for the first byte of a multi-byte, "c" must be a
504 * character!
505 */
506static char_u transchar_buf[7];
507
508 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100509transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000510{
511 int i;
512
513 i = 0;
514 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
515 {
516 transchar_buf[0] = '~';
517 transchar_buf[1] = '@';
518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
530 /* printable character */
531 transchar_buf[i] = c;
532 transchar_buf[i + 1] = NUL;
533 }
534 else
535 transchar_nonprint(transchar_buf + i, c);
536 return transchar_buf;
537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
548 transchar_nonprint(transchar_buf, c);
549 return transchar_buf;
550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
556 * "buf[]". "buf" needs to be able to hold five bytes.
557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100560transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
563 c = NUL; /* we use newline in place of a NUL */
564 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
565 c = NL; /* we use CR in place of NL in this case */
566
567 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
568 transchar_hex(buf, c);
569
570#ifdef EBCDIC
571 /* For EBCDIC only the characters 0-63 and 255 are not printable */
572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
574 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
575#endif
576 {
577 buf[0] = '^';
578#ifdef EBCDIC
579 if (c == DEL)
580 buf[1] = '?'; /* DEL displayed as ^? */
581 else
582 buf[1] = CtrlChar(c);
583#else
584 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
585#endif
586
587 buf[2] = NUL;
588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
591 transchar_hex(buf, c);
592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
595 {
596 buf[0] = '|';
597 buf[1] = c - 0x80;
598 buf[2] = NUL;
599 }
600#else
601 else if (c < 64)
602 {
603 buf[0] = '~';
604 buf[1] = MetaChar(c);
605 buf[2] = NUL;
606 }
607#endif
608 else /* 0x80 - 0x9f and 0xff */
609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
614 buf[0] = '~';
615#ifdef EBCDIC
616 buf[1] = '?'; /* 0xff displayed as ~? */
617#else
618 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
619#endif
620 buf[2] = NUL;
621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
static unsigned
nr2hex(unsigned c)
{
    unsigned nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
682 /* UTF-8: above 0x80 need to check the value */
683 if (enc_utf8)
684 return utf_char2cells(c);
685 /* DBCS: double-byte means double-width, except for euc-jp with first
686 * byte 0x8e */
687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
707 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return from
 * the calling function, thus it must be the last statement of that function:
 * - a TAB that is displayed as white space ('list' off, or "tab:" present in
 *   'listchars') takes the cells up to the next tab stop after "col";
 * - anything else takes ptr2cells(p) cells.
 * Every macro argument is parenthesized where it is used inside an
 * expression, so that passing an expression for "col" is safe.
 */
#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and the buffer displayed in it.
 */
static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200803 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
836 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100837 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000838 * For multi-byte characters mb_get_class() is used (builtin rules).
839 */
840 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100841vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100843 return vim_iswordc_buf(c, curbuf);
844}
845
846 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100847vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100848{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849 if (c >= 0x100)
850 {
851 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000852 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000853 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100854 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100855 return FALSE;
856 }
857 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858}
859
860/*
861 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
862 */
863 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100864vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000865{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100866 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867}
868
Bram Moolenaar071d4272004-06-13 20:20:40 +0000869 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100870vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100872 int c = *p;
873
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100874 if (has_mbyte && MB_BYTE2LEN(c) > 1)
875 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000878
879/*
880 * return TRUE if 'c' is a valid file-name character
881 * Assume characters above 0x100 are valid (multi-byte).
882 */
883 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100884vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100886 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
888
889/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000890 * return TRUE if 'c' is a valid file-name character or a wildcard character
891 * Assume characters above 0x100 are valid (multi-byte).
892 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
893 * returns false.
894 */
895 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100896vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000897{
898 char_u buf[2];
899
900 buf[0] = (char_u)c;
901 buf[1] = NUL;
902 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
903}
904
905/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200906 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000907 * Assume characters above 0x100 are printable (multi-byte), except for
908 * Unicode.
909 */
910 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100911vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000912{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913 if (enc_utf8 && c >= 0x100)
914 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100915 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000916}
917
918/*
919 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
920 * byte of a double-byte character.
921 */
922 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100923vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000924{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
926 return FALSE;
927 if (enc_utf8 && c >= 0x100)
928 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100929 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930}
931
932/*
933 * like chartabsize(), but also check for line breaks on the screen
934 */
935 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100936lbr_chartabsize(
937 char_u *line UNUSED, /* start of the line */
938 unsigned char *s,
939 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940{
941#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200942 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 {
944#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000945 if (curwin->w_p_wrap)
946 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000947 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
948#ifdef FEAT_LINEBREAK
949 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200950 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000951#endif
952}
953
954/*
955 * Call lbr_chartabsize() and advance the pointer.
956 */
957 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100958lbr_chartabsize_adv(
959 char_u *line, /* start of the line */
960 char_u **s,
961 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962{
963 int retval;
964
Bram Moolenaar597a4222014-06-25 14:39:50 +0200965 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100966 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000967 return retval;
968}
969
970/*
971 * This function is used very often, keep it fast!!!!
972 *
973 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
974 * string at start of line. Warning: *headp is only set if it's a non-zero
975 * value, init to 0 before calling.
976 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000977 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100978win_lbr_chartabsize(
979 win_T *wp,
980 char_u *line UNUSED, /* start of the line */
981 char_u *s,
982 colnr_T col,
983 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000984{
985#ifdef FEAT_LINEBREAK
986 int c;
987 int size;
988 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +0200989 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 colnr_T colmax;
991 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000992 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000993 int numberextra;
994 char_u *ps;
995 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000996 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997
998 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +0200999 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001000 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001001 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001002#endif
1003 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 if (wp->w_p_wrap)
1005 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001006 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1007 }
1008
1009#ifdef FEAT_LINEBREAK
1010 /*
1011 * First get normal size, without 'linebreak'
1012 */
1013 size = win_chartabsize(wp, s, col);
1014 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001015 if (tab_corr)
1016 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001017
1018 /*
1019 * If 'linebreak' set check at a blank before a non-blank if the line
1020 * needs a break here
1021 */
1022 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001023 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001024 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001025 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001026 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001027 {
1028 /*
1029 * Count all characters from first non-blank after a blank up to next
1030 * non-blank after a blank.
1031 */
1032 numberextra = win_col_off(wp);
1033 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001034 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001035 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001036 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001037 colmax += col_adj;
1038 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001039 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001040 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001041 }
1042
Bram Moolenaar071d4272004-06-13 20:20:40 +00001043 for (;;)
1044 {
1045 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001046 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 c = *s;
1048 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001049 && (VIM_ISBREAK(c)
1050 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001051 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052 break;
1053
1054 col2 += win_chartabsize(wp, s, col2);
1055 if (col2 >= colmax) /* doesn't fit */
1056 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001057 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001058 break;
1059 }
1060 }
1061 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1063 && wp->w_p_wrap && in_win_border(wp, col))
1064 {
1065 ++size; /* Count the ">" in the last column. */
1066 mb_added = 1;
1067 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068
1069 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001070 * May have to add something for 'breakindent' and/or 'showbreak'
1071 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001072 * Set *headp to the size of what we add.
1073 */
1074 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001075 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001077 colnr_T sbrlen = 0;
1078 int numberwidth = win_col_off(wp);
1079
1080 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001082 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001083 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001084 col -= wp->w_width;
1085 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001086 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001087 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001088 if (*p_sbr != NUL)
1089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001091 if (col >= sbrlen)
1092 col -= sbrlen;
1093 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001094 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001096 else if (col > 0 && numberextra > 0)
1097 col += numberwidth - win_col_off2(wp);
1098
1099 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001100 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001101 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001103 added = 0;
1104 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001105 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001106 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 {
1108 /* calculate effective window width */
Bram Moolenaar02631462017-09-22 15:20:32 +02001109 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001110 int prev_width = col
1111 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001112 if (width == 0)
Bram Moolenaar02631462017-09-22 15:20:32 +02001113 width = (colnr_T)wp->w_width;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001114 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1115 if ((size - prev_width) % width)
1116 /* wrapped, add another length of 'sbr' */
1117 added += vim_strsize(p_sbr);
1118 }
1119 else
1120 added += vim_strsize(p_sbr);
1121 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001122 if (wp->w_p_bri)
1123 added += get_breakindent_win(wp, line);
1124
Bram Moolenaar95765082014-08-24 21:19:25 +02001125 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001126 if (col != 0)
1127 added = 0;
1128 }
1129 }
1130 if (headp != NULL)
1131 *headp = added + mb_added;
1132 return size;
1133#endif
1134}
1135
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136/*
1137 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1138 * 'wrap' is on. This means we need to check for a double-byte character that
1139 * doesn't fit at the end of the screen line.
1140 */
1141 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001142win_nolbr_chartabsize(
1143 win_T *wp,
1144 char_u *s,
1145 colnr_T col,
1146 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001147{
1148 int n;
1149
1150 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1151 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001152# ifdef FEAT_VARTABS
1153 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1154 wp->w_buffer->b_p_vts_array);
1155# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001156 n = wp->w_buffer->b_p_ts;
1157 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001158# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001159 }
1160 n = ptr2cells(s);
1161 /* Add one cell for a double-width character in the last column of the
1162 * window, displayed with a ">". */
1163 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1164 {
1165 if (headp != NULL)
1166 *headp = 1;
1167 return 3;
1168 }
1169 return n;
1170}
1171
1172/*
1173 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1174 * "wp".
1175 */
1176 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001177in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001178{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001179 int width1; /* width of first line (after line number) */
1180 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001181
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182 if (wp->w_width == 0) /* there is no border */
1183 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001184 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001185 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001186 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001187 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001188 return TRUE;
1189 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001190 if (width2 <= 0)
1191 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192 return ((vcol - width1) % width2 == width2 - 1);
1193}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194
1195/*
1196 * Get virtual column number of pos.
1197 * start: on the first position of this character (TAB, ctrl)
1198 * cursor: where the cursor is on this character (first char, except for TAB)
1199 * end: on the last position of this character (TAB, ctrl)
1200 *
1201 * This is used very often, keep it fast!
1202 */
1203 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001204getvcol(
1205 win_T *wp,
1206 pos_T *pos,
1207 colnr_T *start,
1208 colnr_T *cursor,
1209 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210{
1211 colnr_T vcol;
1212 char_u *ptr; /* points to current char */
1213 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001214 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001215 int incr;
1216 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001217#ifdef FEAT_VARTABS
1218 int *vts = wp->w_buffer->b_p_vts_array;
1219#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001220 int ts = wp->w_buffer->b_p_ts;
1221 int c;
1222
1223 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001224 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001225 if (pos->col == MAXCOL)
1226 posptr = NULL; /* continue until the NUL */
1227 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001228 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001229 /* Special check for an empty line, which can happen on exit, when
1230 * ml_get_buf() always returns an empty string. */
1231 if (*ptr == NUL)
1232 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001233 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001234 if (has_mbyte)
1235 /* always start on the first byte */
1236 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001237 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238
1239 /*
1240 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001241 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1242 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001243 * Also use this when 'list' is set but tabs take their normal size.
1244 */
1245 if ((!wp->w_p_list || lcs_tab1 != NUL)
1246#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001247 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001248#endif
1249 )
1250 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001251 for (;;)
1252 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001253 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254 c = *ptr;
1255 /* make sure we don't go past the end of the line */
1256 if (c == NUL)
1257 {
1258 incr = 1; /* NUL at end of line only takes one column */
1259 break;
1260 }
1261 /* A tab gets expanded, depending on the current column */
1262 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001263#ifdef FEAT_VARTABS
1264 incr = tabstop_padding(vcol, ts, vts);
1265#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001266 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001267#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001268 else
1269 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 if (has_mbyte)
1271 {
1272 /* For utf-8, if the byte is >= 0x80, need to look at
1273 * further bytes to find the cell width. */
1274 if (enc_utf8 && c >= 0x80)
1275 incr = utf_ptr2cells(ptr);
1276 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001277 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001278
1279 /* If a double-cell char doesn't fit at the end of a line
1280 * it wraps to the next line, it's like this char is three
1281 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001282 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1283 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001284 {
1285 ++incr;
1286 head = 1;
1287 }
1288 }
1289 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001290 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001291 }
1292
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001293 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001294 break;
1295
1296 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001297 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001298 }
1299 }
1300 else
1301 {
1302 for (;;)
1303 {
1304 /* A tab gets expanded, depending on the current column */
1305 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001306 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001307 /* make sure we don't go past the end of the line */
1308 if (*ptr == NUL)
1309 {
1310 incr = 1; /* NUL at end of line only takes one column */
1311 break;
1312 }
1313
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001314 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001315 break;
1316
1317 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001318 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319 }
1320 }
1321 if (start != NULL)
1322 *start = vcol + head;
1323 if (end != NULL)
1324 *end = vcol + incr - 1;
1325 if (cursor != NULL)
1326 {
1327 if (*ptr == TAB
1328 && (State & NORMAL)
1329 && !wp->w_p_list
1330 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001331 && !(VIsual_active
1332 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001333 )
1334 *cursor = vcol + incr - 1; /* cursor at end */
1335 else
1336 *cursor = vcol + head; /* cursor at start */
1337 }
1338}
1339
1340/*
1341 * Get virtual cursor column in the current window, pretending 'list' is off.
1342 */
1343 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001344getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001345{
1346 int list_save = curwin->w_p_list;
1347 colnr_T vcol;
1348
1349 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001350 if (posp->coladd)
1351 getvvcol(curwin, posp, NULL, &vcol, NULL);
1352 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001353 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001354 curwin->w_p_list = list_save;
1355 return vcol;
1356}
1357
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358/*
1359 * Get virtual column in virtual mode.
1360 */
1361 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001362getvvcol(
1363 win_T *wp,
1364 pos_T *pos,
1365 colnr_T *start,
1366 colnr_T *cursor,
1367 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368{
1369 colnr_T col;
1370 colnr_T coladd;
1371 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373
1374 if (virtual_active())
1375 {
1376 /* For virtual mode, only want one value */
1377 getvcol(wp, pos, &col, NULL, NULL);
1378
1379 coladd = pos->coladd;
1380 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001381 /* Cannot put the cursor on part of a wide character. */
1382 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001383 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 {
1385 int c = (*mb_ptr2char)(ptr + pos->col);
1386
1387 if (c != TAB && vim_isprintc(c))
1388 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001389 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001390 if (coladd > endadd) /* past end of line */
1391 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001392 else
1393 coladd = 0;
1394 }
1395 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001396 col += coladd;
1397 if (start != NULL)
1398 *start = col;
1399 if (cursor != NULL)
1400 *cursor = col;
1401 if (end != NULL)
1402 *end = col + endadd;
1403 }
1404 else
1405 getvcol(wp, pos, start, cursor, end);
1406}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001407
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408/*
1409 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1410 * Used for Visual block mode.
1411 */
1412 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001413getvcols(
1414 win_T *wp,
1415 pos_T *pos1,
1416 pos_T *pos2,
1417 colnr_T *left,
1418 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001419{
1420 colnr_T from1, from2, to1, to2;
1421
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001422 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001423 {
1424 getvvcol(wp, pos1, &from1, NULL, &to1);
1425 getvvcol(wp, pos2, &from2, NULL, &to2);
1426 }
1427 else
1428 {
1429 getvvcol(wp, pos2, &from1, NULL, &to1);
1430 getvvcol(wp, pos1, &from2, NULL, &to2);
1431 }
1432 if (from2 < from1)
1433 *left = from2;
1434 else
1435 *left = from1;
1436 if (to2 > to1)
1437 {
1438 if (*p_sel == 'e' && from2 - 1 >= to1)
1439 *right = from2 - 1;
1440 else
1441 *right = to2;
1442 }
1443 else
1444 *right = to1;
1445}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001446
1447/*
1448 * skipwhite: skip over ' ' and '\t'.
1449 */
1450 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001451skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001452{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001453 char_u *p = q;
1454
Bram Moolenaar1c465442017-03-12 20:10:05 +01001455 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001456 ++p;
1457 return p;
1458}
1459
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1469
1470 int
1471getwhitecols(char_u *p)
1472{
1473 return skipwhite(p) - p;
1474}
1475
1476/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001477 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001478 */
1479 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001480skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001481{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001482 char_u *p = q;
1483
Bram Moolenaar071d4272004-06-13 20:20:40 +00001484 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1485 ++p;
1486 return p;
1487}
1488
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Skip over binary digits: return a pointer to the first character in "q"
 * that is not '0' or '1'.
 */
    char_u *
skipbin(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isbdigit(*p); ++p)
	;
    return p;
}

/*
 * Skip over digits and hex characters: return a pointer to the first
 * character in "q" that is not a hex digit.
 */
    char_u *
skiphex(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isxdigit(*p); ++p)
	;
    return p;
}
#endif
1516
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001517/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001518 * skip to bin digit (or NUL after the string)
1519 */
1520 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001521skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001522{
1523 char_u *p = q;
1524
1525 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1526 ++p;
1527 return p;
1528}
1529
1530/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001531 * skip to digit (or NUL after the string)
1532 */
1533 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001534skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001535{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001536 char_u *p = q;
1537
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001538 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1539 ++p;
1540 return p;
1541}
1542
1543/*
1544 * skip to hex character (or NUL after the string)
1545 */
1546 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001547skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001548{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001549 char_u *p = q;
1550
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001551 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1552 ++p;
1553 return p;
1554}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001555
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1567
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(int c)
{
    int	    is_decimal = (c >= '0' && c <= '9');
    int	    is_lower_hex = (c >= 'a' && c <= 'f');
    int	    is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1580
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits, can
 * handle characters > 0x100.
 */
    int
vim_isbdigit(int c)
{
    return (c >= '0' && c <= '1');
}
1590
Bram Moolenaar78622822005-08-23 21:00:13 +00001591/*
1592 * Vim's own character class functions. These exist because many library
1593 * islower()/toupper() etc. do not work properly: they crash when used with
1594 * invalid values or can't handle latin1 when the locale is C.
1595 * Speed is most important here.
1596 */
1597#define LATIN1LOWER 'l'
1598#define LATIN1UPPER 'U'
1599
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001600static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001601static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1602static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001603
1604 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001605vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001606{
1607 if (c <= '@')
1608 return FALSE;
1609 if (c >= 0x80)
1610 {
1611 if (enc_utf8)
1612 return utf_islower(c);
1613 if (c >= 0x100)
1614 {
1615#ifdef HAVE_ISWLOWER
1616 if (has_mbyte)
1617 return iswlower(c);
1618#endif
1619 /* islower() can't handle these chars and may crash */
1620 return FALSE;
1621 }
1622 if (enc_latin1like)
1623 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1624 }
1625 return islower(c);
1626}
1627
1628 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001629vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001630{
1631 if (c <= '@')
1632 return FALSE;
1633 if (c >= 0x80)
1634 {
1635 if (enc_utf8)
1636 return utf_isupper(c);
1637 if (c >= 0x100)
1638 {
1639#ifdef HAVE_ISWUPPER
1640 if (has_mbyte)
1641 return iswupper(c);
1642#endif
1643 /* islower() can't handle these chars and may crash */
1644 return FALSE;
1645 }
1646 if (enc_latin1like)
1647 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1648 }
1649 return isupper(c);
1650}
1651
1652 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001653vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001654{
1655 if (c <= '@')
1656 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001657 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001658 {
1659 if (enc_utf8)
1660 return utf_toupper(c);
1661 if (c >= 0x100)
1662 {
1663#ifdef HAVE_TOWUPPER
1664 if (has_mbyte)
1665 return towupper(c);
1666#endif
1667 /* toupper() can't handle these chars and may crash */
1668 return c;
1669 }
1670 if (enc_latin1like)
1671 return latin1upper[c];
1672 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001673 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1674 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001675 return TOUPPER_LOC(c);
1676}
1677
1678 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001679vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001680{
1681 if (c <= '@')
1682 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001683 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001684 {
1685 if (enc_utf8)
1686 return utf_tolower(c);
1687 if (c >= 0x100)
1688 {
1689#ifdef HAVE_TOWLOWER
1690 if (has_mbyte)
1691 return towlower(c);
1692#endif
1693 /* tolower() can't handle these chars and may crash */
1694 return c;
1695 }
1696 if (enc_latin1like)
1697 return latin1lower[c];
1698 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001699 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1700 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001701 return TOLOWER_LOC(c);
1702}
Bram Moolenaar78622822005-08-23 21:00:13 +00001703
Bram Moolenaar071d4272004-06-13 20:20:40 +00001704/*
1705 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1706 */
1707 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001708skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001709{
1710 while (*p != ' ' && *p != '\t' && *p != NUL)
1711 ++p;
1712 return p;
1713}
1714
Bram Moolenaar071d4272004-06-13 20:20:40 +00001715/*
1716 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1717 */
1718 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001719skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001720{
1721 while (*p != ' ' && *p != '\t' && *p != NUL)
1722 {
1723 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1724 ++p;
1725 ++p;
1726 }
1727 return p;
1728}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001729
1730/*
1731 * Getdigits: Get a number from a string and skip over it.
1732 * Note: the argument is a pointer to a char_u pointer!
1733 */
1734 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001735getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001736{
1737 char_u *p;
1738 long retval;
1739
1740 p = *pp;
1741 retval = atol((char *)p);
1742 if (*p == '-') /* skip negative sign */
1743 ++p;
1744 p = skipdigits(p); /* skip to next non-digit */
1745 *pp = p;
1746 return retval;
1747}
1748
1749/*
1750 * Return TRUE if "lbuf" is empty or only contains blanks.
1751 */
1752 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001753vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001754{
1755 char_u *p;
1756
1757 p = skipwhite(lbuf);
1758 return (*p == NUL || *p == '\r' || *p == '\n');
1759}
1760
1761/*
1762 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001763 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1764 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 * 0 decimal
1766 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001767 * 'B' bin
1768 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001769 * 'X' hex
1770 * 'x' hex
1771 * If "len" is not NULL, the length of the number in characters is returned.
1772 * If "nptr" is not NULL, the signed result is returned in it.
1773 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001774 * If "what" contains STR2NR_BIN recognize binary numbers
1775 * If "what" contains STR2NR_OCT recognize octal numbers
1776 * If "what" contains STR2NR_HEX recognize hex numbers
1777 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001778 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001779 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001780 */
1781 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001782vim_str2nr(
1783 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001784 int *prep, // return: type of number 0 = decimal, 'x'
1785 // or 'X' is hex, '0' = octal, 'b' or 'B'
1786 // is bin
1787 int *len, // return: detected length of number
1788 int what, // what numbers to recognize
1789 varnumber_T *nptr, // return: signed result
1790 uvarnumber_T *unptr, // return: unsigned result
1791 int maxlen, // max length of string to check
1792 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793{
1794 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001795 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001796 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001797 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001798 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001799
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001800 if (len != NULL)
1801 *len = 0;
1802
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 if (ptr[0] == '-')
1804 {
1805 negative = TRUE;
1806 ++ptr;
1807 }
1808
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001809 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001810 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1811 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001812 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001813 pre = ptr[1];
1814 if ((what & STR2NR_HEX)
1815 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1816 && (maxlen == 0 || maxlen > 2))
1817 /* hexadecimal */
1818 ptr += 2;
1819 else if ((what & STR2NR_BIN)
1820 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1821 && (maxlen == 0 || maxlen > 2))
1822 /* binary */
1823 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001824 else
1825 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001826 /* decimal or octal, default is decimal */
1827 pre = 0;
1828 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001829 {
1830 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001831 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001832 {
1833 if (ptr[n] > '7')
1834 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001835 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001836 break;
1837 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001838 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001839 }
1840 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841 }
1842 }
1843
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001844 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001845 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001846 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1847 {
1848 /* bin */
1849 if (pre != 0)
1850 n += 2; /* skip over "0b" */
1851 while ('0' <= *ptr && *ptr <= '1')
1852 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001853 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001854 if (un <= UVARNUM_MAX / 2)
1855 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001856 else
1857 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001858 ++ptr;
1859 if (n++ == maxlen)
1860 break;
1861 }
1862 }
1863 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001864 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001865 /* octal */
1866 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001868 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001869 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001870 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1871 else
1872 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001873 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001874 if (n++ == maxlen)
1875 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001876 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001877 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001878 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001879 {
1880 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001881 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001882 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001883 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001884 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001885 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001886 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001887 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1888 else
1889 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001890 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001891 if (n++ == maxlen)
1892 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001893 }
1894 }
1895 else
1896 {
1897 /* decimal */
1898 while (VIM_ISDIGIT(*ptr))
1899 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001900 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1901
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001902 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001903 if (un < UVARNUM_MAX / 10
1904 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1905 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001906 else
1907 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001908 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001909 if (n++ == maxlen)
1910 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001911 }
1912 }
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001913 // Check for an alpha-numeric character immediately following, that is
1914 // most likely a typo.
1915 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1916 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001917
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001918 if (prep != NULL)
1919 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001920 if (len != NULL)
1921 *len = (int)(ptr - start);
1922 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001923 {
1924 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001925 {
1926 /* avoid ubsan error for overflow */
1927 if (un > VARNUM_MAX)
1928 *nptr = VARNUM_MIN;
1929 else
1930 *nptr = -(varnumber_T)un;
1931 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001932 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001933 {
1934 if (un > VARNUM_MAX)
1935 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001936 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001937 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001938 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001939 if (unptr != NULL)
1940 *unptr = un;
1941}
1942
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is treated like a decimal digit (c - '0').
 */
int
hex2nr(int c)
{
    int first = '0';    // first character of the range "c" falls in
    int offset = 0;     // value of that first character

    if ('a' <= c && c <= 'f')
    {
        first = 'a';
        offset = 10;
    }
    else if ('A' <= c && c <= 'F')
    {
        first = 'A';
        offset = 10;
    }
    return c - first + offset;
}
1956
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * p[0] supplies the high four bits, p[1] the low four bits.
 * Return -1 if one of the characters is not hex.
 */
int
hexhex2nr(char_u *p)
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
    {
        int high = hex2nr(p[0]);
        int low = hex2nr(p[1]);

        return (high << 4) + low;
    }
    return -1;
}
#endif
1970
1971/*
1972 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001973 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00001974 * backslash is not a normal file name character.
1975 * '$' is a valid file name character, we don't remove the backslash before
1976 * it. This means it is not possible to use an environment variable after a
1977 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1978 * Although "\ name" is valid, the backslash in "Program\ files" must be
1979 * removed. Assume a file name doesn't start with a space.
1980 * For multi-byte names, never remove a backslash before a non-ascii
1981 * character, assume that all multi-byte characters are valid file name
1982 * characters.
1983 */
1984 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001985rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001986{
1987#ifdef BACKSLASH_IN_FILENAME
1988 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00001990 && (str[1] == ' '
1991 || (str[1] != NUL
1992 && str[1] != '*'
1993 && str[1] != '?'
1994 && !vim_isfilec(str[1]))));
1995#else
1996 return (str[0] == '\\' && str[1] != NUL);
1997#endif
1998}
1999
2000/*
2001 * Halve the number of backslashes in a file name argument.
2002 * For MS-DOS we only do this if the character after the backslash
2003 * is not a normal file character.
2004 */
2005 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002006backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002007{
2008 for ( ; *p; ++p)
2009 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002010 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002011}
2012
2013/*
2014 * backslash_halve() plus save the result in allocated memory.
2015 */
2016 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002017backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002018{
2019 char_u *res;
2020
2021 res = vim_strsave(p);
2022 if (res == NULL)
2023 return p;
2024 backslash_halve(res);
2025 return res;
2026}
2027
2028#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2029/*
2030 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2031 * The first 64 entries have been added to map control characters defined in
2032 * ascii.h
2033 */
2034static char_u ebcdic2ascii_tab[256] =
2035{
2036 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2037 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2038 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2039 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2040 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2041 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2042 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2043 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2044 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2045 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2046 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2047 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2048 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2049 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2050 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2051 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2052 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2053 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2054 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2055 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2056 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2057 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2058 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2059 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2060 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2061 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2062 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2063 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2064 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2065 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2066 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2067 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2068};
2069
2070/*
2071 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2072 * wanting 7-bit ASCII characters out the other end.
2073 */
2074 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002075ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002076{
2077 int i;
2078
2079 for (i = 0; i < len; i++)
2080 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2081}
2082#endif