blob: 09e78203ffe3ed33c25ef2c6e7103bc6e2b9f068 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
/* b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the byte index is "c >> 3", the bit index is "c & 7".
 * Every expansion is wrapped in parentheses as a whole, so the macros keep
 * their meaning when used inside a larger expression. */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
75 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000108 /* euc-jp characters starting with 0x8e are single width */
109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111 /* other double-byte chars can be printable AND double-width */
112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000115 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
130 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
134 /* double-byte characters are probably word characters */
135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
147 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 * options Each option is a list of characters, character numbers or
149 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
150 */
151 for (i = global ? 0 : 3; i <= 3; ++i)
152 {
153 if (i == 0)
154 p = p_isi; /* first round: 'isident' */
155 else if (i == 1)
156 p = p_isp; /* second round: 'isprint' */
157 else if (i == 2)
158 p = p_isf; /* third round: 'isfname' */
159 else /* i == 3 */
160 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
161
162 while (*p)
163 {
164 tilde = FALSE;
165 do_isalpha = FALSE;
166 if (*p == '^' && p[1] != NUL)
167 {
168 tilde = TRUE;
169 ++p;
170 }
171 if (VIM_ISDIGIT(*p))
172 c = getdigits(&p);
173 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000174 if (has_mbyte)
175 c = mb_ptr2char_adv(&p);
176 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000185 if (has_mbyte)
186 c2 = mb_ptr2char_adv(&p);
187 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 c2 = *p++;
189 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000191 || !(*p == NUL || *p == ','))
192 return FAIL;
193
194 if (c2 == -1) /* not a range */
195 {
196 /*
197 * A single '@' (not "@-@"):
198 * Decide on letters being ID/printable/keyword chars with
199 * standard function isalpha(). This takes care of locale for
200 * single-byte characters).
201 */
202 if (c == '@')
203 {
204 do_isalpha = TRUE;
205 c = 1;
206 c2 = 255;
207 }
208 else
209 c2 = c;
210 }
211 while (c <= c2)
212 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000213 /* Use the MB_ functions here, because isalpha() doesn't
214 * work properly when 'encoding' is "latin1" and the locale is
215 * "C". */
Bram Moolenaar14184a32019-02-16 15:10:30 +0100216 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000217 {
218 if (i == 0) /* (re)set ID flag */
219 {
220 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100221 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100223 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000224 }
225 else if (i == 1) /* (re)set printable */
226 {
227 if ((c < ' '
228#ifndef EBCDIC
229 || c > '~'
230#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100231 // For double-byte we keep the cell width, so
232 // that we can detect it from the first byte.
233 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000234 {
235 if (tilde)
236 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100237 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100239 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000240 }
241 else
242 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100243 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
244 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000245 }
246 }
247 }
248 else if (i == 2) /* (re)set fname flag */
249 {
250 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100251 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100253 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 }
255 else /* i == 3 */ /* (re)set keyword flag */
256 {
257 if (tilde)
258 RESET_CHARTAB(buf, c);
259 else
260 SET_CHARTAB(buf, c);
261 }
262 }
263 ++c;
264 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265
266 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100268 if (c == ',' && *p == NUL)
269 /* Trailing comma is not allowed. */
270 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 }
272 }
273 chartab_initialized = TRUE;
274 return OK;
275}
276
277/*
278 * Translate any special characters in buf[bufsize] in-place.
279 * The result is a string with only printable characters, but if there is not
280 * enough room, not all characters will be translated.
281 */
282 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100283trans_characters(
284 char_u *buf,
285 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286{
287 int len; /* length of string needing translation */
288 int room; /* room in buffer after string */
289 char_u *trs; /* translated character */
290 int trs_len; /* length of trs[] */
291
292 len = (int)STRLEN(buf);
293 room = bufsize - len;
294 while (*buf != 0)
295 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000297 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 len -= trs_len;
299 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 {
301 trs = transchar_byte(*buf);
302 trs_len = (int)STRLEN(trs);
303 if (trs_len > 1)
304 {
305 room -= trs_len - 1;
306 if (room <= 0)
307 return;
308 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
309 }
310 mch_memmove(buf, trs, (size_t)trs_len);
311 --len;
312 }
313 buf += trs_len;
314 }
315}
316
Bram Moolenaar071d4272004-06-13 20:20:40 +0000317/*
318 * Translate a string into allocated memory, replacing special chars with
319 * printable chars. Returns NULL when out of memory.
320 */
321 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100322transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323{
324 char_u *res;
325 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 int l, len, c;
327 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328
Bram Moolenaar071d4272004-06-13 20:20:40 +0000329 if (has_mbyte)
330 {
331 /* Compute the length of the result, taking account of unprintable
332 * multi-byte characters. */
333 len = 0;
334 p = s;
335 while (*p != NUL)
336 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000337 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000338 {
339 c = (*mb_ptr2char)(p);
340 p += l;
341 if (vim_isprintc(c))
342 len += l;
343 else
344 {
345 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000346 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347 }
348 }
349 else
350 {
351 l = byte2cells(*p++);
352 if (l > 0)
353 len += l;
354 else
355 len += 4; /* illegal byte sequence */
356 }
357 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 }
360 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200361 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000362 if (res != NULL)
363 {
364 *res = NUL;
365 p = s;
366 while (*p != NUL)
367 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000368 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000369 {
370 c = (*mb_ptr2char)(p);
371 if (vim_isprintc(c))
372 STRNCAT(res, p, l); /* append printable multi-byte char */
373 else
374 transchar_hex(res + STRLEN(res), c);
375 p += l;
376 }
377 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000378 STRCAT(res, transchar_byte(*p++));
379 }
380 }
381 return res;
382}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000383
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000385 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
386 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000387 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
388 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000389 */
390 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100391str_foldcase(
392 char_u *str,
393 int orglen,
394 char_u *buf,
395 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000396{
397 garray_T ga;
398 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000399 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000400
401#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
402#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000403#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
404#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000405
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 /* Copy "str" into "buf" or allocated memory, unmodified. */
407 if (buf == NULL)
408 {
409 ga_init2(&ga, 1, 10);
410 if (ga_grow(&ga, len + 1) == FAIL)
411 return NULL;
412 mch_memmove(ga.ga_data, str, (size_t)len);
413 ga.ga_len = len;
414 }
415 else
416 {
417 if (len >= buflen) /* Ugly! */
418 len = buflen - 1;
419 mch_memmove(buf, str, (size_t)len);
420 }
421 if (buf == NULL)
422 GA_CHAR(len) = NUL;
423 else
424 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425
426 /* Make each character lower case. */
427 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000428 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000429 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000430 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431 {
432 if (enc_utf8)
433 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000434 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100435 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000436 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437
Bram Moolenaarb9839212008-06-28 11:03:50 +0000438 /* Only replace the character when it is not an invalid
439 * sequence (ASCII character or more than one byte) and
440 * utf_tolower() doesn't return the original character. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100441 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100443 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000444
445 /* If the byte length changes need to shift the following
446 * characters forward or backward. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100447 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000448 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100449 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000450 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100451 if (buf == NULL
452 ? ga_grow(&ga, nlen - olen + 1) == FAIL
453 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000454 {
455 /* out of memory, keep old char */
456 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100460 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000462 if (buf == NULL)
463 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100464 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
465 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000466 }
467 else
468 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100469 STRMOVE(buf + i + nlen, buf + i + olen);
470 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000474 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000475 }
476 }
477 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000478 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000479 }
480 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000481 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000482 if (buf == NULL)
483 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
484 else
485 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000486 ++i;
487 }
488 }
489
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000490 if (buf == NULL)
491 return (char_u *)ga.ga_data;
492 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494
495/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100496 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000497 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100498 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000499 * Does NOT work for multi-byte characters, c must be <= 255.
500 * Also doesn't work for the first byte of a multi-byte, "c" must be a
501 * character!
502 */
503static char_u transchar_buf[7];
504
505 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100506transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507{
508 int i;
509
510 i = 0;
511 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
512 {
513 transchar_buf[0] = '~';
514 transchar_buf[1] = '@';
515 i = 2;
516 c = K_SECOND(c);
517 }
518
519 if ((!chartab_initialized && (
520#ifdef EBCDIC
521 (c >= 64 && c < 255)
522#else
523 (c >= ' ' && c <= '~')
524#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525 )) || (c < 256 && vim_isprintc_strict(c)))
526 {
527 /* printable character */
528 transchar_buf[i] = c;
529 transchar_buf[i + 1] = NUL;
530 }
531 else
532 transchar_nonprint(transchar_buf + i, c);
533 return transchar_buf;
534}
535
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536/*
537 * Like transchar(), but called with a byte instead of a character. Checks
538 * for an illegal UTF-8 byte.
539 */
540 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100541transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000542{
543 if (enc_utf8 && c >= 0x80)
544 {
545 transchar_nonprint(transchar_buf, c);
546 return transchar_buf;
547 }
548 return transchar(c);
549}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550
551/*
552 * Convert non-printable character to two or more printable characters in
553 * "buf[]". "buf" needs to be able to hold five bytes.
554 * Does NOT work for multi-byte characters, c must be <= 255.
555 */
556 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100557transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000558{
559 if (c == NL)
560 c = NUL; /* we use newline in place of a NUL */
561 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
562 c = NL; /* we use CR in place of NL in this case */
563
564 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
565 transchar_hex(buf, c);
566
567#ifdef EBCDIC
568 /* For EBCDIC only the characters 0-63 and 255 are not printable */
569 else if (CtrlChar(c) != 0 || c == DEL)
570#else
571 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
572#endif
573 {
574 buf[0] = '^';
575#ifdef EBCDIC
576 if (c == DEL)
577 buf[1] = '?'; /* DEL displayed as ^? */
578 else
579 buf[1] = CtrlChar(c);
580#else
581 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
582#endif
583
584 buf[2] = NUL;
585 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000586 else if (enc_utf8 && c >= 0x80)
587 {
588 transchar_hex(buf, c);
589 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590#ifndef EBCDIC
591 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
592 {
593 buf[0] = '|';
594 buf[1] = c - 0x80;
595 buf[2] = NUL;
596 }
597#else
598 else if (c < 64)
599 {
600 buf[0] = '~';
601 buf[1] = MetaChar(c);
602 buf[2] = NUL;
603 }
604#endif
605 else /* 0x80 - 0x9f and 0xff */
606 {
607 /*
608 * TODO: EBCDIC I don't know what to do with this chars, so I display
609 * them as '~?' for now
610 */
611 buf[0] = '~';
612#ifdef EBCDIC
613 buf[1] = '?'; /* 0xff displayed as ~? */
614#else
615 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
616#endif
617 buf[2] = NUL;
618 }
619}
620
621 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100622transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000623{
624 int i = 0;
625
626 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000627 if (c > 255)
628 {
629 buf[++i] = nr2hex((unsigned)c >> 12);
630 buf[++i] = nr2hex((unsigned)c >> 8);
631 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000633 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000634 buf[++i] = '>';
635 buf[++i] = NUL;
636}
637
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned	nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
650
651/*
652 * Return number of display cells occupied by byte "b".
653 * Caller must make sure 0 <= b <= 255.
654 * For multi-byte mode "b" must be the first byte of a character.
655 * A TAB is counted as two cells: "^I".
656 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
657 * cells depends on further bytes.
658 */
659 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100660byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 if (enc_utf8 && b >= 0x80)
663 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100664 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665}
666
667/*
668 * Return number of display cells occupied by character "c".
669 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
670 * A TAB is counted as two cells: "^I" or four: "<09>".
671 */
672 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100673char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000674{
675 if (IS_SPECIAL(c))
676 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677 if (c >= 0x80)
678 {
679 /* UTF-8: above 0x80 need to check the value */
680 if (enc_utf8)
681 return utf_char2cells(c);
682 /* DBCS: double-byte means double-width, except for euc-jp with first
683 * byte 0x8e */
684 if (enc_dbcs != 0 && c >= 0x100)
685 {
686 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
687 return 1;
688 return 2;
689 }
690 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100691 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000692}
693
694/*
695 * Return number of display cells occupied by character at "*p".
696 * A TAB is counted as two cells: "^I" or four: "<09>".
697 */
698 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100699ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000700{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000701 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
702 if (enc_utf8 && *p >= 0x80)
703 return utf_ptr2cells(p);
704 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100705 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000706}
707
708/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100709 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710 * counting TABs as two characters: "^I".
711 */
712 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100713vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714{
715 return vim_strnsize(s, (int)MAXCOL);
716}
717
718/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100719 * Return the number of character cells string "s[len]" will take on the
720 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721 */
722 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100723vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724{
725 int size = 0;
726
727 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000728 if (has_mbyte)
729 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000730 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731
732 size += ptr2cells(s);
733 s += l;
734 len -= l - 1;
735 }
736 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100738
Bram Moolenaar071d4272004-06-13 20:20:40 +0000739 return size;
740}
741
742/*
743 * Return the number of characters 'c' will take on the screen, taking
744 * into account the size of a tab.
745 * Use a define to make it fast, this is used very often!!!
746 * Also see getvcol() below.
747 */
748
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200749#ifdef FEAT_VARTABS
750# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
751 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
752 { \
753 return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
754 } \
755 else \
756 return ptr2cells(p);
757#else
758# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
Bram Moolenaar071d4272004-06-13 20:20:40 +0000759 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
760 { \
761 int ts; \
762 ts = (buf)->b_p_ts; \
763 return (int)(ts - (col % ts)); \
764 } \
765 else \
766 return ptr2cells(p);
Bram Moolenaar04958cb2018-06-23 19:23:02 +0200767#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000768
Bram Moolenaar071d4272004-06-13 20:20:40 +0000769 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100770chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771{
772 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
773}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774
775#ifdef FEAT_LINEBREAK
776 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100777win_chartabsize(win_T *wp, char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000778{
779 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
780}
781#endif
782
783/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200784 * Return the number of characters the string 's' will take on the screen,
785 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000786 */
787 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100788linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789{
Bram Moolenaardc536092010-07-18 15:45:49 +0200790 return linetabsize_col(0, s);
791}
792
793/*
794 * Like linetabsize(), but starting at column "startcol".
795 */
796 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100797linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200798{
799 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200800 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000801
802 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200803 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804 return (int)col;
805}
806
807/*
808 * Like linetabsize(), but for a given window instead of the current one.
809 */
810 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100811win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000812{
813 colnr_T col = 0;
814 char_u *s;
815
Bram Moolenaar597a4222014-06-25 14:39:50 +0200816 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100817 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200818 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000819 return (int)col;
820}
821
822/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000823 * Return TRUE if 'c' is a normal identifier character:
824 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 */
826 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100827vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100829 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830}
831
832/*
833 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100834 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835 * For multi-byte characters mb_get_class() is used (builtin rules).
836 */
837 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100838vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000839{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100840 return vim_iswordc_buf(c, curbuf);
841}
842
843 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100844vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100845{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 if (c >= 0x100)
847 {
848 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000849 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000850 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100851 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100852 return FALSE;
853 }
854 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855}
856
857/*
858 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
859 */
860 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100861vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100863 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000864}
865
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100867vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100869 int c = *p;
870
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100871 if (has_mbyte && MB_BYTE2LEN(c) > 1)
872 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100873 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000874}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875
876/*
877 * return TRUE if 'c' is a valid file-name character
878 * Assume characters above 0x100 are valid (multi-byte).
879 */
880 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100881vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000882{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100883 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000884}
885
886/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000887 * return TRUE if 'c' is a valid file-name character or a wildcard character
888 * Assume characters above 0x100 are valid (multi-byte).
889 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
890 * returns false.
891 */
892 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100893vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000894{
895 char_u buf[2];
896
897 buf[0] = (char_u)c;
898 buf[1] = NUL;
899 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
900}
901
902/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200903 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000904 * Assume characters above 0x100 are printable (multi-byte), except for
905 * Unicode.
906 */
907 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100908vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000910 if (enc_utf8 && c >= 0x100)
911 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100912 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913}
914
915/*
916 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
917 * byte of a double-byte character.
918 */
919 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100920vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000921{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
923 return FALSE;
924 if (enc_utf8 && c >= 0x100)
925 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100926 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927}
928
929/*
930 * like chartabsize(), but also check for line breaks on the screen
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933lbr_chartabsize(
934 char_u *line UNUSED, /* start of the line */
935 unsigned char *s,
936 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000937{
938#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200939 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940 {
941#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942 if (curwin->w_p_wrap)
943 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000944 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
945#ifdef FEAT_LINEBREAK
946 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200947 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948#endif
949}
950
951/*
952 * Call lbr_chartabsize() and advance the pointer.
953 */
954 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100955lbr_chartabsize_adv(
956 char_u *line, /* start of the line */
957 char_u **s,
958 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000959{
960 int retval;
961
Bram Moolenaar597a4222014-06-25 14:39:50 +0200962 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100963 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000964 return retval;
965}
966
967/*
968 * This function is used very often, keep it fast!!!!
969 *
 * Return the size of the character at "s" in window "wp" when it starts in
 * virtual column "col", taking 'linebreak', 'showbreak' and 'breakindent'
 * into account.  "line" is the start of the line, it is only passed on to
 * get_breakindent_win().
 *
970 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
971 * string at start of line. Warning: *headp is only set if it's a non-zero
972 * value, init to 0 before calling.
 * (That warning is about the quick path through win_nolbr_chartabsize(); the
 * FEAT_LINEBREAK path at the end always stores a value.)
973 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000974 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100975win_lbr_chartabsize(
976 win_T *wp,
977 char_u *line UNUSED, /* start of the line */
978 char_u *s,
979 colnr_T col,
980 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000981{
982#ifdef FEAT_LINEBREAK
983 int c;
984 int size;
985 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +0200986 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000987 colnr_T colmax;
988 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000989 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 int numberextra;
991 char_u *ps;
992 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000993 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000994
995 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +0200996 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
 * Without FEAT_LINEBREAK the block below is unconditional and is all there
 * is to this function.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997 */
Bram Moolenaar597a4222014-06-25 14:39:50 +0200998 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000999#endif
1000 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 if (wp->w_p_wrap)
1002 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1004 }
1005
1006#ifdef FEAT_LINEBREAK
1007 /*
1008 * First get normal size, without 'linebreak'
1009 */
1010 size = win_chartabsize(wp, s, col);
1011 c = *s;
/* For a Tab remember its size minus one; it is used below to correct
 * "colmax" and the resulting "size". */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001012 if (tab_corr)
1013 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014
1015 /*
1016 * If 'linebreak' set check at a blank before a non-blank if the line
1017 * needs a break here
1018 */
1019 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001020 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001021 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001022 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001023 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024 {
1025 /*
1026 * Count all characters from first non-blank after a blank up to next
1027 * non-blank after a blank.
1028 */
1029 numberextra = win_col_off(wp);
1030 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001031 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001032 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001033 {
/* NOTE(review): this appears to move "colmax" forward in steps of "n"
 * (the width of a following screen line) until it is past "col" -- confirm.
 * The "n > 0" check avoids a division by zero. */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001034 colmax += col_adj;
1035 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001036 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001037 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001038 }
1039
Bram Moolenaar071d4272004-06-13 20:20:40 +00001040 for (;;)
1041 {
1042 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001043 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001044 c = *s;
1045 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001046 && (VIM_ISBREAK(c)
1047 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001048 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001049 break;
1050
1051 col2 += win_chartabsize(wp, s, col2);
1052 if (col2 >= colmax) /* doesn't fit */
1053 {
/* The character at the original "s" is made to fill the line up to
 * "colmax". */
Bram Moolenaaree739b42014-07-02 19:37:42 +02001054 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 break;
1056 }
1057 }
1058 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1060 && wp->w_p_wrap && in_win_border(wp, col))
1061 {
1062 ++size; /* Count the ">" in the last column. */
1063 mb_added = 1;
1064 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001065
1066 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001067 * May have to add something for 'breakindent' and/or 'showbreak'
1068 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001069 * Set *headp to the size of what we add.
 * Note that "col" is modified from here on, it no longer is the virtual
 * column that was passed in.
1070 */
1071 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001072 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001073 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001074 colnr_T sbrlen = 0;
1075 int numberwidth = win_col_off(wp);
1076
1077 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001079 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001081 col -= wp->w_width;
1082 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001083 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001084 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001085 if (*p_sbr != NUL)
1086 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001087 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001088 if (col >= sbrlen)
1089 col -= sbrlen;
1090 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001091 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001092 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001093 else if (col > 0 && numberextra > 0)
1094 col += numberwidth - win_col_off2(wp);
1095
1096 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001097 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001098 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001099 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001100 added = 0;
1101 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001102 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001103 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001104 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001105 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001106 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001107 int prev_width = col
1108 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001109
// avoid dividing by zero or a negative width below
1110 if (width <= 0)
1111 width = (colnr_T)1;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001112 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1113 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001114 // wrapped, add another length of 'sbr'
Bram Moolenaard574ea22015-01-14 19:35:14 +01001115 added += vim_strsize(p_sbr);
1116 }
1117 else
1118 added += vim_strsize(p_sbr);
1119 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001120 if (wp->w_p_bri)
1121 added += get_breakindent_win(wp, line);
1122
Bram Moolenaar95765082014-08-24 21:19:25 +02001123 size += added;
/* The addition is only reported through "headp" when "col" is zero here. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001124 if (col != 0)
1125 added = 0;
1126 }
1127 }
/* In this path *headp is always stored, also when the value is zero. */
1128 if (headp != NULL)
1129 *headp = added + mb_added;
1130 return size;
1131#endif
1132}
1133
Bram Moolenaar071d4272004-06-13 20:20:40 +00001134/*
1135 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1136 * 'wrap' is on. This means we need to check for a double-byte character that
1137 * doesn't fit at the end of the screen line.
1138 */
1139 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001140win_nolbr_chartabsize(
1141 win_T *wp,
1142 char_u *s,
1143 colnr_T col,
1144 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001145{
1146 int n;
1147
1148 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1149 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001150# ifdef FEAT_VARTABS
1151 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1152 wp->w_buffer->b_p_vts_array);
1153# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154 n = wp->w_buffer->b_p_ts;
1155 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001156# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 }
1158 n = ptr2cells(s);
1159 /* Add one cell for a double-width character in the last column of the
1160 * window, displayed with a ">". */
1161 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1162 {
1163 if (headp != NULL)
1164 *headp = 1;
1165 return 3;
1166 }
1167 return n;
1168}
1169
1170/*
1171 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1172 * "wp".
1173 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001174 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001175in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001176{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001177 int width1; /* width of first line (after line number) */
1178 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001179
Bram Moolenaar071d4272004-06-13 20:20:40 +00001180 if (wp->w_width == 0) /* there is no border */
1181 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001182 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001183 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001185 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001186 return TRUE;
1187 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001188 if (width2 <= 0)
1189 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001190 return ((vcol - width1) % width2 == width2 - 1);
1191}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192
1193/*
1194 * Get virtual column number of pos.
1195 * start: on the first position of this character (TAB, ctrl)
1196 * cursor: where the cursor is on this character (first char, except for TAB)
1197 * end: on the last position of this character (TAB, ctrl)
1198 *
 * "wp" is the window; the line is taken from its buffer at pos->lnum.
 * "start", "cursor" and "end" are output pointers, each of them may be NULL
 * when that value is not wanted.
 * When pos->col is MAXCOL the column of the NUL at the end of the line is
 * computed.
 * Side effect: when pos->col is not MAXCOL and the line is empty, pos->col is
 * set to zero.
 *
1199 * This is used very often, keep it fast!
1200 */
1201 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001202getvcol(
1203 win_T *wp,
1204 pos_T *pos,
1205 colnr_T *start,
1206 colnr_T *cursor,
1207 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208{
1209 colnr_T vcol;
1210 char_u *ptr; /* points to current char */
1211 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001212 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001213 int incr;
1214 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001215#ifdef FEAT_VARTABS
1216 int *vts = wp->w_buffer->b_p_vts_array;
1217#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001218 int ts = wp->w_buffer->b_p_ts;
1219 int c;
1220
1221 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001222 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001223 if (pos->col == MAXCOL)
1224 posptr = NULL; /* continue until the NUL */
1225 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001226 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001227 /* Special check for an empty line, which can happen on exit, when
1228 * ml_get_buf() always returns an empty string. */
1229 if (*ptr == NUL)
1230 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001231 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001232 if (has_mbyte)
1233 /* always start on the first byte */
1234 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001235 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001236
1237 /*
1238 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001239 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1240 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001241 * Also use this when 'list' is set but tabs take their normal size.
 * Both loops leave "ptr" on the character at pos->col (or on the NUL), "vcol"
 * at the column where it starts, "incr" at its size and "head" at the part
 * of "incr" that comes before the character itself.
1242 */
1243 if ((!wp->w_p_list || lcs_tab1 != NUL)
1244#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001245 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001246#endif
1247 )
1248 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001249 for (;;)
1250 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001251 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252 c = *ptr;
1253 /* make sure we don't go past the end of the line */
1254 if (c == NUL)
1255 {
1256 incr = 1; /* NUL at end of line only takes one column */
1257 break;
1258 }
1259 /* A tab gets expanded, depending on the current column */
1260 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001261#ifdef FEAT_VARTABS
1262 incr = tabstop_padding(vcol, ts, vts);
1263#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001265#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001266 else
1267 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001268 if (has_mbyte)
1269 {
1270 /* For utf-8, if the byte is >= 0x80, need to look at
1271 * further bytes to find the cell width. */
1272 if (enc_utf8 && c >= 0x80)
1273 incr = utf_ptr2cells(ptr);
1274 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001275 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001276
1277 /* If a double-cell char doesn't fit at the end of a line
1278 * it wraps to the next line, it's like this char is three
1279 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001280 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1281 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001282 {
1283 ++incr;
1284 head = 1;
1285 }
1286 }
1287 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001288 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 }
1290
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001291 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292 break;
1293
1294 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001295 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001296 }
1297 }
1298 else
1299 {
/* Slow path: let win_lbr_chartabsize() compute the size of each
 * character. */
1300 for (;;)
1301 {
1302 /* A tab gets expanded, depending on the current column */
1303 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001304 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001305 /* make sure we don't go past the end of the line */
1306 if (*ptr == NUL)
1307 {
1308 incr = 1; /* NUL at end of line only takes one column */
1309 break;
1310 }
1311
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001312 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313 break;
1314
1315 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001316 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001317 }
1318 }
/* Store the results that were asked for. */
1319 if (start != NULL)
1320 *start = vcol + head;
1321 if (end != NULL)
1322 *end = vcol + incr - 1;
1323 if (cursor != NULL)
1324 {
/* On a Tab in Normal mode, without 'list' and without virtual editing,
 * the cursor goes on the last cell of the Tab, except for the Visual
 * mode cases checked below. */
1325 if (*ptr == TAB
1326 && (State & NORMAL)
1327 && !wp->w_p_list
1328 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001329 && !(VIsual_active
1330 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001331 )
1332 *cursor = vcol + incr - 1; /* cursor at end */
1333 else
1334 *cursor = vcol + head; /* cursor at start */
1335 }
1336}
1337
1338/*
1339 * Get virtual cursor column in the current window, pretending 'list' is off.
1340 */
1341 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001342getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001343{
1344 int list_save = curwin->w_p_list;
1345 colnr_T vcol;
1346
1347 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001348 if (posp->coladd)
1349 getvvcol(curwin, posp, NULL, &vcol, NULL);
1350 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001351 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001352 curwin->w_p_list = list_save;
1353 return vcol;
1354}
1355
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356/*
1357 * Get virtual column in virtual mode.
1358 */
1359 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001360getvvcol(
1361 win_T *wp,
1362 pos_T *pos,
1363 colnr_T *start,
1364 colnr_T *cursor,
1365 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001366{
1367 colnr_T col;
1368 colnr_T coladd;
1369 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001371
1372 if (virtual_active())
1373 {
1374 /* For virtual mode, only want one value */
1375 getvcol(wp, pos, &col, NULL, NULL);
1376
1377 coladd = pos->coladd;
1378 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001379 /* Cannot put the cursor on part of a wide character. */
1380 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001381 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001382 {
1383 int c = (*mb_ptr2char)(ptr + pos->col);
1384
1385 if (c != TAB && vim_isprintc(c))
1386 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001387 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001388 if (coladd > endadd) /* past end of line */
1389 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 else
1391 coladd = 0;
1392 }
1393 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001394 col += coladd;
1395 if (start != NULL)
1396 *start = col;
1397 if (cursor != NULL)
1398 *cursor = col;
1399 if (end != NULL)
1400 *end = col + endadd;
1401 }
1402 else
1403 getvcol(wp, pos, start, cursor, end);
1404}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001405
Bram Moolenaar071d4272004-06-13 20:20:40 +00001406/*
1407 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1408 * Used for Visual block mode.
1409 */
1410 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001411getvcols(
1412 win_T *wp,
1413 pos_T *pos1,
1414 pos_T *pos2,
1415 colnr_T *left,
1416 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001417{
1418 colnr_T from1, from2, to1, to2;
1419
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001420 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001421 {
1422 getvvcol(wp, pos1, &from1, NULL, &to1);
1423 getvvcol(wp, pos2, &from2, NULL, &to2);
1424 }
1425 else
1426 {
1427 getvvcol(wp, pos2, &from1, NULL, &to1);
1428 getvvcol(wp, pos1, &from2, NULL, &to2);
1429 }
1430 if (from2 < from1)
1431 *left = from2;
1432 else
1433 *left = from1;
1434 if (to2 > to1)
1435 {
1436 if (*p_sel == 'e' && from2 - 1 >= to1)
1437 *right = from2 - 1;
1438 else
1439 *right = to2;
1440 }
1441 else
1442 *right = to1;
1443}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001444
1445/*
1446 * skipwhite: skip over ' ' and '\t'.
1447 */
1448 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001449skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001450{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001451 char_u *p = q;
1452
Bram Moolenaar1c465442017-03-12 20:10:05 +01001453 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001454 ++p;
1455 return p;
1456}
1457
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the current line, as obtained with ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1467
1468 int
1469getwhitecols(char_u *p)
1470{
1471 return skipwhite(p) - p;
1472}
1473
1474/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001475 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001476 */
1477 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001478skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001480 char_u *p = q;
1481
Bram Moolenaar071d4272004-06-13 20:20:40 +00001482 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1483 ++p;
1484 return p;
1485}
1486
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001487#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001488/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001489 * skip over binary digits
1490 */
1491 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001492skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001493{
1494 char_u *p = q;
1495
1496 while (vim_isbdigit(*p)) /* skip to next non-digit */
1497 ++p;
1498 return p;
1499}
1500
1501/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001502 * skip over digits and hex characters
1503 */
1504 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001505skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001506{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001507 char_u *p = q;
1508
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001509 while (vim_isxdigit(*p)) /* skip to next non-digit */
1510 ++p;
1511 return p;
1512}
1513#endif
1514
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001515/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001516 * skip to bin digit (or NUL after the string)
1517 */
1518 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001519skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001520{
1521 char_u *p = q;
1522
1523 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1524 ++p;
1525 return p;
1526}
1527
1528/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001529 * skip to digit (or NUL after the string)
1530 */
1531 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001532skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001533{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001534 char_u *p = q;
1535
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001536 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1537 ++p;
1538 return p;
1539}
1540
1541/*
1542 * skip to hex character (or NUL after the string)
1543 */
1544 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001545skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001546{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001547 char_u *p = q;
1548
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1550 ++p;
1551 return p;
1552}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001553
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' to '9' and 0 for anything else.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1565
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' to '9', 'a' to 'f' and 'A' to 'F', 0 for anything else.
 */
    int
vim_isxdigit(int c)
{
    int         is_decimal = (c >= '0' && c <= '9');
    int         is_lower_hex = (c >= 'a' && c <= 'f');
    int         is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1578
/*
 * Corollary of vim_isdigit() and vim_isxdigit() for binary digits, can
 * handle characters > 0x100.
 * Returns 1 for '0' and '1', 0 for anything else.
 */
    int
vim_isbdigit(int c)
{
    /* '0' and '1' are adjacent, a range check covers exactly these two */
    return (c >= '0' && c <= '1');
}
1588
Bram Moolenaar78622822005-08-23 21:00:13 +00001589/*
1590 * Vim's own character class functions. These exist because many library
1591 * islower()/toupper() etc. do not work properly: they crash when used with
1592 * invalid values or can't handle latin1 when the locale is C.
1593 * Speed is most important here.
1594 */
/* Values stored in latin1flags[]: the byte is a lower case or an upper case
 * letter.  Any other value means it is neither. */
1595#define LATIN1LOWER 'l'
1596#define LATIN1UPPER 'U'
1597
/* latin1flags[c] is the class of byte "c", checked by vim_islower() and
 * vim_isupper() when "enc_latin1like" is set. */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001598static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
/* latin1upper[c] is what vim_toupper() returns for byte "c" when
 * "enc_latin1like" is set. */
Bram Moolenaar936347b2012-05-25 11:56:22 +02001599static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
/* latin1lower[c] is the lower case counterpart of latin1upper[], the tail of
 * vim_tolower() returns it when "enc_latin1like" is set. */
1600static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001601
1602 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001603vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001604{
1605 if (c <= '@')
1606 return FALSE;
1607 if (c >= 0x80)
1608 {
1609 if (enc_utf8)
1610 return utf_islower(c);
1611 if (c >= 0x100)
1612 {
1613#ifdef HAVE_ISWLOWER
1614 if (has_mbyte)
1615 return iswlower(c);
1616#endif
1617 /* islower() can't handle these chars and may crash */
1618 return FALSE;
1619 }
1620 if (enc_latin1like)
1621 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1622 }
1623 return islower(c);
1624}
1625
1626 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001627vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001628{
1629 if (c <= '@')
1630 return FALSE;
1631 if (c >= 0x80)
1632 {
1633 if (enc_utf8)
1634 return utf_isupper(c);
1635 if (c >= 0x100)
1636 {
1637#ifdef HAVE_ISWUPPER
1638 if (has_mbyte)
1639 return iswupper(c);
1640#endif
1641 /* islower() can't handle these chars and may crash */
1642 return FALSE;
1643 }
1644 if (enc_latin1like)
1645 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1646 }
1647 return isupper(c);
1648}
1649
1650 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001651vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001652{
1653 if (c <= '@')
1654 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001655 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001656 {
1657 if (enc_utf8)
1658 return utf_toupper(c);
1659 if (c >= 0x100)
1660 {
1661#ifdef HAVE_TOWUPPER
1662 if (has_mbyte)
1663 return towupper(c);
1664#endif
1665 /* toupper() can't handle these chars and may crash */
1666 return c;
1667 }
1668 if (enc_latin1like)
1669 return latin1upper[c];
1670 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001671 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1672 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001673 return TOUPPER_LOC(c);
1674}
1675
1676 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001677vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001678{
1679 if (c <= '@')
1680 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001681 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001682 {
1683 if (enc_utf8)
1684 return utf_tolower(c);
1685 if (c >= 0x100)
1686 {
1687#ifdef HAVE_TOWLOWER
1688 if (has_mbyte)
1689 return towlower(c);
1690#endif
1691 /* tolower() can't handle these chars and may crash */
1692 return c;
1693 }
1694 if (enc_latin1like)
1695 return latin1lower[c];
1696 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001697 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1698 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001699 return TOLOWER_LOC(c);
1700}
Bram Moolenaar78622822005-08-23 21:00:13 +00001701
Bram Moolenaar071d4272004-06-13 20:20:40 +00001702/*
1703 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1704 */
1705 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001706skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001707{
1708 while (*p != ' ' && *p != '\t' && *p != NUL)
1709 ++p;
1710 return p;
1711}
1712
Bram Moolenaar071d4272004-06-13 20:20:40 +00001713/*
1714 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1715 */
1716 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001717skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001718{
1719 while (*p != ' ' && *p != '\t' && *p != NUL)
1720 {
1721 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1722 ++p;
1723 ++p;
1724 }
1725 return p;
1726}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001727
1728/*
1729 * Getdigits: Get a number from a string and skip over it.
1730 * Note: the argument is a pointer to a char_u pointer!
1731 */
1732 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001733getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734{
1735 char_u *p;
1736 long retval;
1737
1738 p = *pp;
1739 retval = atol((char *)p);
1740 if (*p == '-') /* skip negative sign */
1741 ++p;
1742 p = skipdigits(p); /* skip to next non-digit */
1743 *pp = p;
1744 return retval;
1745}
1746
1747/*
1748 * Return TRUE if "lbuf" is empty or only contains blanks.
1749 */
1750 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001751vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001752{
1753 char_u *p;
1754
1755 p = skipwhite(lbuf);
1756 return (*p == NUL || *p == '\r' || *p == '\n');
1757}
1758
1759/*
1760 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001761 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1762 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 * 0 decimal
1764 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001765 * 'B' bin
1766 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767 * 'X' hex
1768 * 'x' hex
1769 * If "len" is not NULL, the length of the number in characters is returned.
1770 * If "nptr" is not NULL, the signed result is returned in it.
1771 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001772 * If "what" contains STR2NR_BIN recognize binary numbers
1773 * If "what" contains STR2NR_OCT recognize octal numbers
1774 * If "what" contains STR2NR_HEX recognize hex numbers
1775 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001776 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001777 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001778 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001779 */
1780 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001781vim_str2nr(
1782 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001783 int *prep, // return: type of number 0 = decimal, 'x'
1784 // or 'X' is hex, '0' = octal, 'b' or 'B'
1785 // is bin
1786 int *len, // return: detected length of number
1787 int what, // what numbers to recognize
1788 varnumber_T *nptr, // return: signed result
1789 uvarnumber_T *unptr, // return: unsigned result
1790 int maxlen, // max length of string to check
1791 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001792{
1793 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001794 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001795 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001796 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001797 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001798
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001799 if (len != NULL)
1800 *len = 0;
1801
Bram Moolenaar071d4272004-06-13 20:20:40 +00001802 if (ptr[0] == '-')
1803 {
1804 negative = TRUE;
1805 ++ptr;
1806 }
1807
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001808 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001809 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1810 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001812 pre = ptr[1];
1813 if ((what & STR2NR_HEX)
1814 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1815 && (maxlen == 0 || maxlen > 2))
1816 /* hexadecimal */
1817 ptr += 2;
1818 else if ((what & STR2NR_BIN)
1819 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1820 && (maxlen == 0 || maxlen > 2))
1821 /* binary */
1822 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001823 else
1824 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001825 /* decimal or octal, default is decimal */
1826 pre = 0;
1827 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001828 {
1829 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001830 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001831 {
1832 if (ptr[n] > '7')
1833 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001834 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001835 break;
1836 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001837 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001838 }
1839 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001840 }
1841 }
1842
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001843 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001844 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001845 if (pre == 'B' || pre == 'b'
1846 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001847 {
1848 /* bin */
1849 if (pre != 0)
1850 n += 2; /* skip over "0b" */
1851 while ('0' <= *ptr && *ptr <= '1')
1852 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001853 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001854 if (un <= UVARNUM_MAX / 2)
1855 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001856 else
1857 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001858 ++ptr;
1859 if (n++ == maxlen)
1860 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001861 if ((what & STR2NR_QUOTE) && *ptr == '\''
1862 && '0' <= ptr[1] && ptr[1] <= '1')
1863 {
1864 ++ptr;
1865 if (n++ == maxlen)
1866 break;
1867 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001868 }
1869 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001870 else if (pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001872 /* octal */
1873 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001874 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001875 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001876 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001877 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1878 else
1879 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001880 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001881 if (n++ == maxlen)
1882 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001883 if ((what & STR2NR_QUOTE) && *ptr == '\''
1884 && '0' <= ptr[1] && ptr[1] <= '7')
1885 {
1886 ++ptr;
1887 if (n++ == maxlen)
1888 break;
1889 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001890 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001891 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001892 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001893 {
1894 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001895 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001896 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001897 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001898 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001899 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001900 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001901 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1902 else
1903 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001904 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001905 if (n++ == maxlen)
1906 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001907 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1908 {
1909 ++ptr;
1910 if (n++ == maxlen)
1911 break;
1912 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001913 }
1914 }
1915 else
1916 {
1917 /* decimal */
1918 while (VIM_ISDIGIT(*ptr))
1919 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001920 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1921
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001922 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001923 if (un < UVARNUM_MAX / 10
1924 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1925 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001926 else
1927 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001928 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001929 if (n++ == maxlen)
1930 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001931 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1932 {
1933 ++ptr;
1934 if (n++ == maxlen)
1935 break;
1936 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937 }
1938 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001939
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001940 // Check for an alpha-numeric character immediately following, that is
1941 // most likely a typo.
1942 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1943 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001944
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001945 if (prep != NULL)
1946 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001947 if (len != NULL)
1948 *len = (int)(ptr - start);
1949 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001950 {
1951 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001952 {
1953 /* avoid ubsan error for overflow */
1954 if (un > VARNUM_MAX)
1955 *nptr = VARNUM_MIN;
1956 else
1957 *nptr = -(varnumber_T)un;
1958 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001959 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001960 {
1961 if (un > VARNUM_MAX)
1962 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001963 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001964 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001965 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001966 if (unptr != NULL)
1967 *unptr = un;
1968}
1969
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other character is treated like a decimal digit and yields "c - '0'".
 */
    int
hex2nr(int c)
{
    int         zero;   // character code that stands for the value zero

    if ('a' <= c && c <= 'f')
        zero = 'a' - 10;
    else if ('A' <= c && c <= 'F')
        zero = 'A' - 10;
    else
        zero = '0';
    return c - zero;
}
1983
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value; p[0] is the high
 * nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(char_u *p)
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
        return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
    return -1;
}
#endif
1997
1998/*
1999 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002000 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001 * backslash is not a normal file name character.
2002 * '$' is a valid file name character, we don't remove the backslash before
2003 * it. This means it is not possible to use an environment variable after a
2004 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2005 * Although "\ name" is valid, the backslash in "Program\ files" must be
2006 * removed. Assume a file name doesn't start with a space.
2007 * For multi-byte names, never remove a backslash before a non-ascii
2008 * character, assume that all multi-byte characters are valid file name
2009 * characters.
2010 */
2011 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002012rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002013{
2014#ifdef BACKSLASH_IN_FILENAME
2015 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002017 && (str[1] == ' '
2018 || (str[1] != NUL
2019 && str[1] != '*'
2020 && str[1] != '?'
2021 && !vim_isfilec(str[1]))));
2022#else
2023 return (str[0] == '\\' && str[1] != NUL);
2024#endif
2025}
2026
2027/*
2028 * Halve the number of backslashes in a file name argument.
2029 * For MS-DOS we only do this if the character after the backslash
2030 * is not a normal file character.
2031 */
2032 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002033backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002034{
2035 for ( ; *p; ++p)
2036 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002037 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002038}
2039
2040/*
2041 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002042 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002043 */
2044 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002045backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002046{
2047 char_u *res;
2048
2049 res = vim_strsave(p);
2050 if (res == NULL)
2051 return p;
2052 backslash_halve(res);
2053 return res;
2054}
2055
2056#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2057/*
2058 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2059 * The first 64 entries have been added to map control characters defined in
2060 * ascii.h
2061 */
2062static char_u ebcdic2ascii_tab[256] =
2063{
2064 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2065 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2066 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2067 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2068 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2069 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2070 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2071 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2072 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2073 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2074 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2075 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2076 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2077 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2078 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2079 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2080 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2081 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2082 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2083 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2084 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2085 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2086 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2087 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2088 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2089 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2090 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2091 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2092 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2093 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2094 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2095 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2096};
2097
2098/*
2099 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2100 * wanting 7-bit ASCII characters out the other end.
2101 */
2102 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002103ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104{
2105 int i;
2106
2107 for (i = 0; i < len; i++)
2108 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2109}
2110#endif