blob: 50ca617dc28cbe48598863e1cc35bf5ba7414aa2 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: character "c" lives in bit "c & 7" of byte "c >> 3".
 *
 * "c" is evaluated more than once, so it must not have side effects.
 * Every expansion is parenthesized as a whole, so that the macros can be
 * used safely as part of a larger expression.
 */
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
75 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000108 /* euc-jp characters starting with 0x8e are single width */
109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111 /* other double-byte chars can be printable AND double-width */
112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000115 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
130 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
134 /* double-byte characters are probably word characters */
135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
147 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 * options Each option is a list of characters, character numbers or
149 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
150 */
151 for (i = global ? 0 : 3; i <= 3; ++i)
152 {
153 if (i == 0)
154 p = p_isi; /* first round: 'isident' */
155 else if (i == 1)
156 p = p_isp; /* second round: 'isprint' */
157 else if (i == 2)
158 p = p_isf; /* third round: 'isfname' */
159 else /* i == 3 */
160 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
161
162 while (*p)
163 {
164 tilde = FALSE;
165 do_isalpha = FALSE;
166 if (*p == '^' && p[1] != NUL)
167 {
168 tilde = TRUE;
169 ++p;
170 }
171 if (VIM_ISDIGIT(*p))
172 c = getdigits(&p);
173 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000174 if (has_mbyte)
175 c = mb_ptr2char_adv(&p);
176 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000185 if (has_mbyte)
186 c2 = mb_ptr2char_adv(&p);
187 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 c2 = *p++;
189 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000191 || !(*p == NUL || *p == ','))
192 return FAIL;
193
194 if (c2 == -1) /* not a range */
195 {
196 /*
197 * A single '@' (not "@-@"):
198 * Decide on letters being ID/printable/keyword chars with
199 * standard function isalpha(). This takes care of locale for
200 * single-byte characters).
201 */
202 if (c == '@')
203 {
204 do_isalpha = TRUE;
205 c = 1;
206 c2 = 255;
207 }
208 else
209 c2 = c;
210 }
211 while (c <= c2)
212 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000213 /* Use the MB_ functions here, because isalpha() doesn't
214 * work properly when 'encoding' is "latin1" and the locale is
215 * "C". */
Bram Moolenaar14184a32019-02-16 15:10:30 +0100216 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000217 {
218 if (i == 0) /* (re)set ID flag */
219 {
220 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100221 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100223 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000224 }
225 else if (i == 1) /* (re)set printable */
226 {
227 if ((c < ' '
228#ifndef EBCDIC
229 || c > '~'
230#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100231 // For double-byte we keep the cell width, so
232 // that we can detect it from the first byte.
233 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000234 {
235 if (tilde)
236 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100237 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100239 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000240 }
241 else
242 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100243 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
244 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000245 }
246 }
247 }
248 else if (i == 2) /* (re)set fname flag */
249 {
250 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100251 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000252 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100253 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 }
255 else /* i == 3 */ /* (re)set keyword flag */
256 {
257 if (tilde)
258 RESET_CHARTAB(buf, c);
259 else
260 SET_CHARTAB(buf, c);
261 }
262 }
263 ++c;
264 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265
266 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100268 if (c == ',' && *p == NUL)
269 /* Trailing comma is not allowed. */
270 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 }
272 }
273 chartab_initialized = TRUE;
274 return OK;
275}
276
277/*
278 * Translate any special characters in buf[bufsize] in-place.
279 * The result is a string with only printable characters, but if there is not
280 * enough room, not all characters will be translated.
281 */
282 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100283trans_characters(
284 char_u *buf,
285 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286{
287 int len; /* length of string needing translation */
288 int room; /* room in buffer after string */
289 char_u *trs; /* translated character */
290 int trs_len; /* length of trs[] */
291
292 len = (int)STRLEN(buf);
293 room = bufsize - len;
294 while (*buf != 0)
295 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000297 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 len -= trs_len;
299 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 {
301 trs = transchar_byte(*buf);
302 trs_len = (int)STRLEN(trs);
303 if (trs_len > 1)
304 {
305 room -= trs_len - 1;
306 if (room <= 0)
307 return;
308 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
309 }
310 mch_memmove(buf, trs, (size_t)trs_len);
311 --len;
312 }
313 buf += trs_len;
314 }
315}
316
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
	|| defined(PROTO)
/*
 * Translate a string into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * The work is done in two passes: the first computes the size of the
 * result, the second writes it.  The result is written through a moving
 * pointer, so that building it takes time linear in its length (appending
 * with STRCAT() would rescan the result for every character).
 */
    char_u *
transstr(char_u *s)
{
    char_u	*res;
    char_u	*p;
    char_u	*dst;		/* where the next byte of the result goes */
    char_u	*trs;		/* translation of a single byte */
    int		l, len, c;
    int		k;
    char_u	hexbuf[11];

    if (has_mbyte)
    {
	/* Compute the length of the result, taking account of unprintable
	 * multi-byte characters. */
	len = 0;
	p = s;
	while (*p != NUL)
	{
	    if ((l = (*mb_ptr2len)(p)) > 1)
	    {
		c = (*mb_ptr2char)(p);
		p += l;
		if (vim_isprintc(c))
		    len += l;
		else
		{
		    /* Translate into a scratch buffer just to get the
		     * length of the "<xxxx>" form. */
		    transchar_hex(hexbuf, c);
		    len += (int)STRLEN(hexbuf);
		}
	    }
	    else
	    {
		l = byte2cells(*p++);
		if (l > 0)
		    len += l;
		else
		    len += 4;	/* illegal byte sequence */
	    }
	}
	res = alloc(len + 1);
    }
    else
	res = alloc(vim_strsize(s) + 1);
    if (res == NULL)
	return NULL;

    dst = res;
    p = s;
    while (*p != NUL)
    {
	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
	{
	    c = (*mb_ptr2char)(p);
	    if (vim_isprintc(c))
	    {
		/* Append the printable multi-byte char as-is; never copy
		 * past a NUL. */
		for (k = 0; k < l && p[k] != NUL; ++k)
		    *dst++ = p[k];
	    }
	    else
	    {
		transchar_hex(dst, c);
		dst += STRLEN(dst);
	    }
	    p += l;
	}
	else
	{
	    for (trs = transchar_byte(*p++); *trs != NUL; ++trs)
		*dst++ = *trs;
	}
    }
    *dst = NUL;
    return res;
}
#endif
386
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the text is truncated to
 * "buflen - 1" bytes when it does not fit.
 *
 * With a UTF-8 'encoding' the lower case character may take a different
 * number of bytes than the original.  When there is no room for a longer
 * character the original character is kept.
 */
    char_u *
str_foldcase(
    char_u	*str,
    int		orglen,
    char_u	*buf,
    int		buflen)
{
    garray_T	ga;
    int		i;
    int		len = orglen;

    /* Access to the text, which is either in "ga" or in "buf".  These stay
     * defined after the function, as before. */
#define GA_CHAR(i)	((char_u *)ga.ga_data)[i]
#define GA_PTR(i)	((char_u *)ga.ga_data + i)
#define STR_CHAR(i)	(buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)	(buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf == NULL)
    {
	ga_init2(&ga, 1, 10);
	if (ga_grow(&ga, len + 1) == FAIL)
	    return NULL;
	mch_memmove(ga.ga_data, str, (size_t)len);
	ga.ga_len = len;
	GA_CHAR(len) = NUL;
    }
    else
    {
	if (len >= buflen)	    /* Ugly! */
	    len = buflen - 1;
	mch_memmove(buf, str, (size_t)len);
	buf[len] = NUL;
    }

    /* Make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
	if (!enc_utf8 && !(has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
	{
	    /* Single-byte character: use the locale. */
	    if (buf == NULL)
		GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
	    else
		buf[i] = TOLOWER_LOC(buf[i]);
	    ++i;
	    continue;
	}

	if (enc_utf8)
	{
	    int	    ch = utf_ptr2char(STR_PTR(i));
	    int	    oldlen = utf_ptr2len(STR_PTR(i));
	    int	    lower = utf_tolower(ch);

	    /* Only replace the character when it is not an invalid
	     * sequence (ASCII character or more than one byte) and
	     * utf_tolower() doesn't return the original character. */
	    if ((ch < 0x80 || oldlen > 1) && ch != lower)
	    {
		int	newlen = utf_char2len(lower);

		/* A longer character needs extra room.  If that is not
		 * available keep the old character. */
		if (newlen > oldlen
			&& (buf == NULL
			    ? ga_grow(&ga, newlen - oldlen + 1) == FAIL
			    : len + newlen - oldlen >= buflen))
		{
		    lower = ch;
		    newlen = oldlen;
		}

		/* If the byte length changes need to shift the following
		 * characters forward or backward. */
		if (oldlen != newlen)
		{
		    if (buf == NULL)
		    {
			STRMOVE(GA_PTR(i) + newlen, GA_PTR(i) + oldlen);
			ga.ga_len += newlen - oldlen;
		    }
		    else
		    {
			STRMOVE(buf + i + newlen, buf + i + oldlen);
			len += newlen - oldlen;
		    }
		}
		(void)utf_char2bytes(lower, STR_PTR(i));
	    }
	}

	/* skip to next multi-byte char */
	i += (*mb_ptr2len)(STR_PTR(i));
    }

    if (buf == NULL)
	return (char_u *)ga.ga_data;
    return buf;
}
#endif
499
500/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100501 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000502 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100503 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 * Does NOT work for multi-byte characters, c must be <= 255.
505 * Also doesn't work for the first byte of a multi-byte, "c" must be a
506 * character!
507 */
508static char_u transchar_buf[7];
509
510 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100511transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000512{
513 int i;
514
515 i = 0;
516 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
517 {
518 transchar_buf[0] = '~';
519 transchar_buf[1] = '@';
520 i = 2;
521 c = K_SECOND(c);
522 }
523
524 if ((!chartab_initialized && (
525#ifdef EBCDIC
526 (c >= 64 && c < 255)
527#else
528 (c >= ' ' && c <= '~')
529#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000530 )) || (c < 256 && vim_isprintc_strict(c)))
531 {
532 /* printable character */
533 transchar_buf[i] = c;
534 transchar_buf[i + 1] = NUL;
535 }
536 else
537 transchar_nonprint(transchar_buf + i, c);
538 return transchar_buf;
539}
540
Bram Moolenaar071d4272004-06-13 20:20:40 +0000541/*
542 * Like transchar(), but called with a byte instead of a character. Checks
543 * for an illegal UTF-8 byte.
544 */
545 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100546transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547{
548 if (enc_utf8 && c >= 0x80)
549 {
550 transchar_nonprint(transchar_buf, c);
551 return transchar_buf;
552 }
553 return transchar(c);
554}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000555
556/*
557 * Convert non-printable character to two or more printable characters in
558 * "buf[]". "buf" needs to be able to hold five bytes.
559 * Does NOT work for multi-byte characters, c must be <= 255.
560 */
561 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100562transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (c == NL)
565 c = NUL; /* we use newline in place of a NUL */
566 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
567 c = NL; /* we use CR in place of NL in this case */
568
569 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
570 transchar_hex(buf, c);
571
572#ifdef EBCDIC
573 /* For EBCDIC only the characters 0-63 and 255 are not printable */
574 else if (CtrlChar(c) != 0 || c == DEL)
575#else
576 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
577#endif
578 {
579 buf[0] = '^';
580#ifdef EBCDIC
581 if (c == DEL)
582 buf[1] = '?'; /* DEL displayed as ^? */
583 else
584 buf[1] = CtrlChar(c);
585#else
586 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
587#endif
588
589 buf[2] = NUL;
590 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000591 else if (enc_utf8 && c >= 0x80)
592 {
593 transchar_hex(buf, c);
594 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595#ifndef EBCDIC
596 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
597 {
598 buf[0] = '|';
599 buf[1] = c - 0x80;
600 buf[2] = NUL;
601 }
602#else
603 else if (c < 64)
604 {
605 buf[0] = '~';
606 buf[1] = MetaChar(c);
607 buf[2] = NUL;
608 }
609#endif
610 else /* 0x80 - 0x9f and 0xff */
611 {
612 /*
613 * TODO: EBCDIC I don't know what to do with this chars, so I display
614 * them as '~?' for now
615 */
616 buf[0] = '~';
617#ifdef EBCDIC
618 buf[1] = '?'; /* 0xff displayed as ~? */
619#else
620 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
621#endif
622 buf[2] = NUL;
623 }
624}
625
626 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100627transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000628{
629 int i = 0;
630
631 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 if (c > 255)
633 {
634 buf[++i] = nr2hex((unsigned)c >> 12);
635 buf[++i] = nr2hex((unsigned)c >> 8);
636 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000638 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000639 buf[++i] = '>';
640 buf[++i] = NUL;
641}
642
/*
 * Return the hex digit character for the lower 4 bits of "c"; the higher
 * bits are ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned	nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
655
656/*
657 * Return number of display cells occupied by byte "b".
658 * Caller must make sure 0 <= b <= 255.
659 * For multi-byte mode "b" must be the first byte of a character.
660 * A TAB is counted as two cells: "^I".
661 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
662 * cells depends on further bytes.
663 */
664 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100665byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667 if (enc_utf8 && b >= 0x80)
668 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100669 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670}
671
672/*
673 * Return number of display cells occupied by character "c".
674 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
675 * A TAB is counted as two cells: "^I" or four: "<09>".
676 */
677 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100678char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000679{
680 if (IS_SPECIAL(c))
681 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682 if (c >= 0x80)
683 {
684 /* UTF-8: above 0x80 need to check the value */
685 if (enc_utf8)
686 return utf_char2cells(c);
687 /* DBCS: double-byte means double-width, except for euc-jp with first
688 * byte 0x8e */
689 if (enc_dbcs != 0 && c >= 0x100)
690 {
691 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
692 return 1;
693 return 2;
694 }
695 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100696 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000697}
698
699/*
700 * Return number of display cells occupied by character at "*p".
701 * A TAB is counted as two cells: "^I" or four: "<09>".
702 */
703 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100704ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000706 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
707 if (enc_utf8 && *p >= 0x80)
708 return utf_ptr2cells(p);
709 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100710 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711}
712
713/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100714 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000715 * counting TABs as two characters: "^I".
716 */
717 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100718vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000719{
720 return vim_strnsize(s, (int)MAXCOL);
721}
722
723/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100724 * Return the number of character cells string "s[len]" will take on the
725 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000726 */
727 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100728vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729{
730 int size = 0;
731
732 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733 if (has_mbyte)
734 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000735 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000736
737 size += ptr2cells(s);
738 s += l;
739 len -= l - 1;
740 }
741 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100743
Bram Moolenaar071d4272004-06-13 20:20:40 +0000744 return size;
745}
746
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return, so
 * it must be the last thing in the function using it.  A TAB is expanded to
 * the next tab stop, unless 'list' is set for window "wp" and lcs_tab1 is
 * zero; anything else takes ptr2cells() cells.
 * "col" is parenthesized where it is used, so that passing an expression
 * like "a + b" gives the expected result.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100775chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000776{
777 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
778}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000779
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use the settings of window "wp" and its buffer.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* The macro returns from the function in both of its branches. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
787
788/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200789 * Return the number of characters the string 's' will take on the screen,
790 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000791 */
792 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100793linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000794{
Bram Moolenaardc536092010-07-18 15:45:49 +0200795 return linetabsize_col(0, s);
796}
797
798/*
799 * Like linetabsize(), but starting at column "startcol".
800 */
801 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100802linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200803{
804 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200805 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000806
807 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200808 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000809 return (int)col;
810}
811
812/*
813 * Like linetabsize(), but for a given window instead of the current one.
814 */
815 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100816win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000817{
818 colnr_T col = 0;
819 char_u *s;
820
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100822 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200823 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000824 return (int)col;
825}
826
827/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000828 * Return TRUE if 'c' is a normal identifier character:
829 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100834 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835}
836
837/*
838 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100839 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 * For multi-byte characters mb_get_class() is used (builtin rules).
841 */
842 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100843vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000844{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100845 return vim_iswordc_buf(c, curbuf);
846}
847
848 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100849vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100850{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 if (c >= 0x100)
852 {
853 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000854 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100856 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100857 return FALSE;
858 }
859 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860}
861
862/*
863 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
864 */
865 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100866vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100868 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000869}
870
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100872vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000873{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100874 int c = *p;
875
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 if (has_mbyte && MB_BYTE2LEN(c) > 1)
877 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100878 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000880
881/*
882 * return TRUE if 'c' is a valid file-name character
883 * Assume characters above 0x100 are valid (multi-byte).
884 */
885 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100886vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100888 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889}
890
891/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000892 * return TRUE if 'c' is a valid file-name character or a wildcard character
893 * Assume characters above 0x100 are valid (multi-byte).
894 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
895 * returns false.
896 */
897 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100898vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000899{
900 char_u buf[2];
901
902 buf[0] = (char_u)c;
903 buf[1] = NUL;
904 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
905}
906
907/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200908 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 * Assume characters above 0x100 are printable (multi-byte), except for
910 * Unicode.
911 */
912 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100913vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000914{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915 if (enc_utf8 && c >= 0x100)
916 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100917 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000918}
919
920/*
921 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
922 * byte of a double-byte character.
923 */
924 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100925vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
928 return FALSE;
929 if (enc_utf8 && c >= 0x100)
930 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100931 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932}
933
934/*
935 * like chartabsize(), but also check for line breaks on the screen
936 */
937 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100938lbr_chartabsize(
939 char_u *line UNUSED, /* start of the line */
940 unsigned char *s,
941 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942{
943#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200944 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000945 {
946#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000947 if (curwin->w_p_wrap)
948 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000949 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
950#ifdef FEAT_LINEBREAK
951 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200952 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000953#endif
954}
955
956/*
957 * Call lbr_chartabsize() and advance the pointer.
958 */
959 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100960lbr_chartabsize_adv(
961 char_u *line, /* start of the line */
962 char_u **s,
963 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000964{
965 int retval;
966
Bram Moolenaar597a4222014-06-25 14:39:50 +0200967 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100968 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000969 return retval;
970}
971
972/*
973 * This function is used very often, keep it fast!!!!
974 *
975 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
976 * string at start of line. Warning: *headp is only set if it's a non-zero
977 * value, init to 0 before calling.
978 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000979 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100980win_lbr_chartabsize(
981 win_T *wp,
982 char_u *line UNUSED, /* start of the line */
983 char_u *s,
984 colnr_T col,
985 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000986{
987#ifdef FEAT_LINEBREAK
988 int c;
989 int size;
990 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +0200991 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000992 colnr_T colmax;
993 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000994 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995 int numberextra;
996 char_u *ps;
997 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000998 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000999
1000 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001001 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001002 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001003 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004#endif
1005 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001006 if (wp->w_p_wrap)
1007 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001008 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1009 }
1010
1011#ifdef FEAT_LINEBREAK
1012 /*
1013 * First get normal size, without 'linebreak'
1014 */
1015 size = win_chartabsize(wp, s, col);
1016 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001017 if (tab_corr)
1018 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001019
1020 /*
1021 * If 'linebreak' set check at a blank before a non-blank if the line
1022 * needs a break here
1023 */
1024 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001025 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001026 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001027 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001028 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001029 {
1030 /*
1031 * Count all characters from first non-blank after a blank up to next
1032 * non-blank after a blank.
1033 */
1034 numberextra = win_col_off(wp);
1035 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001036 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001038 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001039 colmax += col_adj;
1040 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001041 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001042 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001043 }
1044
Bram Moolenaar071d4272004-06-13 20:20:40 +00001045 for (;;)
1046 {
1047 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001048 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001049 c = *s;
1050 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001051 && (VIM_ISBREAK(c)
1052 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001053 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001054 break;
1055
1056 col2 += win_chartabsize(wp, s, col2);
1057 if (col2 >= colmax) /* doesn't fit */
1058 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001059 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060 break;
1061 }
1062 }
1063 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1065 && wp->w_p_wrap && in_win_border(wp, col))
1066 {
1067 ++size; /* Count the ">" in the last column. */
1068 mb_added = 1;
1069 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070
1071 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001072 * May have to add something for 'breakindent' and/or 'showbreak'
1073 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 * Set *headp to the size of what we add.
1075 */
1076 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001077 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001079 colnr_T sbrlen = 0;
1080 int numberwidth = win_col_off(wp);
1081
1082 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001083 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001084 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001085 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001086 col -= wp->w_width;
1087 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001088 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001089 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001090 if (*p_sbr != NUL)
1091 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001092 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001093 if (col >= sbrlen)
1094 col -= sbrlen;
1095 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001096 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001097 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001098 else if (col > 0 && numberextra > 0)
1099 col += numberwidth - win_col_off2(wp);
1100
1101 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001103 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001104 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001105 added = 0;
1106 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001108 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001110 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001111 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001112 int prev_width = col
1113 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001114
1115 if (width <= 0)
1116 width = (colnr_T)1;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001117 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1118 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001119 // wrapped, add another length of 'sbr'
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 added += vim_strsize(p_sbr);
1121 }
1122 else
1123 added += vim_strsize(p_sbr);
1124 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001125 if (wp->w_p_bri)
1126 added += get_breakindent_win(wp, line);
1127
Bram Moolenaar95765082014-08-24 21:19:25 +02001128 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001129 if (col != 0)
1130 added = 0;
1131 }
1132 }
1133 if (headp != NULL)
1134 *headp = added + mb_added;
1135 return size;
1136#endif
1137}
1138
Bram Moolenaar071d4272004-06-13 20:20:40 +00001139/*
1140 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1141 * 'wrap' is on. This means we need to check for a double-byte character that
1142 * doesn't fit at the end of the screen line.
1143 */
1144 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001145win_nolbr_chartabsize(
1146 win_T *wp,
1147 char_u *s,
1148 colnr_T col,
1149 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001150{
1151 int n;
1152
1153 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1154 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001155# ifdef FEAT_VARTABS
1156 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1157 wp->w_buffer->b_p_vts_array);
1158# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001159 n = wp->w_buffer->b_p_ts;
1160 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001161# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162 }
1163 n = ptr2cells(s);
1164 /* Add one cell for a double-width character in the last column of the
1165 * window, displayed with a ">". */
1166 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1167 {
1168 if (headp != NULL)
1169 *headp = 1;
1170 return 3;
1171 }
1172 return n;
1173}
1174
1175/*
1176 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1177 * "wp".
1178 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001179 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001180in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001181{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001182 int width1; /* width of first line (after line number) */
1183 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184
Bram Moolenaar071d4272004-06-13 20:20:40 +00001185 if (wp->w_width == 0) /* there is no border */
1186 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001187 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001188 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001190 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001191 return TRUE;
1192 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001193 if (width2 <= 0)
1194 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195 return ((vcol - width1) % width2 == width2 - 1);
1196}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197
1198/*
1199 * Get virtual column number of pos.
1200 * start: on the first position of this character (TAB, ctrl)
1201 * cursor: where the cursor is on this character (first char, except for TAB)
1202 * end: on the last position of this character (TAB, ctrl)
1203 *
1204 * This is used very often, keep it fast!
1205 */
1206 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001207getvcol(
1208 win_T *wp,
1209 pos_T *pos,
1210 colnr_T *start,
1211 colnr_T *cursor,
1212 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001213{
1214 colnr_T vcol;
1215 char_u *ptr; /* points to current char */
1216 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001217 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001218 int incr;
1219 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001220#ifdef FEAT_VARTABS
1221 int *vts = wp->w_buffer->b_p_vts_array;
1222#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001223 int ts = wp->w_buffer->b_p_ts;
1224 int c;
1225
1226 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001227 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001228 if (pos->col == MAXCOL)
1229 posptr = NULL; /* continue until the NUL */
1230 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001231 {
Bram Moolenaar955f1982017-02-05 15:10:51 +01001232 /* Special check for an empty line, which can happen on exit, when
1233 * ml_get_buf() always returns an empty string. */
1234 if (*ptr == NUL)
1235 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001236 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001237 if (has_mbyte)
1238 /* always start on the first byte */
1239 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001240 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001241
1242 /*
1243 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001244 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1245 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001246 * Also use this when 'list' is set but tabs take their normal size.
1247 */
1248 if ((!wp->w_p_list || lcs_tab1 != NUL)
1249#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001250 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001251#endif
1252 )
1253 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254 for (;;)
1255 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001256 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257 c = *ptr;
1258 /* make sure we don't go past the end of the line */
1259 if (c == NUL)
1260 {
1261 incr = 1; /* NUL at end of line only takes one column */
1262 break;
1263 }
1264 /* A tab gets expanded, depending on the current column */
1265 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001266#ifdef FEAT_VARTABS
1267 incr = tabstop_padding(vcol, ts, vts);
1268#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001270#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 else
1272 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 if (has_mbyte)
1274 {
1275 /* For utf-8, if the byte is >= 0x80, need to look at
1276 * further bytes to find the cell width. */
1277 if (enc_utf8 && c >= 0x80)
1278 incr = utf_ptr2cells(ptr);
1279 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001280 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001281
1282 /* If a double-cell char doesn't fit at the end of a line
1283 * it wraps to the next line, it's like this char is three
1284 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001285 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1286 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001287 {
1288 ++incr;
1289 head = 1;
1290 }
1291 }
1292 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001293 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001294 }
1295
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001296 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297 break;
1298
1299 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001300 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001301 }
1302 }
1303 else
1304 {
1305 for (;;)
1306 {
1307 /* A tab gets expanded, depending on the current column */
1308 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001309 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 /* make sure we don't go past the end of the line */
1311 if (*ptr == NUL)
1312 {
1313 incr = 1; /* NUL at end of line only takes one column */
1314 break;
1315 }
1316
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001317 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001318 break;
1319
1320 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001321 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001322 }
1323 }
1324 if (start != NULL)
1325 *start = vcol + head;
1326 if (end != NULL)
1327 *end = vcol + incr - 1;
1328 if (cursor != NULL)
1329 {
1330 if (*ptr == TAB
1331 && (State & NORMAL)
1332 && !wp->w_p_list
1333 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001334 && !(VIsual_active
1335 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001336 )
1337 *cursor = vcol + incr - 1; /* cursor at end */
1338 else
1339 *cursor = vcol + head; /* cursor at start */
1340 }
1341}
1342
1343/*
1344 * Get virtual cursor column in the current window, pretending 'list' is off.
1345 */
1346 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001347getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001348{
1349 int list_save = curwin->w_p_list;
1350 colnr_T vcol;
1351
1352 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001353 if (posp->coladd)
1354 getvvcol(curwin, posp, NULL, &vcol, NULL);
1355 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001356 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357 curwin->w_p_list = list_save;
1358 return vcol;
1359}
1360
Bram Moolenaar071d4272004-06-13 20:20:40 +00001361/*
1362 * Get virtual column in virtual mode.
1363 */
1364 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001365getvvcol(
1366 win_T *wp,
1367 pos_T *pos,
1368 colnr_T *start,
1369 colnr_T *cursor,
1370 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001371{
1372 colnr_T col;
1373 colnr_T coladd;
1374 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001375 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001376
1377 if (virtual_active())
1378 {
1379 /* For virtual mode, only want one value */
1380 getvcol(wp, pos, &col, NULL, NULL);
1381
1382 coladd = pos->coladd;
1383 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 /* Cannot put the cursor on part of a wide character. */
1385 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001386 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387 {
1388 int c = (*mb_ptr2char)(ptr + pos->col);
1389
1390 if (c != TAB && vim_isprintc(c))
1391 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001392 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001393 if (coladd > endadd) /* past end of line */
1394 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001395 else
1396 coladd = 0;
1397 }
1398 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001399 col += coladd;
1400 if (start != NULL)
1401 *start = col;
1402 if (cursor != NULL)
1403 *cursor = col;
1404 if (end != NULL)
1405 *end = col + endadd;
1406 }
1407 else
1408 getvcol(wp, pos, start, cursor, end);
1409}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001410
Bram Moolenaar071d4272004-06-13 20:20:40 +00001411/*
1412 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1413 * Used for Visual block mode.
1414 */
1415 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001416getvcols(
1417 win_T *wp,
1418 pos_T *pos1,
1419 pos_T *pos2,
1420 colnr_T *left,
1421 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001422{
1423 colnr_T from1, from2, to1, to2;
1424
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001425 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001426 {
1427 getvvcol(wp, pos1, &from1, NULL, &to1);
1428 getvvcol(wp, pos2, &from2, NULL, &to2);
1429 }
1430 else
1431 {
1432 getvvcol(wp, pos2, &from1, NULL, &to1);
1433 getvvcol(wp, pos1, &from2, NULL, &to2);
1434 }
1435 if (from2 < from1)
1436 *left = from2;
1437 else
1438 *left = from1;
1439 if (to2 > to1)
1440 {
1441 if (*p_sel == 'e' && from2 - 1 >= to1)
1442 *right = from2 - 1;
1443 else
1444 *right = to2;
1445 }
1446 else
1447 *right = to1;
1448}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001449
1450/*
1451 * skipwhite: skip over ' ' and '\t'.
1452 */
1453 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001454skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001455{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001456 char_u *p = q;
1457
Bram Moolenaar1c465442017-03-12 20:10:05 +01001458 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001459 ++p;
1460 return p;
1461}
1462
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the current line, as obtained from ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1472
1473 int
1474getwhitecols(char_u *p)
1475{
1476 return skipwhite(p) - p;
1477}
1478
1479/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001480 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001481 */
1482 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001483skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001484{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001485 char_u *p = q;
1486
Bram Moolenaar071d4272004-06-13 20:20:40 +00001487 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1488 ++p;
1489 return p;
1490}
1491
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001492#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001493/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001494 * skip over binary digits
1495 */
1496 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001497skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001498{
1499 char_u *p = q;
1500
1501 while (vim_isbdigit(*p)) /* skip to next non-digit */
1502 ++p;
1503 return p;
1504}
1505
1506/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001507 * skip over digits and hex characters
1508 */
1509 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001510skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001511{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001512 char_u *p = q;
1513
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001514 while (vim_isxdigit(*p)) /* skip to next non-digit */
1515 ++p;
1516 return p;
1517}
1518#endif
1519
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001520/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001521 * skip to bin digit (or NUL after the string)
1522 */
1523 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001524skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001525{
1526 char_u *p = q;
1527
1528 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1529 ++p;
1530 return p;
1531}
1532
1533/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001534 * skip to digit (or NUL after the string)
1535 */
1536 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001537skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001538{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001539 char_u *p = q;
1540
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001541 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1542 ++p;
1543 return p;
1544}
1545
1546/*
1547 * skip to hex character (or NUL after the string)
1548 */
1549 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001550skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001551{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001552 char_u *p = q;
1553
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001554 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1555 ++p;
1556 return p;
1557}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001558
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only the ASCII characters '0' to '9' are accepted.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return '0' <= c && c <= '9';
}
1570
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only the ASCII characters 0-9, a-f and A-F are accepted.
 */
    int
vim_isxdigit(int c)
{
    int		is_decimal = (c >= '0' && c <= '9');
    int		is_lower_hex = (c >= 'a' && c <= 'f');
    int		is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1583
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits; can
 * handle characters > 0x100.
 * Only '0' and '1' are accepted.
 */
    int
vim_isbdigit(int c)
{
    return c == '0' || c == '1';
}
1593
Bram Moolenaar78622822005-08-23 21:00:13 +00001594/*
1595 * Vim's own character class functions. These exist because many library
1596 * islower()/toupper() etc. do not work properly: they crash when used with
1597 * invalid values or can't handle latin1 when the locale is C.
1598 * Speed is most important here.
1599 */
1600#define LATIN1LOWER 'l'
1601#define LATIN1UPPER 'U'
1602
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001603static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001604static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1605static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001606
1607 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001608vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001609{
1610 if (c <= '@')
1611 return FALSE;
1612 if (c >= 0x80)
1613 {
1614 if (enc_utf8)
1615 return utf_islower(c);
1616 if (c >= 0x100)
1617 {
1618#ifdef HAVE_ISWLOWER
1619 if (has_mbyte)
1620 return iswlower(c);
1621#endif
1622 /* islower() can't handle these chars and may crash */
1623 return FALSE;
1624 }
1625 if (enc_latin1like)
1626 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1627 }
1628 return islower(c);
1629}
1630
1631 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001632vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001633{
1634 if (c <= '@')
1635 return FALSE;
1636 if (c >= 0x80)
1637 {
1638 if (enc_utf8)
1639 return utf_isupper(c);
1640 if (c >= 0x100)
1641 {
1642#ifdef HAVE_ISWUPPER
1643 if (has_mbyte)
1644 return iswupper(c);
1645#endif
1646 /* islower() can't handle these chars and may crash */
1647 return FALSE;
1648 }
1649 if (enc_latin1like)
1650 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1651 }
1652 return isupper(c);
1653}
1654
1655 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001656vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001657{
1658 if (c <= '@')
1659 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001660 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001661 {
1662 if (enc_utf8)
1663 return utf_toupper(c);
1664 if (c >= 0x100)
1665 {
1666#ifdef HAVE_TOWUPPER
1667 if (has_mbyte)
1668 return towupper(c);
1669#endif
1670 /* toupper() can't handle these chars and may crash */
1671 return c;
1672 }
1673 if (enc_latin1like)
1674 return latin1upper[c];
1675 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001676 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1677 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001678 return TOUPPER_LOC(c);
1679}
1680
1681 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001682vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001683{
1684 if (c <= '@')
1685 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001686 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001687 {
1688 if (enc_utf8)
1689 return utf_tolower(c);
1690 if (c >= 0x100)
1691 {
1692#ifdef HAVE_TOWLOWER
1693 if (has_mbyte)
1694 return towlower(c);
1695#endif
1696 /* tolower() can't handle these chars and may crash */
1697 return c;
1698 }
1699 if (enc_latin1like)
1700 return latin1lower[c];
1701 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001702 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1703 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001704 return TOLOWER_LOC(c);
1705}
Bram Moolenaar78622822005-08-23 21:00:13 +00001706
Bram Moolenaar071d4272004-06-13 20:20:40 +00001707/*
1708 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1709 */
1710 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001711skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001712{
1713 while (*p != ' ' && *p != '\t' && *p != NUL)
1714 ++p;
1715 return p;
1716}
1717
Bram Moolenaar071d4272004-06-13 20:20:40 +00001718/*
1719 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1720 */
1721 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001722skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001723{
1724 while (*p != ' ' && *p != '\t' && *p != NUL)
1725 {
1726 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1727 ++p;
1728 ++p;
1729 }
1730 return p;
1731}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732
1733/*
1734 * Getdigits: Get a number from a string and skip over it.
1735 * Note: the argument is a pointer to a char_u pointer!
1736 */
1737 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001738getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739{
1740 char_u *p;
1741 long retval;
1742
1743 p = *pp;
1744 retval = atol((char *)p);
1745 if (*p == '-') /* skip negative sign */
1746 ++p;
1747 p = skipdigits(p); /* skip to next non-digit */
1748 *pp = p;
1749 return retval;
1750}
1751
1752/*
1753 * Return TRUE if "lbuf" is empty or only contains blanks.
1754 */
1755 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001756vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757{
1758 char_u *p;
1759
1760 p = skipwhite(lbuf);
1761 return (*p == NUL || *p == '\r' || *p == '\n');
1762}
1763
1764/*
1765 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001766 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1767 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768 * 0 decimal
1769 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001770 * 'B' bin
1771 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772 * 'X' hex
1773 * 'x' hex
1774 * If "len" is not NULL, the length of the number in characters is returned.
1775 * If "nptr" is not NULL, the signed result is returned in it.
1776 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001777 * If "what" contains STR2NR_BIN recognize binary numbers
1778 * If "what" contains STR2NR_OCT recognize octal numbers
1779 * If "what" contains STR2NR_HEX recognize hex numbers
1780 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001781 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001782 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 */
1784 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001785vim_str2nr(
1786 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001787 int *prep, // return: type of number 0 = decimal, 'x'
1788 // or 'X' is hex, '0' = octal, 'b' or 'B'
1789 // is bin
1790 int *len, // return: detected length of number
1791 int what, // what numbers to recognize
1792 varnumber_T *nptr, // return: signed result
1793 uvarnumber_T *unptr, // return: unsigned result
1794 int maxlen, // max length of string to check
1795 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001796{
1797 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001798 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001799 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001800 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001801 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001802
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001803 if (len != NULL)
1804 *len = 0;
1805
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806 if (ptr[0] == '-')
1807 {
1808 negative = TRUE;
1809 ++ptr;
1810 }
1811
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001812 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001813 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1814 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001816 pre = ptr[1];
1817 if ((what & STR2NR_HEX)
1818 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1819 && (maxlen == 0 || maxlen > 2))
1820 /* hexadecimal */
1821 ptr += 2;
1822 else if ((what & STR2NR_BIN)
1823 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1824 && (maxlen == 0 || maxlen > 2))
1825 /* binary */
1826 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 else
1828 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001829 /* decimal or octal, default is decimal */
1830 pre = 0;
1831 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001832 {
1833 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001834 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001835 {
1836 if (ptr[n] > '7')
1837 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001838 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001839 break;
1840 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001841 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001842 }
1843 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001844 }
1845 }
1846
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001847 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001848 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001849 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1850 {
1851 /* bin */
1852 if (pre != 0)
1853 n += 2; /* skip over "0b" */
1854 while ('0' <= *ptr && *ptr <= '1')
1855 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001856 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001857 if (un <= UVARNUM_MAX / 2)
1858 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001859 else
1860 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001861 ++ptr;
1862 if (n++ == maxlen)
1863 break;
1864 }
1865 }
1866 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001868 /* octal */
1869 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001870 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001871 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001872 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001873 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1874 else
1875 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001876 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001877 if (n++ == maxlen)
1878 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001879 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001880 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001881 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001882 {
1883 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001884 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001885 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001886 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001888 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001889 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001890 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1891 else
1892 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001893 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001894 if (n++ == maxlen)
1895 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001896 }
1897 }
1898 else
1899 {
1900 /* decimal */
1901 while (VIM_ISDIGIT(*ptr))
1902 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001903 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1904
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001905 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001906 if (un < UVARNUM_MAX / 10
1907 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1908 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001909 else
1910 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001911 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001912 if (n++ == maxlen)
1913 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914 }
1915 }
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001916 // Check for an alpha-numeric character immediately following, that is
1917 // most likely a typo.
1918 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1919 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001920
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001921 if (prep != NULL)
1922 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923 if (len != NULL)
1924 *len = (int)(ptr - start);
1925 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001926 {
1927 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001928 {
1929 /* avoid ubsan error for overflow */
1930 if (un > VARNUM_MAX)
1931 *nptr = VARNUM_MIN;
1932 else
1933 *nptr = -(varnumber_T)un;
1934 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001935 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001936 {
1937 if (un > VARNUM_MAX)
1938 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001939 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001940 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001941 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942 if (unptr != NULL)
1943 *unptr = un;
1944}
1945
/*
 * Return the numeric value of the hex character "c".
 * The result is only meaningful for '0' - '9', 'A' - 'F' and 'a' - 'f'.
 */
    int
hex2nr(int c)
{
    int	    zero = '0';		// character that stands for value zero

    if (c >= 'a' && c <= 'f')
	zero = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
	zero = 'A' - 10;
    return c - zero;
}
1959
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Return the byte value written as the two hex characters at "p".
 * Return -1 when either character is not a hex digit.
 */
    int
hexhex2nr(char_u *p)
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
    {
	int	hi = hex2nr(p[0]);
	int	lo = hex2nr(p[1]);

	return (hi << 4) + lo;
    }
    return -1;
}
#endif
1973
1974/*
1975 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001976 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00001977 * backslash is not a normal file name character.
1978 * '$' is a valid file name character, we don't remove the backslash before
1979 * it. This means it is not possible to use an environment variable after a
1980 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1981 * Although "\ name" is valid, the backslash in "Program\ files" must be
1982 * removed. Assume a file name doesn't start with a space.
1983 * For multi-byte names, never remove a backslash before a non-ascii
1984 * character, assume that all multi-byte characters are valid file name
1985 * characters.
1986 */
1987 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001988rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989{
1990#ifdef BACKSLASH_IN_FILENAME
1991 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001992 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00001993 && (str[1] == ' '
1994 || (str[1] != NUL
1995 && str[1] != '*'
1996 && str[1] != '?'
1997 && !vim_isfilec(str[1]))));
1998#else
1999 return (str[0] == '\\' && str[1] != NUL);
2000#endif
2001}
2002
2003/*
2004 * Halve the number of backslashes in a file name argument.
2005 * For MS-DOS we only do this if the character after the backslash
2006 * is not a normal file character.
2007 */
2008 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002009backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002010{
2011 for ( ; *p; ++p)
2012 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002013 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002014}
2015
2016/*
2017 * backslash_halve() plus save the result in allocated memory.
2018 */
2019 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002020backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021{
2022 char_u *res;
2023
2024 res = vim_strsave(p);
2025 if (res == NULL)
2026 return p;
2027 backslash_halve(res);
2028 return res;
2029}
2030
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * EBCDIC to ASCII conversion table, indexed by the EBCDIC byte value.
 * Unashamedly taken from xxd.c!  The first 64 entries have been added to map
 * the control characters defined in ascii.h.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert the first "len" bytes of "buffer" from EBCDIC to ASCII, in place,
 * by looking each byte up in ebcdic2ascii_tab[].  Only useful when 7-bit
 * ASCII characters are wanted out the other end.
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    char_u	*p = buffer;

    for ( ; len > 0; --len)
    {
	*p = ebcdic2ascii_tab[*p];
	++p;
    }
}
#endif