blob: c1ca3f97c769154ca812ace31d24a9a7f504287a [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
21/* b_chartab[] is an array of 32 bytes, each bit representing one of the
22 * characters 0-255. */
23#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
33#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
34#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
35#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
36#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
37
38/*
39 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000040 * characters for current buffer.
41 *
42 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
43 * 'isprint' and 'encoding'.
44 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010045 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * - For non-multi-byte index with the byte (same as the character).
47 * - For DBCS index with the first byte.
48 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
49 * the same as the character, if the first byte is 0x80 and above it depends
50 * on further bytes).
51 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010052 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000053 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
54 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
55 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
56 * translate the character before displaying it). Note that only DBCS
57 * characters can have 2 display cells and still be printable.
58 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
59 * - CT_ID_CHAR bit is set when the character can be in an identifier.
60 *
61 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
62 * error, OK otherwise.
63 */
64 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010065init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000066{
67 return buf_init_chartab(curbuf, TRUE);
68}
69
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071buf_init_chartab(
72 buf_T *buf,
73 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000074{
75 int c;
76 int c2;
77 char_u *p;
78 int i;
79 int tilde;
80 int do_isalpha;
81
82 if (global)
83 {
84 /*
85 * Set the default size for printable characters:
86 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
87 * This also inits all 'isident' and 'isfname' flags to FALSE.
88 *
89 * EBCDIC: all chars below ' ' are not printable, all others are
90 * printable.
91 */
92 c = 0;
93 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010094 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000095#ifdef EBCDIC
96 while (c < 255)
97#else
98 while (c <= '~')
99#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101#ifdef FEAT_FKMAP
102 if (p_altkeymap)
103 {
104 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100105 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 }
107#endif
108 while (c < 256)
109 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000110 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
111 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100112 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 /* euc-jp characters starting with 0x8e are single width */
114 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100115 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116 /* other double-byte chars can be printable AND double-width */
117 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100118 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100121 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 }
123
Bram Moolenaar071d4272004-06-13 20:20:40 +0000124 /* Assume that every multi-byte char is a filename character. */
125 for (c = 1; c < 256; ++c)
126 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
127 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
128 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100129 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000130 }
131
132 /*
133 * Init word char flags all to FALSE
134 */
135 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000136 if (enc_dbcs != 0)
137 for (c = 0; c < 256; ++c)
138 {
139 /* double-byte characters are probably word characters */
140 if (MB_BYTE2LEN(c) == 2)
141 SET_CHARTAB(buf, c);
142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143
144#ifdef FEAT_LISP
145 /*
146 * In lisp mode the '-' character is included in keywords.
147 */
148 if (buf->b_p_lisp)
149 SET_CHARTAB(buf, '-');
150#endif
151
152 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
153 * options Each option is a list of characters, character numbers or
154 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
155 */
156 for (i = global ? 0 : 3; i <= 3; ++i)
157 {
158 if (i == 0)
159 p = p_isi; /* first round: 'isident' */
160 else if (i == 1)
161 p = p_isp; /* second round: 'isprint' */
162 else if (i == 2)
163 p = p_isf; /* third round: 'isfname' */
164 else /* i == 3 */
165 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
166
167 while (*p)
168 {
169 tilde = FALSE;
170 do_isalpha = FALSE;
171 if (*p == '^' && p[1] != NUL)
172 {
173 tilde = TRUE;
174 ++p;
175 }
176 if (VIM_ISDIGIT(*p))
177 c = getdigits(&p);
178 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000179 if (has_mbyte)
180 c = mb_ptr2char_adv(&p);
181 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000182 c = *p++;
183 c2 = -1;
184 if (*p == '-' && p[1] != NUL)
185 {
186 ++p;
187 if (VIM_ISDIGIT(*p))
188 c2 = getdigits(&p);
189 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (has_mbyte)
191 c2 = mb_ptr2char_adv(&p);
192 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000193 c2 = *p++;
194 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000195 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000196 || !(*p == NUL || *p == ','))
197 return FAIL;
198
199 if (c2 == -1) /* not a range */
200 {
201 /*
202 * A single '@' (not "@-@"):
203 * Decide on letters being ID/printable/keyword chars with
204 * standard function isalpha(). This takes care of locale for
205 * single-byte characters).
206 */
207 if (c == '@')
208 {
209 do_isalpha = TRUE;
210 c = 1;
211 c2 = 255;
212 }
213 else
214 c2 = c;
215 }
216 while (c <= c2)
217 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000218 /* Use the MB_ functions here, because isalpha() doesn't
219 * work properly when 'encoding' is "latin1" and the locale is
220 * "C". */
221 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222#ifdef FEAT_FKMAP
223 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
224#endif
225 )
226 {
227 if (i == 0) /* (re)set ID flag */
228 {
229 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100230 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100232 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233 }
234 else if (i == 1) /* (re)set printable */
235 {
236 if ((c < ' '
237#ifndef EBCDIC
238 || c > '~'
239#endif
240#ifdef FEAT_FKMAP
241 || (p_altkeymap
242 && (F_isalpha(c) || F_isdigit(c)))
243#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100244 // For double-byte we keep the cell width, so
245 // that we can detect it from the first byte.
246 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000247 {
248 if (tilde)
249 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100252 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 }
254 else
255 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100256 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
257 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 }
259 }
260 }
261 else if (i == 2) /* (re)set fname flag */
262 {
263 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100264 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100266 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 }
268 else /* i == 3 */ /* (re)set keyword flag */
269 {
270 if (tilde)
271 RESET_CHARTAB(buf, c);
272 else
273 SET_CHARTAB(buf, c);
274 }
275 }
276 ++c;
277 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100278
279 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000280 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100281 if (c == ',' && *p == NUL)
282 /* Trailing comma is not allowed. */
283 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284 }
285 }
286 chartab_initialized = TRUE;
287 return OK;
288}
289
290/*
291 * Translate any special characters in buf[bufsize] in-place.
292 * The result is a string with only printable characters, but if there is not
293 * enough room, not all characters will be translated.
294 */
295 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100296trans_characters(
297 char_u *buf,
298 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299{
300 int len; /* length of string needing translation */
301 int room; /* room in buffer after string */
302 char_u *trs; /* translated character */
303 int trs_len; /* length of trs[] */
304
305 len = (int)STRLEN(buf);
306 room = bufsize - len;
307 while (*buf != 0)
308 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000309 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000310 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000311 len -= trs_len;
312 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000313 {
314 trs = transchar_byte(*buf);
315 trs_len = (int)STRLEN(trs);
316 if (trs_len > 1)
317 {
318 room -= trs_len - 1;
319 if (room <= 0)
320 return;
321 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
322 }
323 mch_memmove(buf, trs, (size_t)trs_len);
324 --len;
325 }
326 buf += trs_len;
327 }
328}
329
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Return a copy of "s" in allocated memory in which special characters have
 * been replaced with printable characters.
 * Returns NULL when out of memory.
 */
    char_u *
transstr(char_u *s)
{
    char_u  *res;
    char_u  *p;
    int     l, len, c;
    char_u  hexbuf[11];

    if (!has_mbyte)
        res = alloc((unsigned)(vim_strsize(s) + 1));
    else
    {
        /* First pass: compute the length of the result, taking account of
         * unprintable multi-byte characters. */
        len = 0;
        for (p = s; *p != NUL; )
        {
            l = (*mb_ptr2len)(p);
            if (l > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                len += (l > 0) ? l : 4;     /* 4: illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }

    if (res == NULL)
        return NULL;

    /* Second pass: build the result by appending piece by piece. */
    *res = NUL;
    for (p = s; *p != NUL; )
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(res, p, l);     /* append printable multi-byte char */
            else
                transchar_hex(res + STRLEN(res), c);
            p += l;
        }
        else
            STRCAT(res, transchar_byte(*p++));
    }
    return res;
}
#endif
399
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale for single-byte characters.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the text is truncated when it
 * does not fit.
 */
    char_u *
str_foldcase(
    char_u  *str,
    int     orglen,
    char_u  *buf,
    int     buflen)
{
    garray_T    ga;
    int         i;
    int         len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)  (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }
    else
    {
        if (len >= buflen)      /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }

    /* Make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
        if (!enc_utf8 && !(has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            /* Single-byte character. */
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
            continue;
        }

        if (enc_utf8)
        {
            int c = utf_ptr2char(STR_PTR(i));
            int olen = utf_ptr2len(STR_PTR(i));
            int lc = utf_tolower(c);

            /* Only replace the character when it is not an invalid
             * sequence (ASCII character or more than one byte) and
             * utf_tolower() doesn't return the original character. */
            if ((c < 0x80 || olen > 1) && c != lc)
            {
                int nlen = utf_char2len(lc);

                /* The lower-case form needs more bytes: check there is room
                 * for it. */
                if (nlen > olen
                        && (buf == NULL
                            ? ga_grow(&ga, nlen - olen + 1) == FAIL
                            : len + nlen - olen >= buflen))
                {
                    /* out of memory, keep old char */
                    lc = c;
                    nlen = olen;
                }

                /* If the byte length changes need to shift the following
                 * characters forward or backward. */
                if (olen != nlen)
                {
                    if (buf == NULL)
                    {
                        STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
                        ga.ga_len += nlen - olen;
                    }
                    else
                    {
                        STRMOVE(buf + i + nlen, buf + i + olen);
                        len += nlen - olen;
                    }
                }
                (void)utf_char2bytes(lc, STR_PTR(i));
            }
        }

        /* skip to next multi-byte char */
        i += (*mb_ptr2len)(STR_PTR(i));
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
512
513/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100514 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100516 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 * Does NOT work for multi-byte characters, c must be <= 255.
518 * Also doesn't work for the first byte of a multi-byte, "c" must be a
519 * character!
520 */
521static char_u transchar_buf[7];
522
523 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100524transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525{
526 int i;
527
528 i = 0;
529 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
530 {
531 transchar_buf[0] = '~';
532 transchar_buf[1] = '@';
533 i = 2;
534 c = K_SECOND(c);
535 }
536
537 if ((!chartab_initialized && (
538#ifdef EBCDIC
539 (c >= 64 && c < 255)
540#else
541 (c >= ' ' && c <= '~')
542#endif
543#ifdef FEAT_FKMAP
Bram Moolenaaree2615a2016-07-02 18:25:34 +0200544 || (p_altkeymap && F_ischar(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545#endif
546 )) || (c < 256 && vim_isprintc_strict(c)))
547 {
548 /* printable character */
549 transchar_buf[i] = c;
550 transchar_buf[i + 1] = NUL;
551 }
552 else
553 transchar_nonprint(transchar_buf + i, c);
554 return transchar_buf;
555}
556
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557/*
558 * Like transchar(), but called with a byte instead of a character. Checks
559 * for an illegal UTF-8 byte.
560 */
561 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100562transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (enc_utf8 && c >= 0x80)
565 {
566 transchar_nonprint(transchar_buf, c);
567 return transchar_buf;
568 }
569 return transchar(c);
570}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572/*
573 * Convert non-printable character to two or more printable characters in
574 * "buf[]". "buf" needs to be able to hold five bytes.
575 * Does NOT work for multi-byte characters, c must be <= 255.
576 */
577 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100578transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000579{
580 if (c == NL)
581 c = NUL; /* we use newline in place of a NUL */
582 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
583 c = NL; /* we use CR in place of NL in this case */
584
585 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
586 transchar_hex(buf, c);
587
588#ifdef EBCDIC
589 /* For EBCDIC only the characters 0-63 and 255 are not printable */
590 else if (CtrlChar(c) != 0 || c == DEL)
591#else
592 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
593#endif
594 {
595 buf[0] = '^';
596#ifdef EBCDIC
597 if (c == DEL)
598 buf[1] = '?'; /* DEL displayed as ^? */
599 else
600 buf[1] = CtrlChar(c);
601#else
602 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
603#endif
604
605 buf[2] = NUL;
606 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000607 else if (enc_utf8 && c >= 0x80)
608 {
609 transchar_hex(buf, c);
610 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611#ifndef EBCDIC
612 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
613 {
614 buf[0] = '|';
615 buf[1] = c - 0x80;
616 buf[2] = NUL;
617 }
618#else
619 else if (c < 64)
620 {
621 buf[0] = '~';
622 buf[1] = MetaChar(c);
623 buf[2] = NUL;
624 }
625#endif
626 else /* 0x80 - 0x9f and 0xff */
627 {
628 /*
629 * TODO: EBCDIC I don't know what to do with this chars, so I display
630 * them as '~?' for now
631 */
632 buf[0] = '~';
633#ifdef EBCDIC
634 buf[1] = '?'; /* 0xff displayed as ~? */
635#else
636 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
637#endif
638 buf[2] = NUL;
639 }
640}
641
642 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100643transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000644{
645 int i = 0;
646
647 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000648 if (c > 255)
649 {
650 buf[++i] = nr2hex((unsigned)c >> 12);
651 buf[++i] = nr2hex((unsigned)c >> 8);
652 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000653 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000654 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000655 buf[++i] = '>';
656 buf[++i] = NUL;
657}
658
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned    nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
671
672/*
673 * Return number of display cells occupied by byte "b".
674 * Caller must make sure 0 <= b <= 255.
675 * For multi-byte mode "b" must be the first byte of a character.
676 * A TAB is counted as two cells: "^I".
677 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
678 * cells depends on further bytes.
679 */
680 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100681byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8 && b >= 0x80)
684 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100685 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000686}
687
688/*
689 * Return number of display cells occupied by character "c".
690 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
691 * A TAB is counted as two cells: "^I" or four: "<09>".
692 */
693 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100694char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695{
696 if (IS_SPECIAL(c))
697 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000698 if (c >= 0x80)
699 {
700 /* UTF-8: above 0x80 need to check the value */
701 if (enc_utf8)
702 return utf_char2cells(c);
703 /* DBCS: double-byte means double-width, except for euc-jp with first
704 * byte 0x8e */
705 if (enc_dbcs != 0 && c >= 0x100)
706 {
707 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
708 return 1;
709 return 2;
710 }
711 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100712 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713}
714
715/*
716 * Return number of display cells occupied by character at "*p".
717 * A TAB is counted as two cells: "^I" or four: "<09>".
718 */
719 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100720ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
723 if (enc_utf8 && *p >= 0x80)
724 return utf_ptr2cells(p);
725 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100726 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727}
728
729/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100730 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 * counting TABs as two characters: "^I".
732 */
733 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100734vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000735{
736 return vim_strnsize(s, (int)MAXCOL);
737}
738
739/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100740 * Return the number of character cells string "s[len]" will take on the
741 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 */
743 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100744vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000745{
746 int size = 0;
747
748 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749 if (has_mbyte)
750 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000751 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000752
753 size += ptr2cells(s);
754 s += l;
755 len -= l - 1;
756 }
757 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000758 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100759
Bram Moolenaar071d4272004-06-13 20:20:40 +0000760 return size;
761}
762
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return,
 * thus it must be the last thing in the function that uses it.
 * A TAB is expanded to the next tab stop, unless 'list' is set for "wp" and
 * lcs_tab1 is zero.  Everything else is measured with ptr2cells().
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
793 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
794}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000795
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* Expands to an if/else that returns in both branches. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
803
804/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200805 * Return the number of characters the string 's' will take on the screen,
806 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 */
808 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100809linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810{
Bram Moolenaardc536092010-07-18 15:45:49 +0200811 return linetabsize_col(0, s);
812}
813
814/*
815 * Like linetabsize(), but starting at column "startcol".
816 */
817 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100818linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200819{
820 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822
823 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200824 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 return (int)col;
826}
827
828/*
829 * Like linetabsize(), but for a given window instead of the current one.
830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
834 colnr_T col = 0;
835 char_u *s;
836
Bram Moolenaar597a4222014-06-25 14:39:50 +0200837 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100838 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200839 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 return (int)col;
841}
842
843/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000844 * Return TRUE if 'c' is a normal identifier character:
845 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 */
847 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100848vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100850 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851}
852
853/*
854 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100855 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 * For multi-byte characters mb_get_class() is used (builtin rules).
857 */
858 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100859vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100861 return vim_iswordc_buf(c, curbuf);
862}
863
864 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100865vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100866{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867 if (c >= 0x100)
868 {
869 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000870 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100872 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100873 return FALSE;
874 }
875 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000876}
877
878/*
879 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
880 */
881 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100882vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000883{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885}
886
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100888vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100890 int c = *p;
891
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100892 if (has_mbyte && MB_BYTE2LEN(c) > 1)
893 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100894 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896
897/*
898 * return TRUE if 'c' is a valid file-name character
899 * Assume characters above 0x100 are valid (multi-byte).
900 */
901 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100902vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100904 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000905}
906
907/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000908 * return TRUE if 'c' is a valid file-name character or a wildcard character
909 * Assume characters above 0x100 are valid (multi-byte).
910 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
911 * returns false.
912 */
913 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100914vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000915{
916 char_u buf[2];
917
918 buf[0] = (char_u)c;
919 buf[1] = NUL;
920 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
921}
922
923/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200924 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 * Assume characters above 0x100 are printable (multi-byte), except for
926 * Unicode.
927 */
928 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100929vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000931 if (enc_utf8 && c >= 0x100)
932 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100933 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934}
935
936/*
937 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
938 * byte of a double-byte character.
939 */
940 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100941vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
944 return FALSE;
945 if (enc_utf8 && c >= 0x100)
946 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100947 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948}
949
950/*
951 * like chartabsize(), but also check for line breaks on the screen
952 */
953 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100954lbr_chartabsize(
955 char_u *line UNUSED, /* start of the line */
956 unsigned char *s,
957 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958{
959#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200960 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000961 {
962#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000963 if (curwin->w_p_wrap)
964 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
966#ifdef FEAT_LINEBREAK
967 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200968 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000969#endif
970}
971
972/*
973 * Call lbr_chartabsize() and advance the pointer.
974 */
975 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100976lbr_chartabsize_adv(
977 char_u *line, /* start of the line */
978 char_u **s,
979 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000980{
981 int retval;
982
Bram Moolenaar597a4222014-06-25 14:39:50 +0200983 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100984 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000985 return retval;
986}
987
/*
 * Return the number of screen cells taken by the character at "s" in window
 * "wp" when it starts at virtual column "col", taking 'linebreak',
 * 'showbreak' and 'breakindent' into account.
 * "line" is the start of the line, it is passed to get_breakindent_win().
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" is not NULL, *headp is set to the size of what is added at the
 * start of the character: the 'showbreak' / 'breakindent' text and the ">"
 * for a double-wide character that does not fit in the last column.
 * Warning: on the quick path through win_nolbr_chartabsize() *headp is only
 * set when it is non-zero, init it to 0 before calling.
 */
    int
win_lbr_chartabsize(
    win_T       *wp,
    char_u      *line UNUSED, /* start of the line */
    char_u      *s,
    colnr_T     col,
    int         *headp UNUSED)
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     col_adj = 0; /* size of the TAB at "s" minus one, else 0 */
    colnr_T     colmax;
    int         added;
    int         mb_added = 0;
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);
    int         n;

    /*
     * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
     * Without FEAT_LINEBREAK the block below is the whole function.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
#endif
    {
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;
    if (tab_corr)
        col_adj = size - 1;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     * (VIM_ISBREAK() presumably tests against 'breakat' - confirm.)
     */
    if (wp->w_p_lbr
            && VIM_ISBREAK(c)
            && !VIM_ISBREAK((int)s[1])
            && wp->w_p_wrap
            && wp->w_width != 0)
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
        if (col >= colmax)
        {
            /* "col" is past the first screen line: move "colmax" forward in
             * steps of "n" columns until it is beyond "col". */
            colmax += col_adj;
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n - col_adj;
        }

        for (;;)
        {
            ps = s;             /* remember the previous character */
            MB_PTR_ADV(s);
            c = *s;
            /* Stop at the end of the line, or at a non-break character that
             * follows a break character (except right after the start). */
            if (!(c != NUL
                    && (VIM_ISBREAK(c)
                        || (!VIM_ISBREAK(c)
                            && (col2 == col || !VIM_ISBREAK((int)*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* Make the break character fill the rest of the screen
                 * line. */
                size = colmax - col + col_adj;
                tab_corr = FALSE;
                break;
            }
        }
    }
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
        colnr_T sbrlen = 0;
        int     numberwidth = win_col_off(wp);

        /* From here on "col" is reduced to a column within the screen line,
         * it no longer is the virtual column that was passed in. */
        numberextra = numberwidth;
        col += numberextra + mb_added;
        if (col >= (colnr_T)wp->w_width)
        {
            col -= wp->w_width;
            numberextra = wp->w_width - (numberextra - win_col_off2(wp));
            if (col >= numberextra && numberextra > 0)
                col %= numberextra;
            if (*p_sbr != NUL)
            {
                sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
                if (col >= sbrlen)
                    col -= sbrlen;
            }
            if (col >= numberextra && numberextra > 0)
                col = col % numberextra;
            else if (col > 0 && numberextra > 0)
                col += numberwidth - win_col_off2(wp);

            numberwidth -= win_col_off2(wp);
        }
        if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
        {
            added = 0;
            if (*p_sbr != NUL)
            {
                if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
                {
                    /* calculate effective window width */
                    int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
                    int prev_width = col ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
                    /* NOTE(review): only an exactly zero width is replaced;
                     * a negative width, or a zero wp->w_width, is not
                     * handled before the division below - confirm that this
                     * cannot happen. */
                    if (width == 0)
                        width = (colnr_T)wp->w_width;
                    added += ((size - prev_width) / width) * vim_strsize(p_sbr);
                    if ((size - prev_width) % width)
                        /* wrapped, add another length of 'sbr' */
                        added += vim_strsize(p_sbr);
                }
                else
                    added += vim_strsize(p_sbr);
            }
            if (wp->w_p_bri)
                added += get_breakindent_win(wp, line);

            size += added;
            /* Only report the added text as "head" when the character is at
             * the start of the screen line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1153
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154/*
1155 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1156 * 'wrap' is on. This means we need to check for a double-byte character that
1157 * doesn't fit at the end of the screen line.
1158 */
1159 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001160win_nolbr_chartabsize(
1161 win_T *wp,
1162 char_u *s,
1163 colnr_T col,
1164 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165{
1166 int n;
1167
1168 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1169 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001170# ifdef FEAT_VARTABS
1171 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1172 wp->w_buffer->b_p_vts_array);
1173# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174 n = wp->w_buffer->b_p_ts;
1175 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001176# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 }
1178 n = ptr2cells(s);
1179 /* Add one cell for a double-width character in the last column of the
1180 * window, displayed with a ">". */
1181 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1182 {
1183 if (headp != NULL)
1184 *headp = 1;
1185 return 3;
1186 }
1187 return n;
1188}
1189
1190/*
1191 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1192 * "wp".
1193 */
1194 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001195in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001196{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001197 int width1; /* width of first line (after line number) */
1198 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 if (wp->w_width == 0) /* there is no border */
1201 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001202 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001203 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001205 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return TRUE;
1207 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001208 if (width2 <= 0)
1209 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210 return ((vcol - width1) % width2 == width2 - 1);
1211}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001212
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL when that value is not
 * wanted.  When pos->col is MAXCOL the position of the NUL at the end of the
 * line is used.
 * Note: when the line is empty and pos->col is not MAXCOL, pos->col is set
 * to zero.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(
    win_T       *wp,
    pos_T       *pos,
    colnr_T     *start,
    colnr_T     *cursor,
    colnr_T     *end)
{
    colnr_T     vcol;
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    char_u      *line;          /* start of the line */
    int         incr;           /* nr of cells of the current char */
    int         head;           /* nr of cells added before the char */
#ifdef FEAT_VARTABS
    int         *vts = wp->w_buffer->b_p_vts_array;
#endif
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
        posptr = NULL;  /* continue until the NUL */
    else
    {
        /* Special check for an empty line, which can happen on exit, when
         * ml_get_buf() always returns an empty string. */
        if (*ptr == NUL)
            pos->col = 0;
        posptr = ptr + pos->col;
        if (has_mbyte)
            /* always start on the first byte */
            posptr -= (*mb_head_off)(line, posptr);
    }

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
        for (;;)
        {
            head = 0;
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
#ifdef FEAT_VARTABS
                incr = tabstop_padding(vcol, ts, vts);
#else
                incr = ts - (vcol % ts);
#endif
            else
            {
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = g_chartab[c] & CT_CELL_MASK;

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
                    incr = g_chartab[c] & CT_CELL_MASK;
            }

            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }
    else
    {
        /* Slow path: let win_lbr_chartabsize() work out the size of every
         * character. */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }

    /* Here "vcol" is the column where the character starts, "incr" its
     * total size and "head" the part of "incr" that comes before it. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor goes at the end only when State has the NORMAL
         * bit, 'list' is off, virtual editing is not active and the
         * Visual-mode condition below does not hold. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
                && !(VIsual_active
                                && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1357
1358/*
1359 * Get virtual cursor column in the current window, pretending 'list' is off.
1360 */
1361 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001362getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363{
1364 int list_save = curwin->w_p_list;
1365 colnr_T vcol;
1366
1367 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001368#ifdef FEAT_VIRTUALEDIT
1369 if (posp->coladd)
1370 getvvcol(curwin, posp, NULL, &vcol, NULL);
1371 else
1372#endif
1373 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374 curwin->w_p_list = list_save;
1375 return vcol;
1376}
1377
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Like getvcol(), but when virtual editing is active also take pos->coladd
 * into account.  "start", "cursor" and "end" may each be NULL.
 * When virtual editing is not active this is the same as getvcol().
 */
    void
getvvcol(
    win_T       *wp,
    pos_T       *pos,
    colnr_T     *start,
    colnr_T     *cursor,
    colnr_T     *end)
{
    colnr_T     col;
    colnr_T     coladd;
    colnr_T     endadd = 0;
    char_u      *ptr;

    if (!virtual_active())
    {
        getvcol(wp, pos, start, cursor, end);
        return;
    }

    /* For virtual mode, only want one value */
    getvcol(wp, pos, &col, NULL, NULL);
    coladd = pos->coladd;

    /* Cannot put the cursor on part of a wide character. */
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col < (colnr_T)STRLEN(ptr))
    {
        int c = (*mb_ptr2char)(ptr + pos->col);

        if (c != TAB && vim_isprintc(c))
        {
            endadd = (colnr_T)(char2cells(c) - 1);
            if (coladd > endadd)        /* past end of line */
                endadd = 0;
            else
                coladd = 0;
        }
    }

    col += coladd;
    if (start != NULL)
        *start = col;
    if (cursor != NULL)
        *cursor = col;
    if (end != NULL)
        *end = col + endadd;
}
#endif
1429
Bram Moolenaar071d4272004-06-13 20:20:40 +00001430/*
1431 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1432 * Used for Visual block mode.
1433 */
1434 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001435getvcols(
1436 win_T *wp,
1437 pos_T *pos1,
1438 pos_T *pos2,
1439 colnr_T *left,
1440 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001441{
1442 colnr_T from1, from2, to1, to2;
1443
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001444 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001445 {
1446 getvvcol(wp, pos1, &from1, NULL, &to1);
1447 getvvcol(wp, pos2, &from2, NULL, &to2);
1448 }
1449 else
1450 {
1451 getvvcol(wp, pos2, &from1, NULL, &to1);
1452 getvvcol(wp, pos1, &from2, NULL, &to2);
1453 }
1454 if (from2 < from1)
1455 *left = from2;
1456 else
1457 *left = from1;
1458 if (to2 > to1)
1459 {
1460 if (*p_sel == 'e' && from2 - 1 >= to1)
1461 *right = from2 - 1;
1462 else
1463 *right = to2;
1464 }
1465 else
1466 *right = to1;
1467}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001468
1469/*
1470 * skipwhite: skip over ' ' and '\t'.
1471 */
1472 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001473skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001474{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001475 char_u *p = q;
1476
Bram Moolenaar1c465442017-03-12 20:10:05 +01001477 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001478 ++p;
1479 return p;
1480}
1481
/*
 * Return the number of white space columns (bytes) at the start of the
 * current line, as computed by getwhitecols() on ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1491
1492 int
1493getwhitecols(char_u *p)
1494{
1495 return skipwhite(p) - p;
1496}
1497
1498/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001499 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001500 */
1501 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001502skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001503{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001504 char_u *p = q;
1505
Bram Moolenaar071d4272004-06-13 20:20:40 +00001506 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1507 ++p;
1508 return p;
1509}
1510
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001511#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001512/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001513 * skip over binary digits
1514 */
1515 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001516skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001517{
1518 char_u *p = q;
1519
1520 while (vim_isbdigit(*p)) /* skip to next non-digit */
1521 ++p;
1522 return p;
1523}
1524
1525/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001526 * skip over digits and hex characters
1527 */
1528 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001529skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001530{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001531 char_u *p = q;
1532
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001533 while (vim_isxdigit(*p)) /* skip to next non-digit */
1534 ++p;
1535 return p;
1536}
1537#endif
1538
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001539/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001540 * skip to bin digit (or NUL after the string)
1541 */
1542 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001543skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001544{
1545 char_u *p = q;
1546
1547 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1548 ++p;
1549 return p;
1550}
1551
1552/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001553 * skip to digit (or NUL after the string)
1554 */
1555 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001556skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001557{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001558 char_u *p = q;
1559
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001560 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1561 ++p;
1562 return p;
1563}
1564
1565/*
1566 * skip to hex character (or NUL after the string)
1567 */
1568 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001569skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001570{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001571 char_u *p = q;
1572
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001573 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1574 ++p;
1575 return p;
1576}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001577
/*
 * Variant of isdigit() that can handle characters > 0x100: only '0' - '9'
 * are accepted.
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1589
/*
 * Variant of isxdigit() that can handle characters > 0x100: only '0' - '9',
 * 'a' - 'f' and 'A' - 'F' are accepted.
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(int c)
{
    int         is_decimal = (c >= '0' && c <= '9');
    int         is_lower_hex = (c >= 'a' && c <= 'f');
    int         is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1602
/*
 * Companion of vim_isdigit() and vim_isxdigit() for binary digits, can
 * handle characters > 0x100: only '0' and '1' are accepted.
 */
    int
vim_isbdigit(int c)
{
    return !(c != '0' && c != '1');
}
1612
Bram Moolenaar78622822005-08-23 21:00:13 +00001613/*
1614 * Vim's own character class functions. These exist because many library
1615 * islower()/toupper() etc. do not work properly: they crash when used with
1616 * invalid values or can't handle latin1 when the locale is C.
1617 * Speed is most important here.
1618 */
1619#define LATIN1LOWER 'l'
1620#define LATIN1UPPER 'U'
1621
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001622static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001623static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1624static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001625
1626 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001627vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001628{
1629 if (c <= '@')
1630 return FALSE;
1631 if (c >= 0x80)
1632 {
1633 if (enc_utf8)
1634 return utf_islower(c);
1635 if (c >= 0x100)
1636 {
1637#ifdef HAVE_ISWLOWER
1638 if (has_mbyte)
1639 return iswlower(c);
1640#endif
1641 /* islower() can't handle these chars and may crash */
1642 return FALSE;
1643 }
1644 if (enc_latin1like)
1645 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1646 }
1647 return islower(c);
1648}
1649
1650 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001651vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001652{
1653 if (c <= '@')
1654 return FALSE;
1655 if (c >= 0x80)
1656 {
1657 if (enc_utf8)
1658 return utf_isupper(c);
1659 if (c >= 0x100)
1660 {
1661#ifdef HAVE_ISWUPPER
1662 if (has_mbyte)
1663 return iswupper(c);
1664#endif
1665 /* islower() can't handle these chars and may crash */
1666 return FALSE;
1667 }
1668 if (enc_latin1like)
1669 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1670 }
1671 return isupper(c);
1672}
1673
1674 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001675vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001676{
1677 if (c <= '@')
1678 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001679 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001680 {
1681 if (enc_utf8)
1682 return utf_toupper(c);
1683 if (c >= 0x100)
1684 {
1685#ifdef HAVE_TOWUPPER
1686 if (has_mbyte)
1687 return towupper(c);
1688#endif
1689 /* toupper() can't handle these chars and may crash */
1690 return c;
1691 }
1692 if (enc_latin1like)
1693 return latin1upper[c];
1694 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001695 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1696 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001697 return TOUPPER_LOC(c);
1698}
1699
1700 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001701vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001702{
1703 if (c <= '@')
1704 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001705 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001706 {
1707 if (enc_utf8)
1708 return utf_tolower(c);
1709 if (c >= 0x100)
1710 {
1711#ifdef HAVE_TOWLOWER
1712 if (has_mbyte)
1713 return towlower(c);
1714#endif
1715 /* tolower() can't handle these chars and may crash */
1716 return c;
1717 }
1718 if (enc_latin1like)
1719 return latin1lower[c];
1720 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001721 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1722 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001723 return TOLOWER_LOC(c);
1724}
Bram Moolenaar78622822005-08-23 21:00:13 +00001725
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726/*
1727 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1728 */
1729 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001730skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731{
1732 while (*p != ' ' && *p != '\t' && *p != NUL)
1733 ++p;
1734 return p;
1735}
1736
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737/*
1738 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1739 */
1740 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001741skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742{
1743 while (*p != ' ' && *p != '\t' && *p != NUL)
1744 {
1745 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1746 ++p;
1747 ++p;
1748 }
1749 return p;
1750}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751
1752/*
1753 * Getdigits: Get a number from a string and skip over it.
1754 * Note: the argument is a pointer to a char_u pointer!
1755 */
1756 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001757getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758{
1759 char_u *p;
1760 long retval;
1761
1762 p = *pp;
1763 retval = atol((char *)p);
1764 if (*p == '-') /* skip negative sign */
1765 ++p;
1766 p = skipdigits(p); /* skip to next non-digit */
1767 *pp = p;
1768 return retval;
1769}
1770
1771/*
1772 * Return TRUE if "lbuf" is empty or only contains blanks.
1773 */
1774 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001775vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776{
1777 char_u *p;
1778
1779 p = skipwhite(lbuf);
1780 return (*p == NUL || *p == '\r' || *p == '\n');
1781}
1782
1783/*
1784 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001785 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1786 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001787 * 0 decimal
1788 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001789 * 'B' bin
1790 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 * 'X' hex
1792 * 'x' hex
1793 * If "len" is not NULL, the length of the number in characters is returned.
1794 * If "nptr" is not NULL, the signed result is returned in it.
1795 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001796 * If "what" contains STR2NR_BIN recognize binary numbers
1797 * If "what" contains STR2NR_OCT recognize octal numbers
1798 * If "what" contains STR2NR_HEX recognize hex numbers
1799 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001800 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001801 */
1802 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001803vim_str2nr(
1804 char_u *start,
1805 int *prep, /* return: type of number 0 = decimal, 'x'
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001806 or 'X' is hex, '0' = octal, 'b' or 'B'
1807 is bin */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001808 int *len, /* return: detected length of number */
1809 int what, /* what numbers to recognize */
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001810 varnumber_T *nptr, /* return: signed result */
1811 uvarnumber_T *unptr, /* return: unsigned result */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001812 int maxlen) /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001813{
1814 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001815 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001816 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001817 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001818 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819
1820 if (ptr[0] == '-')
1821 {
1822 negative = TRUE;
1823 ++ptr;
1824 }
1825
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001826 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001827 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1828 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001829 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001830 pre = ptr[1];
1831 if ((what & STR2NR_HEX)
1832 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1833 && (maxlen == 0 || maxlen > 2))
1834 /* hexadecimal */
1835 ptr += 2;
1836 else if ((what & STR2NR_BIN)
1837 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1838 && (maxlen == 0 || maxlen > 2))
1839 /* binary */
1840 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841 else
1842 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001843 /* decimal or octal, default is decimal */
1844 pre = 0;
1845 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001846 {
1847 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001848 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001849 {
1850 if (ptr[n] > '7')
1851 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001852 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001853 break;
1854 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001855 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001856 }
1857 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001858 }
1859 }
1860
1861 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001862 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1863 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001864 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001865 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1866 {
1867 /* bin */
1868 if (pre != 0)
1869 n += 2; /* skip over "0b" */
1870 while ('0' <= *ptr && *ptr <= '1')
1871 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001872 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001873 if (un <= UVARNUM_MAX / 2)
1874 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001875 else
1876 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001877 ++ptr;
1878 if (n++ == maxlen)
1879 break;
1880 }
1881 }
1882 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001883 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001884 /* octal */
1885 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001886 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001887 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001888 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001889 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1890 else
1891 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001892 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001893 if (n++ == maxlen)
1894 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001896 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001897 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001898 {
1899 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001900 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001901 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001902 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001904 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001905 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001906 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1907 else
1908 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001909 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001910 if (n++ == maxlen)
1911 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001912 }
1913 }
1914 else
1915 {
1916 /* decimal */
1917 while (VIM_ISDIGIT(*ptr))
1918 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001919 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1920
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001921 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001922 if (un < UVARNUM_MAX / 10
1923 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1924 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001925 else
1926 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001927 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001928 if (n++ == maxlen)
1929 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930 }
1931 }
1932
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001933 if (prep != NULL)
1934 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001935 if (len != NULL)
1936 *len = (int)(ptr - start);
1937 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001938 {
1939 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001940 {
1941 /* avoid ubsan error for overflow */
1942 if (un > VARNUM_MAX)
1943 *nptr = VARNUM_MIN;
1944 else
1945 *nptr = -(varnumber_T)un;
1946 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001947 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001948 {
1949 if (un > VARNUM_MAX)
1950 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001951 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001952 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001953 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001954 if (unptr != NULL)
1955 *unptr = un;
1956}
1957
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other character is not rejected, the result then is "c" - '0'.
 */
    int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - '0';
}
1971
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * The first character is the high nibble, the second one the low nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(char_u *p)
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif
1985
1986/*
1987 * Return TRUE if "str" starts with a backslash that should be removed.
1988 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1989 * backslash is not a normal file name character.
1990 * '$' is a valid file name character, we don't remove the backslash before
1991 * it. This means it is not possible to use an environment variable after a
1992 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1993 * Although "\ name" is valid, the backslash in "Program\ files" must be
1994 * removed. Assume a file name doesn't start with a space.
1995 * For multi-byte names, never remove a backslash before a non-ascii
1996 * character, assume that all multi-byte characters are valid file name
1997 * characters.
1998 */
1999 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002000rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001{
2002#ifdef BACKSLASH_IN_FILENAME
2003 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002004 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002005 && (str[1] == ' '
2006 || (str[1] != NUL
2007 && str[1] != '*'
2008 && str[1] != '?'
2009 && !vim_isfilec(str[1]))));
2010#else
2011 return (str[0] == '\\' && str[1] != NUL);
2012#endif
2013}
2014
2015/*
2016 * Halve the number of backslashes in a file name argument.
2017 * For MS-DOS we only do this if the character after the backslash
2018 * is not a normal file character.
2019 */
2020 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002021backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002022{
2023 for ( ; *p; ++p)
2024 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002025 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002026}
2027
2028/*
2029 * backslash_halve() plus save the result in allocated memory.
2030 */
2031 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002032backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002033{
2034 char_u *res;
2035
2036 res = vim_strsave(p);
2037 if (res == NULL)
2038 return p;
2039 backslash_halve(res);
2040 return res;
2041}
2042
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed by the EBCDIC byte value, each row holds 8 entries.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place.
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    int     i;

    for (i = 0; i < len; i++)
        buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif