blob: 0280954538a9b4bf5f39752f1bd1c66eb1673378 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#ifdef FEAT_MBYTE
Bram Moolenaard7b734a2010-08-12 20:17:02 +020013# if defined(HAVE_WCHAR_H)
14# include <wchar.h> /* for towupper() and towlower() */
15# endif
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010016static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000017#endif
18
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010019static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000020
21static int chartab_initialized = FALSE;
22
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: byte "c >> 3" holds the bit "c & 7".
 *
 * SET_CHARTAB()   turns the bit for "c" on.
 * RESET_CHARTAB() turns the bit for "c" off.
 * GET_CHARTAB()   evaluates to non-zero when the bit for "c" is on.
 *
 * "buf" must be a pointer to a structure with a b_chartab[] member and "c"
 * must be in the range 0-255.  Every expansion is fully parenthesized so that
 * the macros can safely be used inside a larger expression.
 */
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
28
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010029/* table used below, see init_chartab() for an explanation */
30static char_u g_chartab[256];
31
Bram Moolenaar071d4272004-06-13 20:20:40 +000032/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010033 * Flags for g_chartab[].
34 */
35#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
36#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
37#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
38#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
39
40/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
75 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103#ifdef FEAT_FKMAP
104 if (p_altkeymap)
105 {
106 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000108 }
109#endif
110 while (c < 256)
111 {
112#ifdef FEAT_MBYTE
113 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
114 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100115 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116 /* euc-jp characters starting with 0x8e are single width */
117 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100118 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 /* other double-byte chars can be printable AND double-width */
120 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100121 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 else
123#endif
124 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100125 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000126 }
127
128#ifdef FEAT_MBYTE
129 /* Assume that every multi-byte char is a filename character. */
130 for (c = 1; c < 256; ++c)
131 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
132 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
133 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100134 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135#endif
136 }
137
138 /*
139 * Init word char flags all to FALSE
140 */
141 vim_memset(buf->b_chartab, 0, (size_t)32);
142#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000143 if (enc_dbcs != 0)
144 for (c = 0; c < 256; ++c)
145 {
146 /* double-byte characters are probably word characters */
147 if (MB_BYTE2LEN(c) == 2)
148 SET_CHARTAB(buf, c);
149 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150#endif
151
152#ifdef FEAT_LISP
153 /*
154 * In lisp mode the '-' character is included in keywords.
155 */
156 if (buf->b_p_lisp)
157 SET_CHARTAB(buf, '-');
158#endif
159
160 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
161 * options Each option is a list of characters, character numbers or
162 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
163 */
164 for (i = global ? 0 : 3; i <= 3; ++i)
165 {
166 if (i == 0)
167 p = p_isi; /* first round: 'isident' */
168 else if (i == 1)
169 p = p_isp; /* second round: 'isprint' */
170 else if (i == 2)
171 p = p_isf; /* third round: 'isfname' */
172 else /* i == 3 */
173 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
174
175 while (*p)
176 {
177 tilde = FALSE;
178 do_isalpha = FALSE;
179 if (*p == '^' && p[1] != NUL)
180 {
181 tilde = TRUE;
182 ++p;
183 }
184 if (VIM_ISDIGIT(*p))
185 c = getdigits(&p);
186 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000187#ifdef FEAT_MBYTE
188 if (has_mbyte)
189 c = mb_ptr2char_adv(&p);
190 else
191#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 c = *p++;
193 c2 = -1;
194 if (*p == '-' && p[1] != NUL)
195 {
196 ++p;
197 if (VIM_ISDIGIT(*p))
198 c2 = getdigits(&p);
199 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000200#ifdef FEAT_MBYTE
201 if (has_mbyte)
202 c2 = mb_ptr2char_adv(&p);
203 else
204#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000205 c2 = *p++;
206 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000207 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000208 || !(*p == NUL || *p == ','))
209 return FAIL;
210
211 if (c2 == -1) /* not a range */
212 {
213 /*
214 * A single '@' (not "@-@"):
215 * Decide on letters being ID/printable/keyword chars with
216 * standard function isalpha(). This takes care of locale for
217 * single-byte characters).
218 */
219 if (c == '@')
220 {
221 do_isalpha = TRUE;
222 c = 1;
223 c2 = 255;
224 }
225 else
226 c2 = c;
227 }
228 while (c <= c2)
229 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000230 /* Use the MB_ functions here, because isalpha() doesn't
231 * work properly when 'encoding' is "latin1" and the locale is
232 * "C". */
233 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000234#ifdef FEAT_FKMAP
235 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
236#endif
237 )
238 {
239 if (i == 0) /* (re)set ID flag */
240 {
241 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100242 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000243 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100244 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000245 }
246 else if (i == 1) /* (re)set printable */
247 {
248 if ((c < ' '
249#ifndef EBCDIC
250 || c > '~'
251#endif
252#ifdef FEAT_FKMAP
253 || (p_altkeymap
254 && (F_isalpha(c) || F_isdigit(c)))
255#endif
256 )
257#ifdef FEAT_MBYTE
258 /* For double-byte we keep the cell width, so
259 * that we can detect it from the first byte. */
260 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
261#endif
262 )
263 {
264 if (tilde)
265 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100266 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100268 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000269 }
270 else
271 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100272 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
273 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000274 }
275 }
276 }
277 else if (i == 2) /* (re)set fname flag */
278 {
279 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100280 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000281 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100282 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283 }
284 else /* i == 3 */ /* (re)set keyword flag */
285 {
286 if (tilde)
287 RESET_CHARTAB(buf, c);
288 else
289 SET_CHARTAB(buf, c);
290 }
291 }
292 ++c;
293 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100294
295 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100297 if (c == ',' && *p == NUL)
298 /* Trailing comma is not allowed. */
299 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 }
301 }
302 chartab_initialized = TRUE;
303 return OK;
304}
305
306/*
307 * Translate any special characters in buf[bufsize] in-place.
308 * The result is a string with only printable characters, but if there is not
309 * enough room, not all characters will be translated.
310 */
311 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100312trans_characters(
313 char_u *buf,
314 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000315{
316 int len; /* length of string needing translation */
317 int room; /* room in buffer after string */
318 char_u *trs; /* translated character */
319 int trs_len; /* length of trs[] */
320
321 len = (int)STRLEN(buf);
322 room = bufsize - len;
323 while (*buf != 0)
324 {
325# ifdef FEAT_MBYTE
326 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000327 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 len -= trs_len;
329 else
330# endif
331 {
332 trs = transchar_byte(*buf);
333 trs_len = (int)STRLEN(trs);
334 if (trs_len > 1)
335 {
336 room -= trs_len - 1;
337 if (room <= 0)
338 return;
339 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
340 }
341 mch_memmove(buf, trs, (size_t)trs_len);
342 --len;
343 }
344 buf += trs_len;
345 }
346}
347
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
	|| defined(PROTO)
/*
 * Translate string "s" into allocated memory, replacing special chars with
 * printable chars.
 * Printable multi-byte characters are copied as they are, unprintable ones
 * are replaced by what transchar_hex() produces, all other bytes by what
 * transchar_byte() produces.
 * Returns NULL when out of memory.  The caller owns the returned string.
 */
    char_u *
transstr(char_u *s)
{
    char_u	*res;
    char_u	*p;
    char_u	*end;		/* points at the NUL that ends "res" */
#ifdef FEAT_MBYTE
    int		l, len, c;
    char_u	hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
	/* Compute the length of the result.  Every piece is measured the
	 * same way as it is produced below, so that the allocated size always
	 * matches what gets appended. */
	len = 0;
	p = s;
	while (*p != NUL)
	{
	    if ((l = (*mb_ptr2len)(p)) > 1)
	    {
		c = (*mb_ptr2char)(p);
		p += l;
		if (vim_isprintc(c))
		    len += l;
		else
		{
		    transchar_hex(hexbuf, c);
		    len += (int)STRLEN(hexbuf);
		}
	    }
	    else
	    {
		/* single byte or illegal byte sequence */
		len += (int)STRLEN(transchar_byte(*p));
		++p;
	    }
	}
	res = alloc((unsigned)(len + 1));
    }
    else
#endif
	res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res == NULL)
	return NULL;

    /* Append at "end" instead of at "res", so that the text added so far
     * does not need to be scanned again for every character. */
    end = res;
    *end = NUL;
    for (p = s; *p != NUL; end += STRLEN(end))
    {
#ifdef FEAT_MBYTE
	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
	{
	    c = (*mb_ptr2char)(p);
	    if (vim_isprintc(c))
		STRNCAT(end, p, l);	/* append printable multi-byte char */
	    else
		transchar_hex(end, c);
	    p += l;
	}
	else
#endif
	    STRCAT(end, transchar_byte(*p++));
    }
    return res;
}
#endif
423
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Make a copy of the string "str[orglen]" in which every character is lower
 * case, for ignore-case comparing.  Uses the current locale.
 * When "buf" is NULL the result is an allocated string (NULL for
 * out-of-memory).
 * Otherwise the result is put in "buf[buflen]", truncated when it does not
 * fit, and "buf" is returned.
 */
    char_u *
str_foldcase(
    char_u	*str,
    int		orglen,
    char_u	*buf,
    int		buflen)
{
    garray_T	ga;
    int		idx = 0;
    int		len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf != NULL)
    {
	if (len >= buflen)	    /* Ugly! */
	    len = buflen - 1;
	mch_memmove(buf, str, (size_t)len);
	buf[len] = NUL;
    }
    else
    {
	ga_init2(&ga, 1, 10);
	if (ga_grow(&ga, len + 1) == FAIL)
	    return NULL;
	mch_memmove(ga.ga_data, str, (size_t)len);
	ga.ga_len = len;
	GA_CHAR(len) = NUL;
    }

    /* Make each character lower case.  The text is always accessed through
     * the macros, because growing "ga" may change where it is stored. */
    while (STR_CHAR(idx) != NUL)
    {
#ifdef FEAT_MBYTE
	if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(idx)) > 1))
	{
	    if (enc_utf8)
	    {
		int	c = utf_ptr2char(STR_PTR(idx));
		int	olen = utf_ptr2len(STR_PTR(idx));
		int	lc = utf_tolower(c);

		/* Only replace the character when it is not an invalid
		 * sequence (ASCII character or more than one byte) and
		 * utf_tolower() doesn't return the original character. */
		if ((c < 0x80 || olen > 1) && c != lc)
		{
		    int	    nlen = utf_char2len(lc);

		    /* When the lower case character takes more bytes there
		     * must be room for it.  If not: keep the old char. */
		    if (nlen > olen
			    && (buf == NULL
				? ga_grow(&ga, nlen - olen + 1) == FAIL
				: len + nlen - olen >= buflen))
		    {
			lc = c;
			nlen = olen;
		    }

		    /* If the byte length changes need to shift the following
		     * characters forward or backward. */
		    if (olen != nlen)
		    {
			if (buf == NULL)
			{
			    STRMOVE(GA_PTR(idx) + nlen, GA_PTR(idx) + olen);
			    ga.ga_len += nlen - olen;
			}
			else
			{
			    STRMOVE(buf + idx + nlen, buf + idx + olen);
			    len += nlen - olen;
			}
		    }
		    (void)utf_char2bytes(lc, STR_PTR(idx));
		}
	    }
	    /* skip to next multi-byte char */
	    idx += (*mb_ptr2len)(STR_PTR(idx));
	}
	else
#endif
	{
	    *STR_PTR(idx) = TOLOWER_LOC(STR_CHAR(idx));
	    ++idx;
	}
    }

    return buf == NULL ? (char_u *)ga.ga_data : buf;
}
#endif
538
539/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100540 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000541 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100542 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000543 * Does NOT work for multi-byte characters, c must be <= 255.
544 * Also doesn't work for the first byte of a multi-byte, "c" must be a
545 * character!
546 */
547static char_u transchar_buf[7];
548
549 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100550transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000551{
552 int i;
553
554 i = 0;
555 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
556 {
557 transchar_buf[0] = '~';
558 transchar_buf[1] = '@';
559 i = 2;
560 c = K_SECOND(c);
561 }
562
563 if ((!chartab_initialized && (
564#ifdef EBCDIC
565 (c >= 64 && c < 255)
566#else
567 (c >= ' ' && c <= '~')
568#endif
569#ifdef FEAT_FKMAP
Bram Moolenaaree2615a2016-07-02 18:25:34 +0200570 || (p_altkeymap && F_ischar(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571#endif
572 )) || (c < 256 && vim_isprintc_strict(c)))
573 {
574 /* printable character */
575 transchar_buf[i] = c;
576 transchar_buf[i + 1] = NUL;
577 }
578 else
579 transchar_nonprint(transchar_buf + i, c);
580 return transchar_buf;
581}
582
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but "c" is a byte instead of a character.  With UTF-8 a
 * byte of 0x80 or above is not a character by itself, it is always
 * translated as a non-printable byte.
 * The result is in the same static buffer that transchar() uses.
 */
    char_u *
transchar_byte(int c)
{
    if (!enc_utf8 || c < 0x80)
	return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
599
600/*
601 * Convert non-printable character to two or more printable characters in
602 * "buf[]". "buf" needs to be able to hold five bytes.
603 * Does NOT work for multi-byte characters, c must be <= 255.
604 */
605 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100606transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000607{
608 if (c == NL)
609 c = NUL; /* we use newline in place of a NUL */
610 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
611 c = NL; /* we use CR in place of NL in this case */
612
613 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
614 transchar_hex(buf, c);
615
616#ifdef EBCDIC
617 /* For EBCDIC only the characters 0-63 and 255 are not printable */
618 else if (CtrlChar(c) != 0 || c == DEL)
619#else
620 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
621#endif
622 {
623 buf[0] = '^';
624#ifdef EBCDIC
625 if (c == DEL)
626 buf[1] = '?'; /* DEL displayed as ^? */
627 else
628 buf[1] = CtrlChar(c);
629#else
630 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
631#endif
632
633 buf[2] = NUL;
634 }
635#ifdef FEAT_MBYTE
636 else if (enc_utf8 && c >= 0x80)
637 {
638 transchar_hex(buf, c);
639 }
640#endif
641#ifndef EBCDIC
642 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
643 {
644 buf[0] = '|';
645 buf[1] = c - 0x80;
646 buf[2] = NUL;
647 }
648#else
649 else if (c < 64)
650 {
651 buf[0] = '~';
652 buf[1] = MetaChar(c);
653 buf[2] = NUL;
654 }
655#endif
656 else /* 0x80 - 0x9f and 0xff */
657 {
658 /*
659 * TODO: EBCDIC I don't know what to do with this chars, so I display
660 * them as '~?' for now
661 */
662 buf[0] = '~';
663#ifdef EBCDIC
664 buf[1] = '?'; /* 0xff displayed as ~? */
665#else
666 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
667#endif
668 buf[2] = NUL;
669 }
670}
671
672 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100673transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000674{
675 int i = 0;
676
677 buf[0] = '<';
678#ifdef FEAT_MBYTE
679 if (c > 255)
680 {
681 buf[++i] = nr2hex((unsigned)c >> 12);
682 buf[++i] = nr2hex((unsigned)c >> 8);
683 }
684#endif
685 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000686 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 buf[++i] = '>';
688 buf[++i] = NUL;
689}
690
/*
 * Return the hex character for the lower 4 bits of "c", the other bits are
 * ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned	nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
703
704/*
705 * Return number of display cells occupied by byte "b".
706 * Caller must make sure 0 <= b <= 255.
707 * For multi-byte mode "b" must be the first byte of a character.
708 * A TAB is counted as two cells: "^I".
709 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
710 * cells depends on further bytes.
711 */
712 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100713byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714{
715#ifdef FEAT_MBYTE
716 if (enc_utf8 && b >= 0x80)
717 return 0;
718#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100719 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000720}
721
722/*
723 * Return number of display cells occupied by character "c".
724 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
725 * A TAB is counted as two cells: "^I" or four: "<09>".
726 */
727 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100728char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729{
730 if (IS_SPECIAL(c))
731 return char2cells(K_SECOND(c)) + 2;
732#ifdef FEAT_MBYTE
733 if (c >= 0x80)
734 {
735 /* UTF-8: above 0x80 need to check the value */
736 if (enc_utf8)
737 return utf_char2cells(c);
738 /* DBCS: double-byte means double-width, except for euc-jp with first
739 * byte 0x8e */
740 if (enc_dbcs != 0 && c >= 0x100)
741 {
742 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
743 return 1;
744 return 2;
745 }
746 }
747#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100748 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749}
750
751/*
752 * Return number of display cells occupied by character at "*p".
753 * A TAB is counted as two cells: "^I" or four: "<09>".
754 */
755 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100756ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000757{
758#ifdef FEAT_MBYTE
759 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
760 if (enc_utf8 && *p >= 0x80)
761 return utf_ptr2cells(p);
762 /* For DBCS we can tell the cell count from the first byte. */
763#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100764 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000765}
766
767/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100768 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000769 * counting TABs as two characters: "^I".
770 */
771 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100772vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773{
774 return vim_strnsize(s, (int)MAXCOL);
775}
776
777/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100778 * Return the number of character cells string "s[len]" will take on the
779 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000780 */
781 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100782vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000783{
784 int size = 0;
785
786 while (*s != NUL && --len >= 0)
787 {
788#ifdef FEAT_MBYTE
789 if (has_mbyte)
790 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000791 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792
793 size += ptr2cells(s);
794 s += l;
795 len -= l - 1;
796 }
797 else
798#endif
799 size += byte2cells(*s++);
800 }
801 return size;
802}
803
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * RET_WIN_BUF_CHARTABSIZE() is the complete body of a function: both of its
 * branches return.  For a TAB that is shown as white space ('list' is off in
 * window "wp" or lcs_tab1 is set) the result is the distance from column
 * "col" to the next tab stop of buffer "buf", otherwise it is ptr2cells(p).
 * The expansion ends in a semicolon, it is used without one.
 * All the arguments are parenthesized where they are part of an expression,
 * so that "col" may also be a compound expression.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
834 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
835}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000836
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and the buffer in that window
 * instead of the current window and buffer.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* The macro is the whole function body, every path in it returns. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
844
845/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200846 * Return the number of characters the string 's' will take on the screen,
847 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 */
849 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100850linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851{
Bram Moolenaardc536092010-07-18 15:45:49 +0200852 return linetabsize_col(0, s);
853}
854
855/*
856 * Like linetabsize(), but starting at column "startcol".
857 */
858 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100859linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200860{
861 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200862 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863
864 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200865 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 return (int)col;
867}
868
869/*
870 * Like linetabsize(), but for a given window instead of the current one.
871 */
872 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100873win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000874{
875 colnr_T col = 0;
876 char_u *s;
877
Bram Moolenaar597a4222014-06-25 14:39:50 +0200878 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100879 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200880 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881 return (int)col;
882}
883
884/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000885 * Return TRUE if 'c' is a normal identifier character:
886 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887 */
888 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100889vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000890{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100891 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000892}
893
894/*
895 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100896 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897 * For multi-byte characters mb_get_class() is used (builtin rules).
898 */
899 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100900vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000901{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100902 return vim_iswordc_buf(c, curbuf);
903}
904
905 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100906vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100907{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000908 if (c >= 0x100)
909 {
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100910#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +0000911 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000912 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100914 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915#endif
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100916 return FALSE;
917 }
918 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000919}
920
921/*
922 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
923 */
924 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100925vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100927 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000928}
929
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100931vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100933 int c = *p;
934
Bram Moolenaara50e5862013-01-30 17:30:17 +0100935#ifdef FEAT_MBYTE
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100936 if (has_mbyte && MB_BYTE2LEN(c) > 1)
937 c = (*mb_ptr2char)(p);
Bram Moolenaara50e5862013-01-30 17:30:17 +0100938#endif
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100939 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000941
942/*
943 * return TRUE if 'c' is a valid file-name character
944 * Assume characters above 0x100 are valid (multi-byte).
945 */
946 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100947vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100949 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950}
951
952/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000953 * return TRUE if 'c' is a valid file-name character or a wildcard character
954 * Assume characters above 0x100 are valid (multi-byte).
955 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
956 * returns false.
957 */
958 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100959vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000960{
961 char_u buf[2];
962
963 buf[0] = (char_u)c;
964 buf[1] = NUL;
965 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
966}
967
968/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200969 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000970 * Assume characters above 0x100 are printable (multi-byte), except for
971 * Unicode.
972 */
973 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100974vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000975{
976#ifdef FEAT_MBYTE
977 if (enc_utf8 && c >= 0x100)
978 return utf_printable(c);
979#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100980 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000981}
982
/*
 * Strict version of vim_isprintc(c): don't return TRUE if "c" is the head
 * byte of a double-byte character.  In all other cases the result is that of
 * vim_isprintc().
 */
    int
vim_isprintc_strict(int c)
{
#ifdef FEAT_MBYTE
    /* Reject the lead byte of a double-byte character. */
    if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
	return FALSE;
#endif
    return vim_isprintc(c);
}
998
/*
 * Like chartabsize(), but also check for line breaks on the screen.
 *
 * "line" is the start of the line, "s" points to the character to get the
 * size of and "col" is the column passed on to the size computation.
 * Works on "curwin" and "curbuf".
 * Without FEAT_LINEBREAK "line" is not used.
 */
    int
lbr_chartabsize(
    char_u		*line UNUSED, /* start of the line */
    unsigned char	*s,
    colnr_T		col)
{
#ifdef FEAT_LINEBREAK
    /* None of 'linebreak', 'showbreak' and 'breakindent' is set: use the
     * simple computation in this block. */
    if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
    {
#endif
#ifdef FEAT_MBYTE
	if (curwin->w_p_wrap)
	    return win_nolbr_chartabsize(curwin, s, col, NULL);
#endif
	/* NOTE(review): the macro is defined elsewhere; it presumably returns
	 * from this function - confirm. */
	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
#ifdef FEAT_LINEBREAK
    }
    /* When "line" is NULL "s" is passed as the start of the line. */
    return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
#endif
}
1022
1023/*
1024 * Call lbr_chartabsize() and advance the pointer.
1025 */
1026 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001027lbr_chartabsize_adv(
1028 char_u *line, /* start of the line */
1029 char_u **s,
1030 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001031{
1032 int retval;
1033
Bram Moolenaar597a4222014-06-25 14:39:50 +02001034 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001035 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001036 return retval;
1037}
1038
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the size of the character at "s" when displayed at column "col" in
 * window "wp", taking 'linebreak', 'showbreak' and 'breakindent' into
 * account.  "line" is the start of the line, it is only passed on to
 * get_breakindent_win().
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 */
    int
win_lbr_chartabsize(
    win_T	*wp,
    char_u	*line UNUSED, /* start of the line */
    char_u	*s,
    colnr_T	col,
    int		*headp UNUSED)
{
#ifdef FEAT_LINEBREAK
    int		c;
    int		size;
    colnr_T	col2;
    colnr_T	col_adj = 0; /* col + screen size of tab */
    colnr_T	colmax;
    int		added;
# ifdef FEAT_MBYTE
    int		mb_added = 0;
# else
#  define mb_added 0
# endif
    int		numberextra;
    char_u	*ps;
    int		tab_corr = (*s == TAB);
    int		n;

    /*
     * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
#endif
    {
	/* Without FEAT_LINEBREAK this block is always executed. */
#ifdef FEAT_MBYTE
	if (wp->w_p_wrap)
	    return win_nolbr_chartabsize(wp, s, col, headp);
#endif
	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;
    if (tab_corr)
	col_adj = size - 1;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
	    && VIM_ISBREAK(c)
	    && !VIM_ISBREAK((int)s[1])
	    && wp->w_p_wrap
	    && wp->w_width != 0)
    {
	/*
	 * Count all characters from first non-blank after a blank up to next
	 * non-blank after a blank.
	 */
	numberextra = win_col_off(wp);
	col2 = col;
	colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
	if (col >= colmax)
	{
	    /* "col" is past the first screen line: move "colmax" forward in
	     * steps of "n" until it is beyond "col". */
	    colmax += col_adj;
	    n = colmax + win_col_off2(wp);
	    if (n > 0)
		colmax += (((col - colmax) / n) + 1) * n - col_adj;
	}

	for (;;)
	{
	    ps = s;	/* remember the previous character */
	    MB_PTR_ADV(s);
	    c = *s;
	    if (!(c != NUL
		    && (VIM_ISBREAK(c)
			|| (!VIM_ISBREAK(c)
			    && (col2 == col || !VIM_ISBREAK((int)*ps))))))
		break;

	    col2 += win_chartabsize(wp, s, col2);
	    if (col2 >= colmax)		/* doesn't fit */
	    {
		/* The character at the original "s" takes up the rest of the
		 * space up to "colmax". */
		size = colmax - col + col_adj;
		tab_corr = FALSE;
		break;
	    }
	}
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				    && wp->w_p_wrap && in_win_border(wp, col))
    {
	++size;		/* Count the ">" in the last column. */
	mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
	colnr_T sbrlen = 0;
	int	numberwidth = win_col_off(wp);

	numberextra = numberwidth;
	col += numberextra + mb_added;
	if (col >= (colnr_T)wp->w_width)
	{
	    /* Not in the first screen line: reduce "col" to a column within
	     * a screen line.  From here on "numberextra" is used as the
	     * divisor for that, not as the offset it was above. */
	    col -= wp->w_width;
	    numberextra = wp->w_width - (numberextra - win_col_off2(wp));
	    if (col >= numberextra && numberextra > 0)
		col %= numberextra;
	    if (*p_sbr != NUL)
	    {
		sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
		if (col >= sbrlen)
		    col -= sbrlen;
	    }
	    if (col >= numberextra && numberextra > 0)
		col = col % numberextra;
	    else if (col > 0 && numberextra > 0)
		col += numberwidth - win_col_off2(wp);

	    numberwidth -= win_col_off2(wp);
	}
	if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
	{
	    added = 0;
	    if (*p_sbr != NUL)
	    {
		if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
		{
		    /* calculate effective window width */
		    int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
		    int prev_width = col ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
		    /* NOTE(review): only a zero "width" is replaced here, a
		     * negative value is used as-is in the division and modulo
		     * below - confirm that this is intended. */
		    if (width == 0)
			width = (colnr_T)wp->w_width;
		    added += ((size - prev_width) / width) * vim_strsize(p_sbr);
		    if ((size - prev_width) % width)
			/* wrapped, add another length of 'sbr' */
			added += vim_strsize(p_sbr);
		}
		else
		    added += vim_strsize(p_sbr);
	    }
	    if (wp->w_p_bri)
		added += get_breakindent_win(wp, line);

	    size += added;
	    /* "added" is only reported through "headp" when "col" is zero. */
	    if (col != 0)
		added = 0;
	}
    }
    if (headp != NULL)
	*headp = added + mb_added;
    return size;
#endif
}
1212
1213#if defined(FEAT_MBYTE) || defined(PROTO)
1214/*
1215 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1216 * 'wrap' is on. This means we need to check for a double-byte character that
1217 * doesn't fit at the end of the screen line.
1218 */
1219 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001220win_nolbr_chartabsize(
1221 win_T *wp,
1222 char_u *s,
1223 colnr_T col,
1224 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001225{
1226 int n;
1227
1228 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1229 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001230# ifdef FEAT_VARTABS
1231 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1232 wp->w_buffer->b_p_vts_array);
1233# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 n = wp->w_buffer->b_p_ts;
1235 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001236# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001237 }
1238 n = ptr2cells(s);
1239 /* Add one cell for a double-width character in the last column of the
1240 * window, displayed with a ">". */
1241 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1242 {
1243 if (headp != NULL)
1244 *headp = 1;
1245 return 3;
1246 }
1247 return n;
1248}
1249
1250/*
1251 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1252 * "wp".
1253 */
1254 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001255in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001256{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001257 int width1; /* width of first line (after line number) */
1258 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001259
Bram Moolenaar071d4272004-06-13 20:20:40 +00001260 if (wp->w_width == 0) /* there is no border */
1261 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001262 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001263 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001265 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001266 return TRUE;
1267 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001268 if (width2 <= 0)
1269 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 return ((vcol - width1) % width2 == width2 - 1);
1271}
1272#endif /* FEAT_MBYTE */
1273
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL, that value is then not
 * stored.
 * When "pos->col" is MAXCOL the whole line is scanned, up to the NUL.
 * Note that "pos->col" is set to zero when the line is empty.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(
    win_T	*wp,
    pos_T	*pos,
    colnr_T	*start,
    colnr_T	*cursor,
    colnr_T	*end)
{
    colnr_T	vcol;
    char_u	*ptr;		/* points to current char */
    char_u	*posptr;	/* points to char at pos->col */
    char_u	*line;		/* start of the line */
    int		incr;
    int		head;
#ifdef FEAT_VARTABS
    int		*vts = wp->w_buffer->b_p_vts_array;
#endif
    int		ts = wp->w_buffer->b_p_ts;
    int		c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
	posptr = NULL;  /* continue until the NUL */
    else
    {
	/* Special check for an empty line, which can happen on exit, when
	 * ml_get_buf() always returns an empty string. */
	if (*ptr == NUL)
	    pos->col = 0;
	posptr = ptr + pos->col;
#ifdef FEAT_MBYTE
	if (has_mbyte)
	    /* always start on the first byte */
	    posptr -= (*mb_head_off)(line, posptr);
#endif
    }

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
	    && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
#ifndef FEAT_MBYTE
	/* Without multi-byte support "head" is always zero. */
	head = 0;
#endif
	for (;;)
	{
#ifdef FEAT_MBYTE
	    head = 0;
#endif
	    c = *ptr;
	    /* make sure we don't go past the end of the line */
	    if (c == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }
	    /* A tab gets expanded, depending on the current column */
	    if (c == TAB)
#ifdef FEAT_VARTABS
		incr = tabstop_padding(vcol, ts, vts);
#else
		incr = ts - (vcol % ts);
#endif
	    else
	    {
#ifdef FEAT_MBYTE
		if (has_mbyte)
		{
		    /* For utf-8, if the byte is >= 0x80, need to look at
		     * further bytes to find the cell width. */
		    if (enc_utf8 && c >= 0x80)
			incr = utf_ptr2cells(ptr);
		    else
			incr = g_chartab[c] & CT_CELL_MASK;

		    /* If a double-cell char doesn't fit at the end of a line
		     * it wraps to the next line, it's like this char is three
		     * cells wide. */
		    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
			    && in_win_border(wp, vcol))
		    {
			++incr;
			head = 1;
		    }
		}
		else
#endif
		    incr = g_chartab[c] & CT_CELL_MASK;
	    }

	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    MB_PTR_ADV(ptr);
	}
    }
    else
    {
	/* Slow loop: let win_lbr_chartabsize() compute the size of every
	 * character. */
	for (;;)
	{
	    /* A tab gets expanded, depending on the current column */
	    head = 0;
	    incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
	    /* make sure we don't go past the end of the line */
	    if (*ptr == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }

	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    MB_PTR_ADV(ptr);
	}
    }
    /* Here "vcol" is the column where the character starts, "incr" its size
     * and "head" what was added in front of it. */
    if (start != NULL)
	*start = vcol + head;
    if (end != NULL)
	*end = vcol + incr - 1;
    if (cursor != NULL)
    {
	if (*ptr == TAB
		&& (State & NORMAL)
		&& !wp->w_p_list
		&& !virtual_active()
		&& !(VIsual_active
				&& (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
		)
	    *cursor = vcol + incr - 1;	    /* cursor at end */
	else
	    *cursor = vcol + head;	    /* cursor at start */
    }
}
1427
1428/*
1429 * Get virtual cursor column in the current window, pretending 'list' is off.
1430 */
1431 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001432getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001433{
1434 int list_save = curwin->w_p_list;
1435 colnr_T vcol;
1436
1437 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001438#ifdef FEAT_VIRTUALEDIT
1439 if (posp->coladd)
1440 getvvcol(curwin, posp, NULL, &vcol, NULL);
1441 else
1442#endif
1443 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001444 curwin->w_p_list = list_save;
1445 return vcol;
1446}
1447
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Get virtual column in virtual mode.
 * When virtual editing is not active this is the same as getvcol().
 * Otherwise "pos->coladd" is taken into account and "start" and "cursor" get
 * the same value.  Each of "start", "cursor" and "end" may be NULL.
 */
    void
getvvcol(
    win_T	*wp,
    pos_T	*pos,
    colnr_T	*start,
    colnr_T	*cursor,
    colnr_T	*end)
{
    colnr_T	col;
    colnr_T	coladd;
    colnr_T	endadd = 0;
# ifdef FEAT_MBYTE
    char_u	*ptr;
# endif

    if (!virtual_active())
    {
	getvcol(wp, pos, start, cursor, end);
	return;
    }

    /* For virtual mode, only want one value */
    getvcol(wp, pos, &col, NULL, NULL);
    coladd = pos->coladd;

# ifdef FEAT_MBYTE
    /* Cannot put the cursor on part of a wide character. */
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col < (colnr_T)STRLEN(ptr))
    {
	int c = (*mb_ptr2char)(ptr + pos->col);

	if (c != TAB && vim_isprintc(c))
	{
	    endadd = (colnr_T)(char2cells(c) - 1);
	    if (coladd > endadd)	/* past end of line */
		endadd = 0;
	    else
		coladd = 0;
	}
    }
# endif

    col += coladd;
    if (start != NULL)
	*start = col;
    if (cursor != NULL)
	*cursor = col;
    if (end != NULL)
	*end = col + endadd;
}
#endif
1503
Bram Moolenaar071d4272004-06-13 20:20:40 +00001504/*
1505 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1506 * Used for Visual block mode.
1507 */
1508 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001509getvcols(
1510 win_T *wp,
1511 pos_T *pos1,
1512 pos_T *pos2,
1513 colnr_T *left,
1514 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001515{
1516 colnr_T from1, from2, to1, to2;
1517
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001518 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001519 {
1520 getvvcol(wp, pos1, &from1, NULL, &to1);
1521 getvvcol(wp, pos2, &from2, NULL, &to2);
1522 }
1523 else
1524 {
1525 getvvcol(wp, pos2, &from1, NULL, &to1);
1526 getvvcol(wp, pos1, &from2, NULL, &to2);
1527 }
1528 if (from2 < from1)
1529 *left = from2;
1530 else
1531 *left = from1;
1532 if (to2 > to1)
1533 {
1534 if (*p_sel == 'e' && from2 - 1 >= to1)
1535 *right = from2 - 1;
1536 else
1537 *right = to2;
1538 }
1539 else
1540 *right = to1;
1541}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001542
1543/*
1544 * skipwhite: skip over ' ' and '\t'.
1545 */
1546 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001547skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001548{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001549 char_u *p = q;
1550
Bram Moolenaar1c465442017-03-12 20:10:05 +01001551 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001552 ++p;
1553 return p;
1554}
1555
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the current line, as obtained with ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1565
1566 int
1567getwhitecols(char_u *p)
1568{
1569 return skipwhite(p) - p;
1570}
1571
1572/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001573 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001574 */
1575 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001576skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001577{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001578 char_u *p = q;
1579
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1581 ++p;
1582 return p;
1583}
1584
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001585#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001586/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001587 * skip over binary digits
1588 */
1589 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001590skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001591{
1592 char_u *p = q;
1593
1594 while (vim_isbdigit(*p)) /* skip to next non-digit */
1595 ++p;
1596 return p;
1597}
1598
1599/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001600 * skip over digits and hex characters
1601 */
1602 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001603skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001604{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001605 char_u *p = q;
1606
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001607 while (vim_isxdigit(*p)) /* skip to next non-digit */
1608 ++p;
1609 return p;
1610}
1611#endif
1612
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001613/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001614 * skip to bin digit (or NUL after the string)
1615 */
1616 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001617skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001618{
1619 char_u *p = q;
1620
1621 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1622 ++p;
1623 return p;
1624}
1625
1626/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001627 * skip to digit (or NUL after the string)
1628 */
1629 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001630skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001631{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001632 char_u *p = q;
1633
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001634 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1635 ++p;
1636 return p;
1637}
1638
1639/*
1640 * skip to hex character (or NUL after the string)
1641 */
1642 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001643skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001644{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001645 char_u *p = q;
1646
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001647 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1648 ++p;
1649 return p;
1650}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001651
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only '0' to '9' are accepted.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1663
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Accepted are '0' to '9', 'a' to 'f' and 'A' to 'F'.
 */
    int
vim_isxdigit(int c)
{
    int		is_decimal = (c >= '0' && c <= '9');
    int		is_lower_hex = (c >= 'a' && c <= 'f');
    int		is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1676
/*
 * Corollary of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100: return TRUE for the binary digits '0' and '1' only.
 */
    int
vim_isbdigit(int c)
{
    return (c >= '0' && c <= '1');
}
1686
Bram Moolenaar78622822005-08-23 21:00:13 +00001687#if defined(FEAT_MBYTE) || defined(PROTO)
1688/*
1689 * Vim's own character class functions. These exist because many library
1690 * islower()/toupper() etc. do not work properly: they crash when used with
1691 * invalid values or can't handle latin1 when the locale is C.
1692 * Speed is most important here.
1693 */
1694#define LATIN1LOWER 'l'
1695#define LATIN1UPPER 'U'
1696
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001697static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001698static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1699static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001700
1701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001702vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001703{
1704 if (c <= '@')
1705 return FALSE;
1706 if (c >= 0x80)
1707 {
1708 if (enc_utf8)
1709 return utf_islower(c);
1710 if (c >= 0x100)
1711 {
1712#ifdef HAVE_ISWLOWER
1713 if (has_mbyte)
1714 return iswlower(c);
1715#endif
1716 /* islower() can't handle these chars and may crash */
1717 return FALSE;
1718 }
1719 if (enc_latin1like)
1720 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1721 }
1722 return islower(c);
1723}
1724
1725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001726vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001727{
1728 if (c <= '@')
1729 return FALSE;
1730 if (c >= 0x80)
1731 {
1732 if (enc_utf8)
1733 return utf_isupper(c);
1734 if (c >= 0x100)
1735 {
1736#ifdef HAVE_ISWUPPER
1737 if (has_mbyte)
1738 return iswupper(c);
1739#endif
1740 /* islower() can't handle these chars and may crash */
1741 return FALSE;
1742 }
1743 if (enc_latin1like)
1744 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1745 }
1746 return isupper(c);
1747}
1748
1749 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001750vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001751{
1752 if (c <= '@')
1753 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001754 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001755 {
1756 if (enc_utf8)
1757 return utf_toupper(c);
1758 if (c >= 0x100)
1759 {
1760#ifdef HAVE_TOWUPPER
1761 if (has_mbyte)
1762 return towupper(c);
1763#endif
1764 /* toupper() can't handle these chars and may crash */
1765 return c;
1766 }
1767 if (enc_latin1like)
1768 return latin1upper[c];
1769 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001770 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1771 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001772 return TOUPPER_LOC(c);
1773}
1774
1775 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001776vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001777{
1778 if (c <= '@')
1779 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001780 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001781 {
1782 if (enc_utf8)
1783 return utf_tolower(c);
1784 if (c >= 0x100)
1785 {
1786#ifdef HAVE_TOWLOWER
1787 if (has_mbyte)
1788 return towlower(c);
1789#endif
1790 /* tolower() can't handle these chars and may crash */
1791 return c;
1792 }
1793 if (enc_latin1like)
1794 return latin1lower[c];
1795 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001796 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1797 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001798 return TOLOWER_LOC(c);
1799}
1800#endif
1801
Bram Moolenaar071d4272004-06-13 20:20:40 +00001802/*
1803 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1804 */
1805 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001806skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001807{
1808 while (*p != ' ' && *p != '\t' && *p != NUL)
1809 ++p;
1810 return p;
1811}
1812
Bram Moolenaar071d4272004-06-13 20:20:40 +00001813/*
1814 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1815 */
1816 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001817skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818{
1819 while (*p != ' ' && *p != '\t' && *p != NUL)
1820 {
1821 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1822 ++p;
1823 ++p;
1824 }
1825 return p;
1826}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827
1828/*
1829 * Getdigits: Get a number from a string and skip over it.
1830 * Note: the argument is a pointer to a char_u pointer!
1831 */
1832 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001833getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001834{
1835 char_u *p;
1836 long retval;
1837
1838 p = *pp;
1839 retval = atol((char *)p);
1840 if (*p == '-') /* skip negative sign */
1841 ++p;
1842 p = skipdigits(p); /* skip to next non-digit */
1843 *pp = p;
1844 return retval;
1845}
1846
1847/*
1848 * Return TRUE if "lbuf" is empty or only contains blanks.
1849 */
1850 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001851vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001852{
1853 char_u *p;
1854
1855 p = skipwhite(lbuf);
1856 return (*p == NUL || *p == '\r' || *p == '\n');
1857}
1858
1859/*
1860 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001861 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1862 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001863 * 0 decimal
1864 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001865 * 'B' bin
1866 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867 * 'X' hex
1868 * 'x' hex
1869 * If "len" is not NULL, the length of the number in characters is returned.
1870 * If "nptr" is not NULL, the signed result is returned in it.
1871 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001872 * If "what" contains STR2NR_BIN recognize binary numbers
1873 * If "what" contains STR2NR_OCT recognize octal numbers
1874 * If "what" contains STR2NR_HEX recognize hex numbers
1875 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001876 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001877 */
1878 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001879vim_str2nr(
1880 char_u *start,
1881 int *prep, /* return: type of number 0 = decimal, 'x'
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001882 or 'X' is hex, '0' = octal, 'b' or 'B'
1883 is bin */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001884 int *len, /* return: detected length of number */
1885 int what, /* what numbers to recognize */
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001886 varnumber_T *nptr, /* return: signed result */
1887 uvarnumber_T *unptr, /* return: unsigned result */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001888 int maxlen) /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001889{
1890 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001891 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001892 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001893 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001894 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895
1896 if (ptr[0] == '-')
1897 {
1898 negative = TRUE;
1899 ++ptr;
1900 }
1901
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001902 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001903 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1904 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001905 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001906 pre = ptr[1];
1907 if ((what & STR2NR_HEX)
1908 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1909 && (maxlen == 0 || maxlen > 2))
1910 /* hexadecimal */
1911 ptr += 2;
1912 else if ((what & STR2NR_BIN)
1913 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1914 && (maxlen == 0 || maxlen > 2))
1915 /* binary */
1916 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001917 else
1918 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001919 /* decimal or octal, default is decimal */
1920 pre = 0;
1921 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001922 {
1923 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001924 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001925 {
1926 if (ptr[n] > '7')
1927 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001928 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001929 break;
1930 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001931 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001932 }
1933 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001934 }
1935 }
1936
1937 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001938 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1939 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001940 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001941 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1942 {
1943 /* bin */
1944 if (pre != 0)
1945 n += 2; /* skip over "0b" */
1946 while ('0' <= *ptr && *ptr <= '1')
1947 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001948 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001949 if (un <= UVARNUM_MAX / 2)
1950 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001951 else
1952 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001953 ++ptr;
1954 if (n++ == maxlen)
1955 break;
1956 }
1957 }
1958 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001959 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001960 /* octal */
1961 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001962 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001963 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001964 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001965 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1966 else
1967 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001968 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001969 if (n++ == maxlen)
1970 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001971 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001972 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001973 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001974 {
1975 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001976 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001977 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001978 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001979 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001980 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001981 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001982 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1983 else
1984 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001985 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001986 if (n++ == maxlen)
1987 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001988 }
1989 }
1990 else
1991 {
1992 /* decimal */
1993 while (VIM_ISDIGIT(*ptr))
1994 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001995 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1996
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001997 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001998 if (un < UVARNUM_MAX / 10
1999 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
2000 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002001 else
2002 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002004 if (n++ == maxlen)
2005 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002006 }
2007 }
2008
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002009 if (prep != NULL)
2010 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002011 if (len != NULL)
2012 *len = (int)(ptr - start);
2013 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002014 {
2015 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002016 {
2017 /* avoid ubsan error for overflow */
2018 if (un > VARNUM_MAX)
2019 *nptr = VARNUM_MIN;
2020 else
2021 *nptr = -(varnumber_T)un;
2022 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002023 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002024 {
2025 if (un > VARNUM_MAX)
2026 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02002027 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002028 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002029 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030 if (unptr != NULL)
2031 *unptr = un;
2032}
2033
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * The argument is not checked; for any other character the result is
 * simply "c" - '0'.
 */
    int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - '0';
}
2047
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * Return -1 if one of the characters is not hex.
 * The first character supplies the high four bits, the second one the low
 * four bits.
 */
    int
hexhex2nr(char_u *p)
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif
2061
2062/*
2063 * Return TRUE if "str" starts with a backslash that should be removed.
2064 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
2065 * backslash is not a normal file name character.
2066 * '$' is a valid file name character, we don't remove the backslash before
2067 * it. This means it is not possible to use an environment variable after a
2068 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2069 * Although "\ name" is valid, the backslash in "Program\ files" must be
2070 * removed. Assume a file name doesn't start with a space.
2071 * For multi-byte names, never remove a backslash before a non-ascii
2072 * character, assume that all multi-byte characters are valid file name
2073 * characters.
2074 */
2075 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002076rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002077{
2078#ifdef BACKSLASH_IN_FILENAME
2079 return (str[0] == '\\'
2080# ifdef FEAT_MBYTE
2081 && str[1] < 0x80
2082# endif
2083 && (str[1] == ' '
2084 || (str[1] != NUL
2085 && str[1] != '*'
2086 && str[1] != '?'
2087 && !vim_isfilec(str[1]))));
2088#else
2089 return (str[0] == '\\' && str[1] != NUL);
2090#endif
2091}
2092
2093/*
2094 * Halve the number of backslashes in a file name argument.
2095 * For MS-DOS we only do this if the character after the backslash
2096 * is not a normal file character.
2097 */
2098 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002099backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002100{
2101 for ( ; *p; ++p)
2102 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002103 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104}
2105
2106/*
2107 * backslash_halve() plus save the result in allocated memory.
2108 */
2109 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002110backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002111{
2112 char_u *res;
2113
2114 res = vim_strsave(p);
2115 if (res == NULL)
2116 return p;
2117 backslash_halve(res);
2118 return res;
2119}
2120
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte value; the entries are written
 * as octal numbers.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place through
 * ebcdic2ascii_tab[].
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    int         i;

    for (i = 0; i < len; i++)
        buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif