blob: 5b091ec4d1cebb2b5a0c8417eba222e0ce85d70c [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
13# include <wchar.h> /* for towupper() and towlower() */
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the flag for "c" lives in byte (c >> 3), bit (c & 7).
 *
 * "buf" is a pointer to a structure with a b_chartab[] member.
 * "c" is evaluated more than once, do not pass an expression with side
 * effects.
 * Every macro expands to one fully parenthesized expression, so that it can
 * be used as a statement as well as inside a larger expression.
 */
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010027/* table used below, see init_chartab() for an explanation */
28static char_u g_chartab[256];
29
/*
 * Layout of one g_chartab[] entry: the low three bits hold the display
 * width, the remaining flags are single bits.
 */
#define CT_CELL_MASK    0x07        /* mask: nr of display cells (1, 2 or 4) */
#define CT_PRINT_CHAR   (1 << 4)    /* flag: set for printable chars */
#define CT_ID_CHAR      (1 << 5)    /* flag: set for ID chars */
#define CT_FNAME_CHAR   (1 << 6)    /* flag: set for file name chars */
37
38/*
39 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000040 * characters for current buffer.
41 *
42 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
43 * 'isprint' and 'encoding'.
44 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010045 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * - For non-multi-byte index with the byte (same as the character).
47 * - For DBCS index with the first byte.
48 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
49 * the same as the character, if the first byte is 0x80 and above it depends
50 * on further bytes).
51 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010052 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000053 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
54 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
55 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
56 * translate the character before displaying it). Note that only DBCS
57 * characters can have 2 display cells and still be printable.
58 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
59 * - CT_ID_CHAR bit is set when the character can be in an identifier.
60 *
61 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
62 * error, OK otherwise.
63 */
64 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010065init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000066{
67 return buf_init_chartab(curbuf, TRUE);
68}
69
70 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010071buf_init_chartab(
72 buf_T *buf,
73 int global) /* FALSE: only set buf->b_chartab[] */
Bram Moolenaar071d4272004-06-13 20:20:40 +000074{
75 int c;
76 int c2;
77 char_u *p;
78 int i;
79 int tilde;
80 int do_isalpha;
81
82 if (global)
83 {
84 /*
85 * Set the default size for printable characters:
86 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
87 * This also inits all 'isident' and 'isfname' flags to FALSE.
88 *
89 * EBCDIC: all chars below ' ' are not printable, all others are
90 * printable.
91 */
92 c = 0;
93 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010094 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000095#ifdef EBCDIC
96 while (c < 255)
97#else
98 while (c <= '~')
99#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101#ifdef FEAT_FKMAP
102 if (p_altkeymap)
103 {
104 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100105 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 }
107#endif
108 while (c < 256)
109 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000110 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
111 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100112 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 /* euc-jp characters starting with 0x8e are single width */
114 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100115 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116 /* other double-byte chars can be printable AND double-width */
117 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100118 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000119 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100121 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 }
123
Bram Moolenaar071d4272004-06-13 20:20:40 +0000124 /* Assume that every multi-byte char is a filename character. */
125 for (c = 1; c < 256; ++c)
126 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
127 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
128 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100129 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000130 }
131
132 /*
133 * Init word char flags all to FALSE
134 */
135 vim_memset(buf->b_chartab, 0, (size_t)32);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000136 if (enc_dbcs != 0)
137 for (c = 0; c < 256; ++c)
138 {
139 /* double-byte characters are probably word characters */
140 if (MB_BYTE2LEN(c) == 2)
141 SET_CHARTAB(buf, c);
142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143
144#ifdef FEAT_LISP
145 /*
146 * In lisp mode the '-' character is included in keywords.
147 */
148 if (buf->b_p_lisp)
149 SET_CHARTAB(buf, '-');
150#endif
151
152 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
153 * options Each option is a list of characters, character numbers or
154 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
155 */
156 for (i = global ? 0 : 3; i <= 3; ++i)
157 {
158 if (i == 0)
159 p = p_isi; /* first round: 'isident' */
160 else if (i == 1)
161 p = p_isp; /* second round: 'isprint' */
162 else if (i == 2)
163 p = p_isf; /* third round: 'isfname' */
164 else /* i == 3 */
165 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
166
167 while (*p)
168 {
169 tilde = FALSE;
170 do_isalpha = FALSE;
171 if (*p == '^' && p[1] != NUL)
172 {
173 tilde = TRUE;
174 ++p;
175 }
176 if (VIM_ISDIGIT(*p))
177 c = getdigits(&p);
178 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000179 if (has_mbyte)
180 c = mb_ptr2char_adv(&p);
181 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000182 c = *p++;
183 c2 = -1;
184 if (*p == '-' && p[1] != NUL)
185 {
186 ++p;
187 if (VIM_ISDIGIT(*p))
188 c2 = getdigits(&p);
189 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000190 if (has_mbyte)
191 c2 = mb_ptr2char_adv(&p);
192 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000193 c2 = *p++;
194 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000195 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000196 || !(*p == NUL || *p == ','))
197 return FAIL;
198
199 if (c2 == -1) /* not a range */
200 {
201 /*
202 * A single '@' (not "@-@"):
203 * Decide on letters being ID/printable/keyword chars with
204 * standard function isalpha(). This takes care of locale for
205 * single-byte characters).
206 */
207 if (c == '@')
208 {
209 do_isalpha = TRUE;
210 c = 1;
211 c2 = 255;
212 }
213 else
214 c2 = c;
215 }
216 while (c <= c2)
217 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000218 /* Use the MB_ functions here, because isalpha() doesn't
219 * work properly when 'encoding' is "latin1" and the locale is
220 * "C". */
221 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222#ifdef FEAT_FKMAP
223 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
224#endif
225 )
226 {
227 if (i == 0) /* (re)set ID flag */
228 {
229 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100230 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100232 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233 }
234 else if (i == 1) /* (re)set printable */
235 {
236 if ((c < ' '
237#ifndef EBCDIC
238 || c > '~'
239#endif
240#ifdef FEAT_FKMAP
241 || (p_altkeymap
242 && (F_isalpha(c) || F_isdigit(c)))
243#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100244 // For double-byte we keep the cell width, so
245 // that we can detect it from the first byte.
246 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000247 {
248 if (tilde)
249 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100252 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 }
254 else
255 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100256 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
257 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 }
259 }
260 }
261 else if (i == 2) /* (re)set fname flag */
262 {
263 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100264 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100266 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000267 }
268 else /* i == 3 */ /* (re)set keyword flag */
269 {
270 if (tilde)
271 RESET_CHARTAB(buf, c);
272 else
273 SET_CHARTAB(buf, c);
274 }
275 }
276 ++c;
277 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100278
279 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000280 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100281 if (c == ',' && *p == NUL)
282 /* Trailing comma is not allowed. */
283 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284 }
285 }
286 chartab_initialized = TRUE;
287 return OK;
288}
289
290/*
291 * Translate any special characters in buf[bufsize] in-place.
292 * The result is a string with only printable characters, but if there is not
293 * enough room, not all characters will be translated.
294 */
295 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100296trans_characters(
297 char_u *buf,
298 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299{
300 int len; /* length of string needing translation */
301 int room; /* room in buffer after string */
302 char_u *trs; /* translated character */
303 int trs_len; /* length of trs[] */
304
305 len = (int)STRLEN(buf);
306 room = bufsize - len;
307 while (*buf != 0)
308 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000309 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000310 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000311 len -= trs_len;
312 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000313 {
314 trs = transchar_byte(*buf);
315 trs_len = (int)STRLEN(trs);
316 if (trs_len > 1)
317 {
318 room -= trs_len - 1;
319 if (room <= 0)
320 return;
321 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
322 }
323 mch_memmove(buf, trs, (size_t)trs_len);
324 --len;
325 }
326 buf += trs_len;
327 }
328}
329
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate the string "s" into allocated memory, replacing special chars
 * with printable chars.
 * Printable multi-byte characters are copied as they are, unprintable ones
 * are written in the form produced by transchar_hex(); every other byte is
 * replaced by the text from transchar_byte().
 * Returns NULL when out of memory.
 */
    char_u *
transstr(char_u *s)
{
    char_u      *res;
    char_u      *dst;           /* where the next piece is written in "res" */
    char_u      *p;
    char_u      *piece;
    int         l, len, c;
    char_u      hexbuf[11];

    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res == NULL)
        return NULL;

    /* Append each piece at "dst" instead of searching for the end of "res"
     * again for every character; that made building the result quadratic
     * in the length of the string. */
    dst = res;
    p = s;
    while (*p != NUL)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
            {
                /* append printable multi-byte char */
                mch_memmove(dst, p, (size_t)l);
                dst += l;
            }
            else
            {
                transchar_hex(dst, c);
                dst += STRLEN(dst);
            }
            p += l;
        }
        else
        {
            piece = transchar_byte(*p++);
            l = (int)STRLEN(piece);
            mch_memmove(dst, piece, (size_t)l);
            dst += l;
        }
    }
    *dst = NUL;
    return res;
}
#endif
399
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]", truncating the input when it
 * does not fit, and returns "buf".
 */
    char_u *
str_foldcase(
    char_u      *str,
    int         orglen,
    char_u      *buf,
    int         buflen)
{
    garray_T    ga;
    int         idx;
    int         len = orglen;

    /* Access the working copy, which is either the growarray or "buf".  The
     * growarray data can move when it grows, thus always go through these
     * macros instead of keeping a pointer. */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * it with a NUL. */
    if (buf != NULL)
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }
    else
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }

    /* Make each character lower case. */
    for (idx = 0; STR_CHAR(idx) != NUL; )
    {
        if (!enc_utf8 && !(has_mbyte && MB_BYTE2LEN(STR_CHAR(idx)) > 1))
        {
            /* Single byte: fold it with the locale. */
            if (buf == NULL)
                GA_CHAR(idx) = TOLOWER_LOC(GA_CHAR(idx));
            else
                buf[idx] = TOLOWER_LOC(buf[idx]);
            ++idx;
            continue;
        }

        if (enc_utf8)
        {
            int c = utf_ptr2char(STR_PTR(idx));
            int olen = utf_ptr2len(STR_PTR(idx));
            int lc = utf_tolower(c);

            /* Only replace the character when it is not an invalid
             * sequence (ASCII character or more than one byte) and
             * utf_tolower() doesn't return the original character. */
            if ((c < 0x80 || olen > 1) && c != lc)
            {
                int nlen = utf_char2len(lc);

                /* The lower case form needs more bytes: make room for it,
                 * or keep the old character when there is no room. */
                if (nlen > olen
                        && (buf == NULL
                            ? ga_grow(&ga, nlen - olen + 1) == FAIL
                            : len + nlen - olen >= buflen))
                {
                    /* out of memory, keep old char */
                    lc = c;
                    nlen = olen;
                }

                /* If the byte length changes need to shift the following
                 * characters forward or backward. */
                if (nlen != olen)
                {
                    if (buf == NULL)
                    {
                        STRMOVE(GA_PTR(idx) + nlen, GA_PTR(idx) + olen);
                        ga.ga_len += nlen - olen;
                    }
                    else
                    {
                        STRMOVE(buf + idx + nlen, buf + idx + olen);
                        len += nlen - olen;
                    }
                }
                (void)utf_char2bytes(lc, STR_PTR(idx));
            }
        }

        /* skip to next multi-byte char */
        idx += (*mb_ptr2len)(STR_PTR(idx));
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
512
513/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100514 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100516 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 * Does NOT work for multi-byte characters, c must be <= 255.
518 * Also doesn't work for the first byte of a multi-byte, "c" must be a
519 * character!
520 */
521static char_u transchar_buf[7];
522
523 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100524transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525{
526 int i;
527
528 i = 0;
529 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
530 {
531 transchar_buf[0] = '~';
532 transchar_buf[1] = '@';
533 i = 2;
534 c = K_SECOND(c);
535 }
536
537 if ((!chartab_initialized && (
538#ifdef EBCDIC
539 (c >= 64 && c < 255)
540#else
541 (c >= ' ' && c <= '~')
542#endif
543#ifdef FEAT_FKMAP
Bram Moolenaaree2615a2016-07-02 18:25:34 +0200544 || (p_altkeymap && F_ischar(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545#endif
546 )) || (c < 256 && vim_isprintc_strict(c)))
547 {
548 /* printable character */
549 transchar_buf[i] = c;
550 transchar_buf[i + 1] = NUL;
551 }
552 else
553 transchar_nonprint(transchar_buf + i, c);
554 return transchar_buf;
555}
556
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557/*
558 * Like transchar(), but called with a byte instead of a character. Checks
559 * for an illegal UTF-8 byte.
560 */
561 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100562transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (enc_utf8 && c >= 0x80)
565 {
566 transchar_nonprint(transchar_buf, c);
567 return transchar_buf;
568 }
569 return transchar(c);
570}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572/*
573 * Convert non-printable character to two or more printable characters in
574 * "buf[]". "buf" needs to be able to hold five bytes.
575 * Does NOT work for multi-byte characters, c must be <= 255.
576 */
577 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100578transchar_nonprint(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000579{
580 if (c == NL)
581 c = NUL; /* we use newline in place of a NUL */
582 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
583 c = NL; /* we use CR in place of NL in this case */
584
585 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
586 transchar_hex(buf, c);
587
588#ifdef EBCDIC
589 /* For EBCDIC only the characters 0-63 and 255 are not printable */
590 else if (CtrlChar(c) != 0 || c == DEL)
591#else
592 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
593#endif
594 {
595 buf[0] = '^';
596#ifdef EBCDIC
597 if (c == DEL)
598 buf[1] = '?'; /* DEL displayed as ^? */
599 else
600 buf[1] = CtrlChar(c);
601#else
602 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
603#endif
604
605 buf[2] = NUL;
606 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000607 else if (enc_utf8 && c >= 0x80)
608 {
609 transchar_hex(buf, c);
610 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611#ifndef EBCDIC
612 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
613 {
614 buf[0] = '|';
615 buf[1] = c - 0x80;
616 buf[2] = NUL;
617 }
618#else
619 else if (c < 64)
620 {
621 buf[0] = '~';
622 buf[1] = MetaChar(c);
623 buf[2] = NUL;
624 }
625#endif
626 else /* 0x80 - 0x9f and 0xff */
627 {
628 /*
629 * TODO: EBCDIC I don't know what to do with this chars, so I display
630 * them as '~?' for now
631 */
632 buf[0] = '~';
633#ifdef EBCDIC
634 buf[1] = '?'; /* 0xff displayed as ~? */
635#else
636 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
637#endif
638 buf[2] = NUL;
639 }
640}
641
642 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100643transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000644{
645 int i = 0;
646
647 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000648 if (c > 255)
649 {
650 buf[++i] = nr2hex((unsigned)c >> 12);
651 buf[++i] = nr2hex((unsigned)c >> 8);
652 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000653 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000654 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000655 buf[++i] = '>';
656 buf[++i] = NUL;
657}
658
/*
 * Return the hex digit character for the lowest 4 bits of "c"; all higher
 * bits are ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    static const char digits[] = "0123456789abcdef";

    return (unsigned)digits[c & 0xf];
}
671
672/*
673 * Return number of display cells occupied by byte "b".
674 * Caller must make sure 0 <= b <= 255.
675 * For multi-byte mode "b" must be the first byte of a character.
676 * A TAB is counted as two cells: "^I".
677 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
678 * cells depends on further bytes.
679 */
680 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100681byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8 && b >= 0x80)
684 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100685 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000686}
687
688/*
689 * Return number of display cells occupied by character "c".
690 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
691 * A TAB is counted as two cells: "^I" or four: "<09>".
692 */
693 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100694char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695{
696 if (IS_SPECIAL(c))
697 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000698 if (c >= 0x80)
699 {
700 /* UTF-8: above 0x80 need to check the value */
701 if (enc_utf8)
702 return utf_char2cells(c);
703 /* DBCS: double-byte means double-width, except for euc-jp with first
704 * byte 0x8e */
705 if (enc_dbcs != 0 && c >= 0x100)
706 {
707 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
708 return 1;
709 return 2;
710 }
711 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100712 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713}
714
715/*
716 * Return number of display cells occupied by character at "*p".
717 * A TAB is counted as two cells: "^I" or four: "<09>".
718 */
719 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100720ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
723 if (enc_utf8 && *p >= 0x80)
724 return utf_ptr2cells(p);
725 /* For DBCS we can tell the cell count from the first byte. */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100726 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727}
728
729/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100730 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 * counting TABs as two characters: "^I".
732 */
733 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100734vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000735{
736 return vim_strnsize(s, (int)MAXCOL);
737}
738
739/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100740 * Return the number of character cells string "s[len]" will take on the
741 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 */
743 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100744vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000745{
746 int size = 0;
747
748 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749 if (has_mbyte)
750 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000751 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000752
753 size += ptr2cells(s);
754 s += l;
755 len -= l - 1;
756 }
757 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000758 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100759
Bram Moolenaar071d4272004-06-13 20:20:40 +0000760 return size;
761}
762
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro is the complete body of a function returning int: both branches
 * end in a "return".  A TAB gets the distance to the next tab stop, unless
 * 'list' is set for "wp" and lcs_tab1 is not set; everything else gets the
 * result of ptr2cells().
 * All macro arguments are parenthesized where they are used in an
 * expression, so that an expression can be passed for "col".
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
793 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
794}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000795
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and the buffer displayed in it.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* The macro expands to the complete body, including the "return". */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
803
804/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200805 * Return the number of characters the string 's' will take on the screen,
806 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 */
808 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100809linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810{
Bram Moolenaardc536092010-07-18 15:45:49 +0200811 return linetabsize_col(0, s);
812}
813
814/*
815 * Like linetabsize(), but starting at column "startcol".
816 */
817 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100818linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200819{
820 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822
823 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200824 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 return (int)col;
826}
827
828/*
829 * Like linetabsize(), but for a given window instead of the current one.
830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
834 colnr_T col = 0;
835 char_u *s;
836
Bram Moolenaar597a4222014-06-25 14:39:50 +0200837 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100838 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200839 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 return (int)col;
841}
842
843/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000844 * Return TRUE if 'c' is a normal identifier character:
845 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 */
847 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100848vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100850 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851}
852
853/*
854 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100855 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 * For multi-byte characters mb_get_class() is used (builtin rules).
857 */
858 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100859vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100861 return vim_iswordc_buf(c, curbuf);
862}
863
864 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100865vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100866{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867 if (c >= 0x100)
868 {
869 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000870 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100872 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100873 return FALSE;
874 }
875 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000876}
877
878/*
879 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
880 */
881 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100882vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000883{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885}
886
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100888vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100890 int c = *p;
891
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100892 if (has_mbyte && MB_BYTE2LEN(c) > 1)
893 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100894 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896
897/*
898 * return TRUE if 'c' is a valid file-name character
899 * Assume characters above 0x100 are valid (multi-byte).
900 */
901 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100902vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100904 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000905}
906
907/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000908 * return TRUE if 'c' is a valid file-name character or a wildcard character
909 * Assume characters above 0x100 are valid (multi-byte).
910 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
911 * returns false.
912 */
913 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100914vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000915{
916 char_u buf[2];
917
918 buf[0] = (char_u)c;
919 buf[1] = NUL;
920 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
921}
922
923/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200924 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 * Assume characters above 0x100 are printable (multi-byte), except for
926 * Unicode.
927 */
928 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100929vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000930{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000931 if (enc_utf8 && c >= 0x100)
932 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100933 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934}
935
936/*
937 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
938 * byte of a double-byte character.
939 */
940 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100941vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000943 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
944 return FALSE;
945 if (enc_utf8 && c >= 0x100)
946 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100947 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948}
949
950/*
951 * like chartabsize(), but also check for line breaks on the screen
952 */
953 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100954lbr_chartabsize(
955 char_u *line UNUSED, /* start of the line */
956 unsigned char *s,
957 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958{
959#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +0200960 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000961 {
962#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000963 if (curwin->w_p_wrap)
964 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
966#ifdef FEAT_LINEBREAK
967 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200968 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000969#endif
970}
971
972/*
973 * Call lbr_chartabsize() and advance the pointer.
974 */
975 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100976lbr_chartabsize_adv(
977 char_u *line, /* start of the line */
978 char_u **s,
979 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000980{
981 int retval;
982
Bram Moolenaar597a4222014-06-25 14:39:50 +0200983 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100984 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000985 return retval;
986}
987
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the size of the character at "s" in window "wp" when it starts at
 * virtual column "col", taking 'linebreak', 'showbreak' and 'breakindent'
 * into account.  "line" is the start of the line; it is only passed on to
 * get_breakindent_win().
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 */
    int
win_lbr_chartabsize(
    win_T	*wp,
    char_u	*line UNUSED, /* start of the line */
    char_u	*s,
    colnr_T	col,
    int		*headp UNUSED)
{
#ifdef FEAT_LINEBREAK
    int		c;
    int		size;
    colnr_T	col2;
    colnr_T	col_adj = 0; /* col + screen size of tab */
    colnr_T	colmax;
    int		added;
    int		mb_added = 0;
    int		numberextra;
    char_u	*ps;
    int		tab_corr = (*s == TAB);
    int		n;

    /*
     * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
#endif
    {
	/* Without FEAT_LINEBREAK this block is the whole function body. */
	if (wp->w_p_wrap)
	    return win_nolbr_chartabsize(wp, s, col, headp);
	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;
    if (tab_corr)
	col_adj = size - 1;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
	    && VIM_ISBREAK(c)
	    && !VIM_ISBREAK((int)s[1])
	    && wp->w_p_wrap
	    && wp->w_width != 0)
    {
	/*
	 * Count all characters from first non-blank after a blank up to next
	 * non-blank after a blank.
	 */
	numberextra = win_col_off(wp);
	col2 = col;
	colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
	if (col >= colmax)
	{
	    /* "col" is past the first screen line: move "colmax" forward in
	     * steps of "n" until it is beyond "col". */
	    colmax += col_adj;
	    n = colmax + win_col_off2(wp);
	    if (n > 0)
		colmax += (((col - colmax) / n) + 1) * n - col_adj;
	}

	for (;;)
	{
	    ps = s;
	    MB_PTR_ADV(s);
	    c = *s;
	    if (!(c != NUL
		    && (VIM_ISBREAK(c)
			|| (!VIM_ISBREAK(c)
			    && (col2 == col || !VIM_ISBREAK((int)*ps))))))
		break;

	    col2 += win_chartabsize(wp, s, col2);
	    if (col2 >= colmax)		/* doesn't fit */
	    {
		/* The break character takes up the rest of the screen line. */
		size = colmax - col + col_adj;
		tab_corr = FALSE;
		break;
	    }
	}
    }
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				    && wp->w_p_wrap && in_win_border(wp, col))
    {
	++size;		/* Count the ">" in the last column. */
	mb_added = 1;
    }

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
	colnr_T sbrlen = 0;
	int	numberwidth = win_col_off(wp);

	numberextra = numberwidth;
	col += numberextra + mb_added;
	if (col >= (colnr_T)wp->w_width)
	{
	    /* From here on "col" is reduced to a column within a screen
	     * line and "numberextra" holds the width used for the modulo. */
	    col -= wp->w_width;
	    numberextra = wp->w_width - (numberextra - win_col_off2(wp));
	    if (col >= numberextra && numberextra > 0)
		col %= numberextra;
	    if (*p_sbr != NUL)
	    {
		sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
		if (col >= sbrlen)
		    col -= sbrlen;
	    }
	    if (col >= numberextra && numberextra > 0)
		col = col % numberextra;
	    else if (col > 0 && numberextra > 0)
		col += numberwidth - win_col_off2(wp);

	    numberwidth -= win_col_off2(wp);
	}
	if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
	{
	    added = 0;
	    if (*p_sbr != NUL)
	    {
		if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
		{
		    /* calculate effective window width */
		    int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
		    int prev_width = col ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
		    /* Avoid dividing by zero below. */
		    if (width == 0)
			width = (colnr_T)wp->w_width;
		    added += ((size - prev_width) / width) * vim_strsize(p_sbr);
		    if ((size - prev_width) % width)
			/* wrapped, add another length of 'sbr' */
			added += vim_strsize(p_sbr);
		}
		else
		    added += vim_strsize(p_sbr);
	    }
	    if (wp->w_p_bri)
		added += get_breakindent_win(wp, line);

	    size += added;
	    /* Only report the addition through "headp" when the character
	     * itself starts at column zero. */
	    if (col != 0)
		added = 0;
	}
    }
    if (headp != NULL)
	*headp = added + mb_added;
    return size;
#endif
}
1153
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154/*
1155 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1156 * 'wrap' is on. This means we need to check for a double-byte character that
1157 * doesn't fit at the end of the screen line.
1158 */
1159 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001160win_nolbr_chartabsize(
1161 win_T *wp,
1162 char_u *s,
1163 colnr_T col,
1164 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165{
1166 int n;
1167
1168 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1169 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001170# ifdef FEAT_VARTABS
1171 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1172 wp->w_buffer->b_p_vts_array);
1173# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174 n = wp->w_buffer->b_p_ts;
1175 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001176# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 }
1178 n = ptr2cells(s);
1179 /* Add one cell for a double-width character in the last column of the
1180 * window, displayed with a ">". */
1181 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1182 {
1183 if (headp != NULL)
1184 *headp = 1;
1185 return 3;
1186 }
1187 return n;
1188}
1189
1190/*
1191 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1192 * "wp".
1193 */
1194 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001195in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001196{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001197 int width1; /* width of first line (after line number) */
1198 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 if (wp->w_width == 0) /* there is no border */
1201 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001202 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001203 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001205 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return TRUE;
1207 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001208 if (width2 <= 0)
1209 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210 return ((vcol - width1) % width2 == width2 - 1);
1211}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001212
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL when the value is not
 * wanted.  When pos->col is MAXCOL the position of the NUL at the end of the
 * line is used.
 * Side effect: pos->col is set to zero when the line is empty.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(
    win_T	*wp,
    pos_T	*pos,
    colnr_T	*start,
    colnr_T	*cursor,
    colnr_T	*end)
{
    colnr_T	vcol;
    char_u	*ptr;		/* points to current char */
    char_u	*posptr;	/* points to char at pos->col */
    char_u	*line;		/* start of the line */
    int		incr;
    int		head;
#ifdef FEAT_VARTABS
    int		*vts = wp->w_buffer->b_p_vts_array;
#endif
    int		ts = wp->w_buffer->b_p_ts;
    int		c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
	posptr = NULL;  /* continue until the NUL */
    else
    {
	/* Special check for an empty line, which can happen on exit, when
	 * ml_get_buf() always returns an empty string. */
	if (*ptr == NUL)
	    pos->col = 0;
	posptr = ptr + pos->col;
	if (has_mbyte)
	    /* always start on the first byte */
	    posptr -= (*mb_head_off)(line, posptr);
    }

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
	    && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
	for (;;)
	{
	    head = 0;
	    c = *ptr;
	    /* make sure we don't go past the end of the line */
	    if (c == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }
	    /* A tab gets expanded, depending on the current column */
	    if (c == TAB)
#ifdef FEAT_VARTABS
		incr = tabstop_padding(vcol, ts, vts);
#else
		incr = ts - (vcol % ts);
#endif
	    else
	    {
		if (has_mbyte)
		{
		    /* For utf-8, if the byte is >= 0x80, need to look at
		     * further bytes to find the cell width. */
		    if (enc_utf8 && c >= 0x80)
			incr = utf_ptr2cells(ptr);
		    else
			incr = g_chartab[c] & CT_CELL_MASK;

		    /* If a double-cell char doesn't fit at the end of a line
		     * it wraps to the next line, it's like this char is three
		     * cells wide. */
		    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
			    && in_win_border(wp, vcol))
		    {
			++incr;
			head = 1;
		    }
		}
		else
		    incr = g_chartab[c] & CT_CELL_MASK;
	    }

	    /* Stop with "incr" and "head" describing the wanted character. */
	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    MB_PTR_ADV(ptr);
	}
    }
    else
    {
	for (;;)
	{
	    /* A tab gets expanded, depending on the current column */
	    head = 0;
	    incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
	    /* make sure we don't go past the end of the line */
	    if (*ptr == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }

	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    MB_PTR_ADV(ptr);
	}
    }
    if (start != NULL)
	*start = vcol + head;
    if (end != NULL)
	*end = vcol + incr - 1;
    if (cursor != NULL)
    {
	/* On a TAB in Normal mode the cursor is shown at the end of the TAB,
	 * unless one of the conditions below applies. */
	if (*ptr == TAB
		&& (State & NORMAL)
		&& !wp->w_p_list
		&& !virtual_active()
		&& !(VIsual_active
				&& (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
		)
	    *cursor = vcol + incr - 1;	    /* cursor at end */
	else
	    *cursor = vcol + head;	    /* cursor at start */
    }
}
1357
1358/*
1359 * Get virtual cursor column in the current window, pretending 'list' is off.
1360 */
1361 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001362getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363{
1364 int list_save = curwin->w_p_list;
1365 colnr_T vcol;
1366
1367 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001368 if (posp->coladd)
1369 getvvcol(curwin, posp, NULL, &vcol, NULL);
1370 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001371 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 curwin->w_p_list = list_save;
1373 return vcol;
1374}
1375
Bram Moolenaar071d4272004-06-13 20:20:40 +00001376/*
1377 * Get virtual column in virtual mode.
1378 */
1379 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001380getvvcol(
1381 win_T *wp,
1382 pos_T *pos,
1383 colnr_T *start,
1384 colnr_T *cursor,
1385 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386{
1387 colnr_T col;
1388 colnr_T coladd;
1389 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391
1392 if (virtual_active())
1393 {
1394 /* For virtual mode, only want one value */
1395 getvcol(wp, pos, &col, NULL, NULL);
1396
1397 coladd = pos->coladd;
1398 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001399 /* Cannot put the cursor on part of a wide character. */
1400 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001401 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001402 {
1403 int c = (*mb_ptr2char)(ptr + pos->col);
1404
1405 if (c != TAB && vim_isprintc(c))
1406 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001407 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001408 if (coladd > endadd) /* past end of line */
1409 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001410 else
1411 coladd = 0;
1412 }
1413 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001414 col += coladd;
1415 if (start != NULL)
1416 *start = col;
1417 if (cursor != NULL)
1418 *cursor = col;
1419 if (end != NULL)
1420 *end = col + endadd;
1421 }
1422 else
1423 getvcol(wp, pos, start, cursor, end);
1424}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001425
Bram Moolenaar071d4272004-06-13 20:20:40 +00001426/*
1427 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1428 * Used for Visual block mode.
1429 */
1430 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001431getvcols(
1432 win_T *wp,
1433 pos_T *pos1,
1434 pos_T *pos2,
1435 colnr_T *left,
1436 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437{
1438 colnr_T from1, from2, to1, to2;
1439
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001440 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001441 {
1442 getvvcol(wp, pos1, &from1, NULL, &to1);
1443 getvvcol(wp, pos2, &from2, NULL, &to2);
1444 }
1445 else
1446 {
1447 getvvcol(wp, pos2, &from1, NULL, &to1);
1448 getvvcol(wp, pos1, &from2, NULL, &to2);
1449 }
1450 if (from2 < from1)
1451 *left = from2;
1452 else
1453 *left = from1;
1454 if (to2 > to1)
1455 {
1456 if (*p_sel == 'e' && from2 - 1 >= to1)
1457 *right = from2 - 1;
1458 else
1459 *right = to2;
1460 }
1461 else
1462 *right = to1;
1463}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001464
1465/*
1466 * skipwhite: skip over ' ' and '\t'.
1467 */
1468 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001469skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001470{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001471 char_u *p = q;
1472
Bram Moolenaar1c465442017-03-12 20:10:05 +01001473 while (VIM_ISWHITE(*p)) /* skip to next non-white */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001474 ++p;
1475 return p;
1476}
1477
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 *
 * Declared with "(void)" so that the definition is a real prototype: an empty
 * "()" parameter list does not let the compiler check calls.
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1487
1488 int
1489getwhitecols(char_u *p)
1490{
1491 return skipwhite(p) - p;
1492}
1493
1494/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001495 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001496 */
1497 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001498skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001499{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001500 char_u *p = q;
1501
Bram Moolenaar071d4272004-06-13 20:20:40 +00001502 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1503 ++p;
1504 return p;
1505}
1506
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001507#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001508/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001509 * skip over binary digits
1510 */
1511 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001512skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001513{
1514 char_u *p = q;
1515
1516 while (vim_isbdigit(*p)) /* skip to next non-digit */
1517 ++p;
1518 return p;
1519}
1520
1521/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001522 * skip over digits and hex characters
1523 */
1524 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001525skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001526{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001527 char_u *p = q;
1528
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001529 while (vim_isxdigit(*p)) /* skip to next non-digit */
1530 ++p;
1531 return p;
1532}
1533#endif
1534
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001535/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001536 * skip to bin digit (or NUL after the string)
1537 */
1538 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001539skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001540{
1541 char_u *p = q;
1542
1543 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1544 ++p;
1545 return p;
1546}
1547
1548/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549 * skip to digit (or NUL after the string)
1550 */
1551 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001552skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001553{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001554 char_u *p = q;
1555
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001556 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1557 ++p;
1558 return p;
1559}
1560
1561/*
1562 * skip to hex character (or NUL after the string)
1563 */
1564 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001565skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001566{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001567 char_u *p = q;
1568
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001569 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1570 ++p;
1571 return p;
1572}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001573
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1585
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(int c)
{
    int		is_decimal = (c >= '0' && c <= '9');
    int		is_lower_hex = (c >= 'a' && c <= 'f');
    int		is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1598
/*
 * Corollary of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100: only '0' and '1' are binary digits.
 */
    int
vim_isbdigit(int c)
{
    return (c >= '0' && c <= '1');
}
1608
Bram Moolenaar78622822005-08-23 21:00:13 +00001609/*
1610 * Vim's own character class functions. These exist because many library
1611 * islower()/toupper() etc. do not work properly: they crash when used with
1612 * invalid values or can't handle latin1 when the locale is C.
1613 * Speed is most important here.
1614 */
1615#define LATIN1LOWER 'l'
1616#define LATIN1UPPER 'U'
1617
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001618static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001619static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1620static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001621
1622 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001623vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001624{
1625 if (c <= '@')
1626 return FALSE;
1627 if (c >= 0x80)
1628 {
1629 if (enc_utf8)
1630 return utf_islower(c);
1631 if (c >= 0x100)
1632 {
1633#ifdef HAVE_ISWLOWER
1634 if (has_mbyte)
1635 return iswlower(c);
1636#endif
1637 /* islower() can't handle these chars and may crash */
1638 return FALSE;
1639 }
1640 if (enc_latin1like)
1641 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1642 }
1643 return islower(c);
1644}
1645
1646 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001647vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001648{
1649 if (c <= '@')
1650 return FALSE;
1651 if (c >= 0x80)
1652 {
1653 if (enc_utf8)
1654 return utf_isupper(c);
1655 if (c >= 0x100)
1656 {
1657#ifdef HAVE_ISWUPPER
1658 if (has_mbyte)
1659 return iswupper(c);
1660#endif
1661 /* islower() can't handle these chars and may crash */
1662 return FALSE;
1663 }
1664 if (enc_latin1like)
1665 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1666 }
1667 return isupper(c);
1668}
1669
1670 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001671vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001672{
1673 if (c <= '@')
1674 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001675 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001676 {
1677 if (enc_utf8)
1678 return utf_toupper(c);
1679 if (c >= 0x100)
1680 {
1681#ifdef HAVE_TOWUPPER
1682 if (has_mbyte)
1683 return towupper(c);
1684#endif
1685 /* toupper() can't handle these chars and may crash */
1686 return c;
1687 }
1688 if (enc_latin1like)
1689 return latin1upper[c];
1690 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001691 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1692 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001693 return TOUPPER_LOC(c);
1694}
1695
1696 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001697vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001698{
1699 if (c <= '@')
1700 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001701 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001702 {
1703 if (enc_utf8)
1704 return utf_tolower(c);
1705 if (c >= 0x100)
1706 {
1707#ifdef HAVE_TOWLOWER
1708 if (has_mbyte)
1709 return towlower(c);
1710#endif
1711 /* tolower() can't handle these chars and may crash */
1712 return c;
1713 }
1714 if (enc_latin1like)
1715 return latin1lower[c];
1716 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001717 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1718 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001719 return TOLOWER_LOC(c);
1720}
Bram Moolenaar78622822005-08-23 21:00:13 +00001721
Bram Moolenaar071d4272004-06-13 20:20:40 +00001722/*
1723 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1724 */
1725 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001726skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001727{
1728 while (*p != ' ' && *p != '\t' && *p != NUL)
1729 ++p;
1730 return p;
1731}
1732
Bram Moolenaar071d4272004-06-13 20:20:40 +00001733/*
1734 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1735 */
1736 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001737skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001738{
1739 while (*p != ' ' && *p != '\t' && *p != NUL)
1740 {
1741 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1742 ++p;
1743 ++p;
1744 }
1745 return p;
1746}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001747
1748/*
1749 * Getdigits: Get a number from a string and skip over it.
1750 * Note: the argument is a pointer to a char_u pointer!
1751 */
1752 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001753getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001754{
1755 char_u *p;
1756 long retval;
1757
1758 p = *pp;
1759 retval = atol((char *)p);
1760 if (*p == '-') /* skip negative sign */
1761 ++p;
1762 p = skipdigits(p); /* skip to next non-digit */
1763 *pp = p;
1764 return retval;
1765}
1766
1767/*
1768 * Return TRUE if "lbuf" is empty or only contains blanks.
1769 */
1770 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001771vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772{
1773 char_u *p;
1774
1775 p = skipwhite(lbuf);
1776 return (*p == NUL || *p == '\r' || *p == '\n');
1777}
1778
1779/*
1780 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001781 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1782 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 * 0 decimal
1784 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001785 * 'B' bin
1786 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001787 * 'X' hex
1788 * 'x' hex
1789 * If "len" is not NULL, the length of the number in characters is returned.
1790 * If "nptr" is not NULL, the signed result is returned in it.
1791 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001792 * If "what" contains STR2NR_BIN recognize binary numbers
1793 * If "what" contains STR2NR_OCT recognize octal numbers
1794 * If "what" contains STR2NR_HEX recognize hex numbers
1795 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaarce157752017-10-28 16:07:33 +02001796 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001797 */
1798 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001799vim_str2nr(
1800 char_u *start,
1801 int *prep, /* return: type of number 0 = decimal, 'x'
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001802 or 'X' is hex, '0' = octal, 'b' or 'B'
1803 is bin */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001804 int *len, /* return: detected length of number */
1805 int what, /* what numbers to recognize */
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001806 varnumber_T *nptr, /* return: signed result */
1807 uvarnumber_T *unptr, /* return: unsigned result */
Bram Moolenaar7454a062016-01-30 15:14:10 +01001808 int maxlen) /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001809{
1810 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001811 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001812 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001813 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001814 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815
1816 if (ptr[0] == '-')
1817 {
1818 negative = TRUE;
1819 ++ptr;
1820 }
1821
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001822 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001823 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1824 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001825 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001826 pre = ptr[1];
1827 if ((what & STR2NR_HEX)
1828 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1829 && (maxlen == 0 || maxlen > 2))
1830 /* hexadecimal */
1831 ptr += 2;
1832 else if ((what & STR2NR_BIN)
1833 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1834 && (maxlen == 0 || maxlen > 2))
1835 /* binary */
1836 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001837 else
1838 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001839 /* decimal or octal, default is decimal */
1840 pre = 0;
1841 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001842 {
1843 /* Don't interpret "0", "08" or "0129" as octal. */
Bram Moolenaarce157752017-10-28 16:07:33 +02001844 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001845 {
1846 if (ptr[n] > '7')
1847 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001848 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001849 break;
1850 }
Bram Moolenaar9a91c7a2017-10-28 15:38:40 +02001851 pre = '0'; /* assume octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001852 }
1853 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001854 }
1855 }
1856
1857 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001858 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1859 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001860 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001861 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1862 {
1863 /* bin */
1864 if (pre != 0)
1865 n += 2; /* skip over "0b" */
1866 while ('0' <= *ptr && *ptr <= '1')
1867 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001868 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001869 if (un <= UVARNUM_MAX / 2)
1870 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001871 else
1872 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001873 ++ptr;
1874 if (n++ == maxlen)
1875 break;
1876 }
1877 }
1878 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001879 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001880 /* octal */
1881 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001882 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001883 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001884 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001885 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1886 else
1887 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001888 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001889 if (n++ == maxlen)
1890 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001891 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001892 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001893 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001894 {
1895 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001896 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001897 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001898 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001899 {
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001900 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001901 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001902 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1903 else
1904 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001905 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001906 if (n++ == maxlen)
1907 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001908 }
1909 }
1910 else
1911 {
1912 /* decimal */
1913 while (VIM_ISDIGIT(*ptr))
1914 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001915 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1916
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001917 /* avoid ubsan error for overflow */
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001918 if (un < UVARNUM_MAX / 10
1919 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1920 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001921 else
1922 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001924 if (n++ == maxlen)
1925 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001926 }
1927 }
1928
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001929 if (prep != NULL)
1930 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 if (len != NULL)
1932 *len = (int)(ptr - start);
1933 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001934 {
1935 if (negative) /* account for leading '-' for decimal numbers */
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001936 {
1937 /* avoid ubsan error for overflow */
1938 if (un > VARNUM_MAX)
1939 *nptr = VARNUM_MIN;
1940 else
1941 *nptr = -(varnumber_T)un;
1942 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001943 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001944 {
1945 if (un > VARNUM_MAX)
1946 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001947 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001948 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001949 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001950 if (unptr != NULL)
1951 *unptr = un;
1952}
1953
/*
 * Return the value of the single hex digit "c".
 * The result is only meaningful for '0' - '9', 'A' - 'F' and 'a' - 'f'.
 * Any other character is handled like a decimal digit and gives "c - '0'".
 */
    int
hex2nr(int c)
{
    int first;      /* first character of the range "c" belongs to */
    int value;      /* value of that first character */

    if ('a' <= c && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if ('A' <= c && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    else
    {
        first = '0';
        value = 0;
    }
    return c - first + value;
}
1967
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value; p[0] is the high
 * nibble and p[1] the low nibble.
 * Return -1 if one of the characters is not a hex digit.
 */
    int
hexhex2nr(char_u *p)
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
        return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
    return -1;
}
#endif
1981
1982/*
1983 * Return TRUE if "str" starts with a backslash that should be removed.
1984 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1985 * backslash is not a normal file name character.
1986 * '$' is a valid file name character, we don't remove the backslash before
1987 * it. This means it is not possible to use an environment variable after a
1988 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1989 * Although "\ name" is valid, the backslash in "Program\ files" must be
1990 * removed. Assume a file name doesn't start with a space.
1991 * For multi-byte names, never remove a backslash before a non-ascii
1992 * character, assume that all multi-byte characters are valid file name
1993 * characters.
1994 */
1995 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001996rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001997{
1998#ifdef BACKSLASH_IN_FILENAME
1999 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002000 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001 && (str[1] == ' '
2002 || (str[1] != NUL
2003 && str[1] != '*'
2004 && str[1] != '?'
2005 && !vim_isfilec(str[1]))));
2006#else
2007 return (str[0] == '\\' && str[1] != NUL);
2008#endif
2009}
2010
2011/*
2012 * Halve the number of backslashes in a file name argument.
2013 * For MS-DOS we only do this if the character after the backslash
2014 * is not a normal file character.
2015 */
2016 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002017backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002018{
2019 for ( ; *p; ++p)
2020 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002021 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002022}
2023
2024/*
2025 * backslash_halve() plus save the result in allocated memory.
2026 */
2027 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002028backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002029{
2030 char_u *res;
2031
2032 res = vim_strsave(p);
2033 if (res == NULL)
2034 return p;
2035 backslash_halve(res);
2036 return res;
2037}
2038
2039#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed by the EBCDIC byte value, the entry is the byte to
 * use instead.  All values are written in octal.  Each row holds 8 entries;
 * the comment at the end of a row gives the index of its first entry.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177, /* 0x00 */
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, /* 0x08 */
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027, /* 0x10 */
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037, /* 0x18 */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, /* 0x20 */
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, /* 0x28 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, /* 0x30 */
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, /* 0x38 */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246, /* 0x40 */
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174, /* 0x48 */
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257, /* 0x50 */
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176, /* 0x58 */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267, /* 0x60 */
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077, /* 0x68 */
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, /* 0x70 */
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042, /* 0x78 */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147, /* 0x80 */
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311, /* 0x88 */
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160, /* 0x90 */
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320, /* 0x98 */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170, /* 0xa0 */
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327, /* 0xa8 */
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, /* 0xb0 */
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347, /* 0xb8 */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* 0xc0 */
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355, /* 0xc8 */
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120, /* 0xd0 */
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363, /* 0xd8 */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130, /* 0xe0 */
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371, /* 0xe8 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, /* 0xf0 */
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377  /* 0xf8 */
};
2080
2081/*
2082 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2083 * wanting 7-bit ASCII characters out the other end.
2084 */
2085 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002086ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002087{
2088 int i;
2089
2090 for (i = 0; i < len; i++)
2091 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2092}
2093#endif