/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010013static int win_chartabsize(win_T *wp, char_u *p, colnr_T col);
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
15
16#ifdef FEAT_MBYTE
Bram Moolenaard7b734a2010-08-12 20:17:02 +020017# if defined(HAVE_WCHAR_H)
18# include <wchar.h> /* for towupper() and towlower() */
19# endif
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010020static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000021#endif
22
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010023static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000024
25static int chartab_initialized = FALSE;
26
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: character "c" lives in byte (c >> 3), bit (c & 7).
 *
 * Every expansion is wrapped in parentheses so that the macro behaves as one
 * expression when combined with other operators (e.g. comparing the result
 * of RESET_CHARTAB() would otherwise bind to the mask instead).
 * Note that "c" is evaluated twice: do not pass an expression with side
 * effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
32
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010033/* table used below, see init_chartab() for an explanation */
34static char_u g_chartab[256];
35
Bram Moolenaar071d4272004-06-13 20:20:40 +000036/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037 * Flags for g_chartab[].
38 */
39#define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */
40#define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */
41#define CT_ID_CHAR 0x20 /* flag: set for ID chars */
42#define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */
43
44/*
45 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000046 * characters for current buffer.
47 *
48 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
49 * 'isprint' and 'encoding'.
50 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010051 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000052 * - For non-multi-byte index with the byte (same as the character).
53 * - For DBCS index with the first byte.
54 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
55 * the same as the character, if the first byte is 0x80 and above it depends
56 * on further bytes).
57 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010058 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000059 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
60 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
61 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
62 * translate the character before displaying it). Note that only DBCS
63 * characters can have 2 display cells and still be printable.
64 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
65 * - CT_ID_CHAR bit is set when the character can be in an identifier.
66 *
67 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
68 * error, OK otherwise.
69 */
70 int
71init_chartab()
72{
73 return buf_init_chartab(curbuf, TRUE);
74}
75
76 int
77buf_init_chartab(buf, global)
78 buf_T *buf;
79 int global; /* FALSE: only set buf->b_chartab[] */
80{
81 int c;
82 int c2;
83 char_u *p;
84 int i;
85 int tilde;
86 int do_isalpha;
87
88 if (global)
89 {
90 /*
91 * Set the default size for printable characters:
92 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
93 * This also inits all 'isident' and 'isfname' flags to FALSE.
94 *
95 * EBCDIC: all chars below ' ' are not printable, all others are
96 * printable.
97 */
98 c = 0;
99 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100100 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000101#ifdef EBCDIC
102 while (c < 255)
103#else
104 while (c <= '~')
105#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100106 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000107#ifdef FEAT_FKMAP
108 if (p_altkeymap)
109 {
110 while (c < YE)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100111 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 }
113#endif
114 while (c < 256)
115 {
116#ifdef FEAT_MBYTE
117 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
118 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100119 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 /* euc-jp characters starting with 0x8e are single width */
121 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100122 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000123 /* other double-byte chars can be printable AND double-width */
124 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100125 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000126 else
127#endif
128 /* the rest is unprintable by default */
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100129 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000130 }
131
132#ifdef FEAT_MBYTE
133 /* Assume that every multi-byte char is a filename character. */
134 for (c = 1; c < 256; ++c)
135 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
136 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
137 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100138 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000139#endif
140 }
141
142 /*
143 * Init word char flags all to FALSE
144 */
145 vim_memset(buf->b_chartab, 0, (size_t)32);
146#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000147 if (enc_dbcs != 0)
148 for (c = 0; c < 256; ++c)
149 {
150 /* double-byte characters are probably word characters */
151 if (MB_BYTE2LEN(c) == 2)
152 SET_CHARTAB(buf, c);
153 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154#endif
155
156#ifdef FEAT_LISP
157 /*
158 * In lisp mode the '-' character is included in keywords.
159 */
160 if (buf->b_p_lisp)
161 SET_CHARTAB(buf, '-');
162#endif
163
164 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
165 * options Each option is a list of characters, character numbers or
166 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
167 */
168 for (i = global ? 0 : 3; i <= 3; ++i)
169 {
170 if (i == 0)
171 p = p_isi; /* first round: 'isident' */
172 else if (i == 1)
173 p = p_isp; /* second round: 'isprint' */
174 else if (i == 2)
175 p = p_isf; /* third round: 'isfname' */
176 else /* i == 3 */
177 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
178
179 while (*p)
180 {
181 tilde = FALSE;
182 do_isalpha = FALSE;
183 if (*p == '^' && p[1] != NUL)
184 {
185 tilde = TRUE;
186 ++p;
187 }
188 if (VIM_ISDIGIT(*p))
189 c = getdigits(&p);
190 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000191#ifdef FEAT_MBYTE
192 if (has_mbyte)
193 c = mb_ptr2char_adv(&p);
194 else
195#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000196 c = *p++;
197 c2 = -1;
198 if (*p == '-' && p[1] != NUL)
199 {
200 ++p;
201 if (VIM_ISDIGIT(*p))
202 c2 = getdigits(&p);
203 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000204#ifdef FEAT_MBYTE
205 if (has_mbyte)
206 c2 = mb_ptr2char_adv(&p);
207 else
208#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000209 c2 = *p++;
210 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000211 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000212 || !(*p == NUL || *p == ','))
213 return FAIL;
214
215 if (c2 == -1) /* not a range */
216 {
217 /*
218 * A single '@' (not "@-@"):
219 * Decide on letters being ID/printable/keyword chars with
220 * standard function isalpha(). This takes care of locale for
221 * single-byte characters).
222 */
223 if (c == '@')
224 {
225 do_isalpha = TRUE;
226 c = 1;
227 c2 = 255;
228 }
229 else
230 c2 = c;
231 }
232 while (c <= c2)
233 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000234 /* Use the MB_ functions here, because isalpha() doesn't
235 * work properly when 'encoding' is "latin1" and the locale is
236 * "C". */
237 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238#ifdef FEAT_FKMAP
239 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
240#endif
241 )
242 {
243 if (i == 0) /* (re)set ID flag */
244 {
245 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100246 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000247 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 }
250 else if (i == 1) /* (re)set printable */
251 {
252 if ((c < ' '
253#ifndef EBCDIC
254 || c > '~'
255#endif
256#ifdef FEAT_FKMAP
257 || (p_altkeymap
258 && (F_isalpha(c) || F_isdigit(c)))
259#endif
260 )
261#ifdef FEAT_MBYTE
262 /* For double-byte we keep the cell width, so
263 * that we can detect it from the first byte. */
264 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
265#endif
266 )
267 {
268 if (tilde)
269 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100270 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000271 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100272 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000273 }
274 else
275 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100276 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
277 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000278 }
279 }
280 }
281 else if (i == 2) /* (re)set fname flag */
282 {
283 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100284 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000285 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100286 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000287 }
288 else /* i == 3 */ /* (re)set keyword flag */
289 {
290 if (tilde)
291 RESET_CHARTAB(buf, c);
292 else
293 SET_CHARTAB(buf, c);
294 }
295 }
296 ++c;
297 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100298
299 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100301 if (c == ',' && *p == NUL)
302 /* Trailing comma is not allowed. */
303 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304 }
305 }
306 chartab_initialized = TRUE;
307 return OK;
308}
309
310/*
311 * Translate any special characters in buf[bufsize] in-place.
312 * The result is a string with only printable characters, but if there is not
313 * enough room, not all characters will be translated.
314 */
315 void
316trans_characters(buf, bufsize)
317 char_u *buf;
318 int bufsize;
319{
320 int len; /* length of string needing translation */
321 int room; /* room in buffer after string */
322 char_u *trs; /* translated character */
323 int trs_len; /* length of trs[] */
324
325 len = (int)STRLEN(buf);
326 room = bufsize - len;
327 while (*buf != 0)
328 {
329# ifdef FEAT_MBYTE
330 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000331 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000332 len -= trs_len;
333 else
334# endif
335 {
336 trs = transchar_byte(*buf);
337 trs_len = (int)STRLEN(trs);
338 if (trs_len > 1)
339 {
340 room -= trs_len - 1;
341 if (room <= 0)
342 return;
343 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
344 }
345 mch_memmove(buf, trs, (size_t)trs_len);
346 --len;
347 }
348 buf += trs_len;
349 }
350}
351
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Translate string "s" into allocated memory, replacing special chars with
 * printable chars.  Printable multi-byte characters are copied as-is,
 * unprintable ones are replaced by their "<xxxx>" hex form.
 * Returns NULL when out of memory.
 *
 * The result is written through a moving pointer instead of repeatedly
 * appending with STRCAT(), which would rescan the result for every character
 * and make the function quadratic in the length of "s".
 */
    char_u *
transstr(char_u *s)
{
    char_u      *res;
    char_u      *p;
    char_u      *q;             /* write position in "res" */
    char_u      *t;             /* translation of a single byte */
#ifdef FEAT_MBYTE
    int         l, len, c;
    int         k;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Compute the length of the result, taking account of unprintable
         * multi-byte characters. */
        len = 0;
        p = s;
        while (*p != NUL)
        {
            if ((l = (*mb_ptr2len)(p)) > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                if (l > 0)
                    len += l;
                else
                    len += 4;   /* illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res == NULL)
        return NULL;

    q = res;
    p = s;
    while (*p != NUL)
    {
#ifdef FEAT_MBYTE
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
            {
                /* append printable multi-byte char; like STRNCAT() never
                 * copy past a NUL */
                for (k = 0; k < l && p[k] != NUL; ++k)
                    *q++ = p[k];
            }
            else
            {
                transchar_hex(q, c);
                q += STRLEN(q);
            }
            p += l;
        }
        else
#endif
        {
            for (t = transchar_byte(*p++); *t != NUL; ++t)
                *q++ = *t;
        }
    }
    *q = NUL;
    return res;
}
#endif
428
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]", truncating "str" when it does
 * not fit, and returns "buf".
 */
    char_u *
str_foldcase(char_u *str, int orglen, char_u *buf, int buflen)
{
    garray_T    ga;
    int         i;
    int         len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified, and terminate
     * the copy with a NUL. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }

    /* Make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));
                int     olen = utf_ptr2len(STR_PTR(i));
                int     lc = utf_tolower(c);

                /* Only replace the character when it is not an invalid
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character. */
                if ((c < 0x80 || olen > 1) && c != lc)
                {
                    int     nlen = utf_char2len(lc);

                    /* The lower case form needs more bytes: when there is
                     * no room for them keep the old character. */
                    if (nlen > olen
                            && (buf == NULL
                                ? ga_grow(&ga, nlen - olen + 1) == FAIL
                                : len + nlen - olen >= buflen))
                    {
                        lc = c;
                        nlen = olen;
                    }

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (nlen != olen)
                    {
                        if (buf == NULL)
                        {
                            STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
                            ga.ga_len += nlen - olen;
                        }
                        else
                        {
                            STRMOVE(buf + i + nlen, buf + i + olen);
                            len += nlen - olen;
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    return buf == NULL ? (char_u *)ga.ga_data : buf;
}
#endif
543
544/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100545 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000546 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100547 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000548 * Does NOT work for multi-byte characters, c must be <= 255.
549 * Also doesn't work for the first byte of a multi-byte, "c" must be a
550 * character!
551 */
552static char_u transchar_buf[7];
553
554 char_u *
555transchar(c)
556 int c;
557{
558 int i;
559
560 i = 0;
561 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
562 {
563 transchar_buf[0] = '~';
564 transchar_buf[1] = '@';
565 i = 2;
566 c = K_SECOND(c);
567 }
568
569 if ((!chartab_initialized && (
570#ifdef EBCDIC
571 (c >= 64 && c < 255)
572#else
573 (c >= ' ' && c <= '~')
574#endif
575#ifdef FEAT_FKMAP
576 || F_ischar(c)
577#endif
578 )) || (c < 256 && vim_isprintc_strict(c)))
579 {
580 /* printable character */
581 transchar_buf[i] = c;
582 transchar_buf[i + 1] = NUL;
583 }
584 else
585 transchar_nonprint(transchar_buf + i, c);
586 return transchar_buf;
587}
588
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but called with a byte instead of a character.
 * For UTF-8 a byte of 0x80 or above is never printable by itself (it is an
 * illegal byte), thus it is always translated with transchar_nonprint().
 */
    char_u *
transchar_byte(int c)
{
    if (!enc_utf8 || c < 0x80)
        return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
606
607/*
608 * Convert non-printable character to two or more printable characters in
609 * "buf[]". "buf" needs to be able to hold five bytes.
610 * Does NOT work for multi-byte characters, c must be <= 255.
611 */
612 void
613transchar_nonprint(buf, c)
614 char_u *buf;
615 int c;
616{
617 if (c == NL)
618 c = NUL; /* we use newline in place of a NUL */
619 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
620 c = NL; /* we use CR in place of NL in this case */
621
622 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
623 transchar_hex(buf, c);
624
625#ifdef EBCDIC
626 /* For EBCDIC only the characters 0-63 and 255 are not printable */
627 else if (CtrlChar(c) != 0 || c == DEL)
628#else
629 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
630#endif
631 {
632 buf[0] = '^';
633#ifdef EBCDIC
634 if (c == DEL)
635 buf[1] = '?'; /* DEL displayed as ^? */
636 else
637 buf[1] = CtrlChar(c);
638#else
639 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
640#endif
641
642 buf[2] = NUL;
643 }
644#ifdef FEAT_MBYTE
645 else if (enc_utf8 && c >= 0x80)
646 {
647 transchar_hex(buf, c);
648 }
649#endif
650#ifndef EBCDIC
651 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
652 {
653 buf[0] = '|';
654 buf[1] = c - 0x80;
655 buf[2] = NUL;
656 }
657#else
658 else if (c < 64)
659 {
660 buf[0] = '~';
661 buf[1] = MetaChar(c);
662 buf[2] = NUL;
663 }
664#endif
665 else /* 0x80 - 0x9f and 0xff */
666 {
667 /*
668 * TODO: EBCDIC I don't know what to do with this chars, so I display
669 * them as '~?' for now
670 */
671 buf[0] = '~';
672#ifdef EBCDIC
673 buf[1] = '?'; /* 0xff displayed as ~? */
674#else
675 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
676#endif
677 buf[2] = NUL;
678 }
679}
680
681 void
682transchar_hex(buf, c)
683 char_u *buf;
684 int c;
685{
686 int i = 0;
687
688 buf[0] = '<';
689#ifdef FEAT_MBYTE
690 if (c > 255)
691 {
692 buf[++i] = nr2hex((unsigned)c >> 12);
693 buf[++i] = nr2hex((unsigned)c >> 8);
694 }
695#endif
696 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000697 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000698 buf[++i] = '>';
699 buf[++i] = NUL;
700}
701
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned    nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
715
716/*
717 * Return number of display cells occupied by byte "b".
718 * Caller must make sure 0 <= b <= 255.
719 * For multi-byte mode "b" must be the first byte of a character.
720 * A TAB is counted as two cells: "^I".
721 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
722 * cells depends on further bytes.
723 */
724 int
725byte2cells(b)
726 int b;
727{
728#ifdef FEAT_MBYTE
729 if (enc_utf8 && b >= 0x80)
730 return 0;
731#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100732 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733}
734
735/*
736 * Return number of display cells occupied by character "c".
737 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
738 * A TAB is counted as two cells: "^I" or four: "<09>".
739 */
740 int
741char2cells(c)
742 int c;
743{
744 if (IS_SPECIAL(c))
745 return char2cells(K_SECOND(c)) + 2;
746#ifdef FEAT_MBYTE
747 if (c >= 0x80)
748 {
749 /* UTF-8: above 0x80 need to check the value */
750 if (enc_utf8)
751 return utf_char2cells(c);
752 /* DBCS: double-byte means double-width, except for euc-jp with first
753 * byte 0x8e */
754 if (enc_dbcs != 0 && c >= 0x100)
755 {
756 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
757 return 1;
758 return 2;
759 }
760 }
761#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100762 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000763}
764
765/*
766 * Return number of display cells occupied by character at "*p".
767 * A TAB is counted as two cells: "^I" or four: "<09>".
768 */
769 int
770ptr2cells(p)
771 char_u *p;
772{
773#ifdef FEAT_MBYTE
774 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
775 if (enc_utf8 && *p >= 0x80)
776 return utf_ptr2cells(p);
777 /* For DBCS we can tell the cell count from the first byte. */
778#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100779 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000780}
781
782/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100783 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000784 * counting TABs as two characters: "^I".
785 */
786 int
787vim_strsize(s)
788 char_u *s;
789{
790 return vim_strnsize(s, (int)MAXCOL);
791}
792
793/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100794 * Return the number of character cells string "s[len]" will take on the
795 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000796 */
797 int
798vim_strnsize(s, len)
799 char_u *s;
800 int len;
801{
802 int size = 0;
803
804 while (*s != NUL && --len >= 0)
805 {
806#ifdef FEAT_MBYTE
807 if (has_mbyte)
808 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000809 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810
811 size += ptr2cells(s);
812 s += l;
813 len -= l - 1;
814 }
815 else
816#endif
817 size += byte2cells(*s++);
818 }
819 return size;
820}
821
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return from
 * the calling function, thus it must be the last statement of a function
 * returning int and is used without a trailing semicolon.
 * A TAB takes the cells up to the next multiple of 'tabstop' of "buf",
 * unless 'list' is set for "wp" and lcs_tab1 is zero; anything else takes
 * ptr2cells(p) cells.
 * "col" is parenthesized so that an expression can be passed for it.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
838
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col" in the current window, taking into account the size of a tab.
 */
    int
chartabsize(char_u *p, colnr_T col)
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
849
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
860
861/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200862 * Return the number of characters the string 's' will take on the screen,
863 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000864 */
865 int
866linetabsize(s)
867 char_u *s;
868{
Bram Moolenaardc536092010-07-18 15:45:49 +0200869 return linetabsize_col(0, s);
870}
871
872/*
873 * Like linetabsize(), but starting at column "startcol".
874 */
875 int
876linetabsize_col(startcol, s)
877 int startcol;
878 char_u *s;
879{
880 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200881 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000882
883 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200884 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885 return (int)col;
886}
887
888/*
889 * Like linetabsize(), but for a given window instead of the current one.
890 */
891 int
Bram Moolenaar597a4222014-06-25 14:39:50 +0200892win_linetabsize(wp, line, len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000893 win_T *wp;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200894 char_u *line;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895 colnr_T len;
896{
897 colnr_T col = 0;
898 char_u *s;
899
Bram Moolenaar597a4222014-06-25 14:39:50 +0200900 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
901 mb_ptr_adv(s))
902 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903 return (int)col;
904}
905
906/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000907 * Return TRUE if 'c' is a normal identifier character:
908 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 */
910 int
911vim_isIDc(c)
912 int c;
913{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100914 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915}
916
917/*
918 * return TRUE if 'c' is a keyword character: Letters and characters from
919 * 'iskeyword' option for current buffer.
920 * For multi-byte characters mb_get_class() is used (builtin rules).
921 */
922 int
923vim_iswordc(c)
924 int c;
925{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100926 return vim_iswordc_buf(c, curbuf);
927}
928
929 int
930vim_iswordc_buf(c, buf)
931 int c;
932 buf_T *buf;
933{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934#ifdef FEAT_MBYTE
935 if (c >= 0x100)
936 {
937 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000938 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000939 if (enc_utf8)
940 return utf_class(c) >= 2;
941 }
942#endif
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100943 return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000944}
945
946/*
947 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
948 */
949 int
950vim_iswordp(p)
951 char_u *p;
952{
953#ifdef FEAT_MBYTE
954 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
955 return mb_get_class(p) >= 2;
956#endif
957 return GET_CHARTAB(curbuf, *p) != 0;
958}
959
Bram Moolenaar071d4272004-06-13 20:20:40 +0000960 int
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100961vim_iswordp_buf(p, buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962 char_u *p;
963 buf_T *buf;
964{
Bram Moolenaara50e5862013-01-30 17:30:17 +0100965#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +0000966 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
967 return mb_get_class(p) >= 2;
Bram Moolenaara50e5862013-01-30 17:30:17 +0100968#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000969 return (GET_CHARTAB(buf, *p) != 0);
970}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000971
972/*
973 * return TRUE if 'c' is a valid file-name character
974 * Assume characters above 0x100 are valid (multi-byte).
975 */
976 int
977vim_isfilec(c)
978 int c;
979{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100980 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000981}
982
983/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000984 * return TRUE if 'c' is a valid file-name character or a wildcard character
985 * Assume characters above 0x100 are valid (multi-byte).
986 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
987 * returns false.
988 */
989 int
990vim_isfilec_or_wc(c)
991 int c;
992{
993 char_u buf[2];
994
995 buf[0] = (char_u)c;
996 buf[1] = NUL;
997 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
998}
999
1000/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 * return TRUE if 'c' is a printable character
1002 * Assume characters above 0x100 are printable (multi-byte), except for
1003 * Unicode.
1004 */
1005 int
1006vim_isprintc(c)
1007 int c;
1008{
1009#ifdef FEAT_MBYTE
1010 if (enc_utf8 && c >= 0x100)
1011 return utf_printable(c);
1012#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001013 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014}
1015
1016/*
1017 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
1018 * byte of a double-byte character.
1019 */
1020 int
1021vim_isprintc_strict(c)
1022 int c;
1023{
1024#ifdef FEAT_MBYTE
1025 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
1026 return FALSE;
1027 if (enc_utf8 && c >= 0x100)
1028 return utf_printable(c);
1029#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001030 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001031}
1032
1033/*
1034 * like chartabsize(), but also check for line breaks on the screen
1035 */
1036 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001037lbr_chartabsize(line, s, col)
Bram Moolenaara0485492014-07-16 23:39:54 +02001038 char_u *line UNUSED; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 unsigned char *s;
1040 colnr_T col;
1041{
1042#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001043 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001044 {
1045#endif
1046#ifdef FEAT_MBYTE
1047 if (curwin->w_p_wrap)
1048 return win_nolbr_chartabsize(curwin, s, col, NULL);
1049#endif
1050 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1051#ifdef FEAT_LINEBREAK
1052 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001053 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001054#endif
1055}
1056
1057/*
1058 * Call lbr_chartabsize() and advance the pointer.
1059 */
1060 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001061lbr_chartabsize_adv(line, s, col)
1062 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001063 char_u **s;
1064 colnr_T col;
1065{
1066 int retval;
1067
Bram Moolenaar597a4222014-06-25 14:39:50 +02001068 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001069 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 return retval;
1071}
1072
1073/*
1074 * This function is used very often, keep it fast!!!!
1075 *
1076 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1077 * string at start of line. Warning: *headp is only set if it's a non-zero
1078 * value, init to 0 before calling.
1079 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001081win_lbr_chartabsize(wp, line, s, col, headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001082 win_T *wp;
Bram Moolenaara0485492014-07-16 23:39:54 +02001083 char_u *line UNUSED; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001084 char_u *s;
1085 colnr_T col;
Bram Moolenaar0c094b92009-05-14 20:20:33 +00001086 int *headp UNUSED;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001087{
1088#ifdef FEAT_LINEBREAK
1089 int c;
1090 int size;
1091 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001092 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001093 colnr_T colmax;
1094 int added;
1095# ifdef FEAT_MBYTE
1096 int mb_added = 0;
1097# else
1098# define mb_added 0
1099# endif
1100 int numberextra;
1101 char_u *ps;
1102 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001103 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001104
1105 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001106 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001107 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001108 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001109#endif
1110 {
1111#ifdef FEAT_MBYTE
1112 if (wp->w_p_wrap)
1113 return win_nolbr_chartabsize(wp, s, col, headp);
1114#endif
1115 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1116 }
1117
1118#ifdef FEAT_LINEBREAK
1119 /*
1120 * First get normal size, without 'linebreak'
1121 */
1122 size = win_chartabsize(wp, s, col);
1123 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001124 if (tab_corr)
1125 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001126
1127 /*
1128 * If 'linebreak' set check at a blank before a non-blank if the line
1129 * needs a break here
1130 */
1131 if (wp->w_p_lbr
1132 && vim_isbreak(c)
1133 && !vim_isbreak(s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001134 && wp->w_p_wrap
1135# ifdef FEAT_VERTSPLIT
1136 && wp->w_width != 0
1137# endif
1138 )
1139 {
1140 /*
1141 * Count all characters from first non-blank after a blank up to next
1142 * non-blank after a blank.
1143 */
1144 numberextra = win_col_off(wp);
1145 col2 = col;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001146 colmax = (colnr_T)(W_WIDTH(wp) - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001147 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001148 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001149 colmax += col_adj;
1150 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001151 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001152 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001153 }
1154
Bram Moolenaar071d4272004-06-13 20:20:40 +00001155 for (;;)
1156 {
1157 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001158 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001159 c = *s;
1160 if (!(c != NUL
1161 && (vim_isbreak(c)
1162 || (!vim_isbreak(c)
1163 && (col2 == col || !vim_isbreak(*ps))))))
1164 break;
1165
1166 col2 += win_chartabsize(wp, s, col2);
1167 if (col2 >= colmax) /* doesn't fit */
1168 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001169 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 tab_corr = FALSE;
1171 break;
1172 }
1173 }
1174 }
1175# ifdef FEAT_MBYTE
1176 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1177 && wp->w_p_wrap && in_win_border(wp, col))
1178 {
1179 ++size; /* Count the ">" in the last column. */
1180 mb_added = 1;
1181 }
1182# endif
1183
1184 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001185 * May have to add something for 'breakindent' and/or 'showbreak'
1186 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187 * Set *headp to the size of what we add.
1188 */
1189 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001190 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001191 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001192 colnr_T sbrlen = 0;
1193 int numberwidth = win_col_off(wp);
1194
1195 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001196 col += numberextra + mb_added;
1197 if (col >= (colnr_T)W_WIDTH(wp))
1198 {
1199 col -= W_WIDTH(wp);
1200 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001201 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001202 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001203 if (*p_sbr != NUL)
1204 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001205 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001206 if (col >= sbrlen)
1207 col -= sbrlen;
1208 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001209 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001211 else if (col > 0 && numberextra > 0)
1212 col += numberwidth - win_col_off2(wp);
1213
1214 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001215 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001216 if (col == 0 || col + size + sbrlen > (colnr_T)W_WIDTH(wp))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001217 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001218 added = 0;
1219 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001220 {
1221 if (size + sbrlen + numberwidth > (colnr_T)W_WIDTH(wp))
1222 {
1223 /* calculate effective window width */
1224 int width = (colnr_T)W_WIDTH(wp) - sbrlen - numberwidth;
1225 int prev_width = col ? ((colnr_T)W_WIDTH(wp) - (sbrlen + col)) : 0;
1226 if (width == 0)
1227 width = (colnr_T)W_WIDTH(wp);
1228 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1229 if ((size - prev_width) % width)
1230 /* wrapped, add another length of 'sbr' */
1231 added += vim_strsize(p_sbr);
1232 }
1233 else
1234 added += vim_strsize(p_sbr);
1235 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001236 if (wp->w_p_bri)
1237 added += get_breakindent_win(wp, line);
1238
Bram Moolenaar95765082014-08-24 21:19:25 +02001239 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001240 if (col != 0)
1241 added = 0;
1242 }
1243 }
1244 if (headp != NULL)
1245 *headp = added + mb_added;
1246 return size;
1247#endif
1248}
1249
1250#if defined(FEAT_MBYTE) || defined(PROTO)
1251/*
1252 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1253 * 'wrap' is on. This means we need to check for a double-byte character that
1254 * doesn't fit at the end of the screen line.
1255 */
1256 static int
1257win_nolbr_chartabsize(wp, s, col, headp)
1258 win_T *wp;
1259 char_u *s;
1260 colnr_T col;
1261 int *headp;
1262{
1263 int n;
1264
1265 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1266 {
1267 n = wp->w_buffer->b_p_ts;
1268 return (int)(n - (col % n));
1269 }
1270 n = ptr2cells(s);
1271 /* Add one cell for a double-width character in the last column of the
1272 * window, displayed with a ">". */
1273 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1274 {
1275 if (headp != NULL)
1276 *headp = 1;
1277 return 3;
1278 }
1279 return n;
1280}
1281
1282/*
1283 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1284 * "wp".
1285 */
1286 int
1287in_win_border(wp, vcol)
1288 win_T *wp;
1289 colnr_T vcol;
1290{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001291 int width1; /* width of first line (after line number) */
1292 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293
1294#ifdef FEAT_VERTSPLIT
1295 if (wp->w_width == 0) /* there is no border */
1296 return FALSE;
1297#endif
1298 width1 = W_WIDTH(wp) - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001299 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001301 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001302 return TRUE;
1303 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001304 if (width2 <= 0)
1305 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001306 return ((vcol - width1) % width2 == width2 - 1);
1307}
1308#endif /* FEAT_MBYTE */
1309
1310/*
1311 * Get virtual column number of pos.
1312 * start: on the first position of this character (TAB, ctrl)
1313 * cursor: where the cursor is on this character (first char, except for TAB)
1314 * end: on the last position of this character (TAB, ctrl)
1315 *
1316 * This is used very often, keep it fast!
1317 */
1318 void
1319getvcol(wp, pos, start, cursor, end)
1320 win_T *wp;
1321 pos_T *pos;
1322 colnr_T *start;
1323 colnr_T *cursor;
1324 colnr_T *end;
1325{
1326 colnr_T vcol;
1327 char_u *ptr; /* points to current char */
1328 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001329 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 int incr;
1331 int head;
1332 int ts = wp->w_buffer->b_p_ts;
1333 int c;
1334
1335 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001336 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001337 if (pos->col == MAXCOL)
1338 posptr = NULL; /* continue until the NUL */
1339 else
1340 posptr = ptr + pos->col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001341
1342 /*
1343 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001344 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1345 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001346 * Also use this when 'list' is set but tabs take their normal size.
1347 */
1348 if ((!wp->w_p_list || lcs_tab1 != NUL)
1349#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001350 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351#endif
1352 )
1353 {
1354#ifndef FEAT_MBYTE
1355 head = 0;
1356#endif
1357 for (;;)
1358 {
1359#ifdef FEAT_MBYTE
1360 head = 0;
1361#endif
1362 c = *ptr;
1363 /* make sure we don't go past the end of the line */
1364 if (c == NUL)
1365 {
1366 incr = 1; /* NUL at end of line only takes one column */
1367 break;
1368 }
1369 /* A tab gets expanded, depending on the current column */
1370 if (c == TAB)
1371 incr = ts - (vcol % ts);
1372 else
1373 {
1374#ifdef FEAT_MBYTE
1375 if (has_mbyte)
1376 {
1377 /* For utf-8, if the byte is >= 0x80, need to look at
1378 * further bytes to find the cell width. */
1379 if (enc_utf8 && c >= 0x80)
1380 incr = utf_ptr2cells(ptr);
1381 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001382 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001383
1384 /* If a double-cell char doesn't fit at the end of a line
1385 * it wraps to the next line, it's like this char is three
1386 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001387 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1388 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001389 {
1390 ++incr;
1391 head = 1;
1392 }
1393 }
1394 else
1395#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001396 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001397 }
1398
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001399 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001400 break;
1401
1402 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001403 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001404 }
1405 }
1406 else
1407 {
1408 for (;;)
1409 {
1410 /* A tab gets expanded, depending on the current column */
1411 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001412 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001413 /* make sure we don't go past the end of the line */
1414 if (*ptr == NUL)
1415 {
1416 incr = 1; /* NUL at end of line only takes one column */
1417 break;
1418 }
1419
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001420 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001421 break;
1422
1423 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001424 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001425 }
1426 }
1427 if (start != NULL)
1428 *start = vcol + head;
1429 if (end != NULL)
1430 *end = vcol + incr - 1;
1431 if (cursor != NULL)
1432 {
1433 if (*ptr == TAB
1434 && (State & NORMAL)
1435 && !wp->w_p_list
1436 && !virtual_active()
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001437 && !(VIsual_active && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001438 )
1439 *cursor = vcol + incr - 1; /* cursor at end */
1440 else
1441 *cursor = vcol + head; /* cursor at start */
1442 }
1443}
1444
1445/*
1446 * Get virtual cursor column in the current window, pretending 'list' is off.
1447 */
1448 colnr_T
1449getvcol_nolist(posp)
1450 pos_T *posp;
1451{
1452 int list_save = curwin->w_p_list;
1453 colnr_T vcol;
1454
1455 curwin->w_p_list = FALSE;
1456 getvcol(curwin, posp, NULL, &vcol, NULL);
1457 curwin->w_p_list = list_save;
1458 return vcol;
1459}
1460
1461#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1462/*
1463 * Get virtual column in virtual mode.
1464 */
1465 void
1466getvvcol(wp, pos, start, cursor, end)
1467 win_T *wp;
1468 pos_T *pos;
1469 colnr_T *start;
1470 colnr_T *cursor;
1471 colnr_T *end;
1472{
1473 colnr_T col;
1474 colnr_T coladd;
1475 colnr_T endadd;
1476# ifdef FEAT_MBYTE
1477 char_u *ptr;
1478# endif
1479
1480 if (virtual_active())
1481 {
1482 /* For virtual mode, only want one value */
1483 getvcol(wp, pos, &col, NULL, NULL);
1484
1485 coladd = pos->coladd;
1486 endadd = 0;
1487# ifdef FEAT_MBYTE
1488 /* Cannot put the cursor on part of a wide character. */
1489 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001490 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001491 {
1492 int c = (*mb_ptr2char)(ptr + pos->col);
1493
1494 if (c != TAB && vim_isprintc(c))
1495 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001496 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001497 if (coladd > endadd) /* past end of line */
1498 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001499 else
1500 coladd = 0;
1501 }
1502 }
1503# endif
1504 col += coladd;
1505 if (start != NULL)
1506 *start = col;
1507 if (cursor != NULL)
1508 *cursor = col;
1509 if (end != NULL)
1510 *end = col + endadd;
1511 }
1512 else
1513 getvcol(wp, pos, start, cursor, end);
1514}
1515#endif
1516
Bram Moolenaar071d4272004-06-13 20:20:40 +00001517/*
1518 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1519 * Used for Visual block mode.
1520 */
1521 void
1522getvcols(wp, pos1, pos2, left, right)
1523 win_T *wp;
1524 pos_T *pos1, *pos2;
1525 colnr_T *left, *right;
1526{
1527 colnr_T from1, from2, to1, to2;
1528
1529 if (ltp(pos1, pos2))
1530 {
1531 getvvcol(wp, pos1, &from1, NULL, &to1);
1532 getvvcol(wp, pos2, &from2, NULL, &to2);
1533 }
1534 else
1535 {
1536 getvvcol(wp, pos2, &from1, NULL, &to1);
1537 getvvcol(wp, pos1, &from2, NULL, &to2);
1538 }
1539 if (from2 < from1)
1540 *left = from2;
1541 else
1542 *left = from1;
1543 if (to2 > to1)
1544 {
1545 if (*p_sel == 'e' && from2 - 1 >= to1)
1546 *right = from2 - 1;
1547 else
1548 *right = to2;
1549 }
1550 else
1551 *right = to1;
1552}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001553
1554/*
1555 * skipwhite: skip over ' ' and '\t'.
1556 */
1557 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001558skipwhite(q)
1559 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001561 char_u *p = q;
1562
Bram Moolenaar071d4272004-06-13 20:20:40 +00001563 while (vim_iswhite(*p)) /* skip to next non-white */
1564 ++p;
1565 return p;
1566}
1567
1568/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001569 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570 */
1571 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001572skipdigits(q)
1573 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001574{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001575 char_u *p = q;
1576
Bram Moolenaar071d4272004-06-13 20:20:40 +00001577 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1578 ++p;
1579 return p;
1580}
1581
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001582#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001583/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001584 * skip over binary digits
1585 */
1586 char_u *
1587skipbin(q)
1588 char_u *q;
1589{
1590 char_u *p = q;
1591
1592 while (vim_isbdigit(*p)) /* skip to next non-digit */
1593 ++p;
1594 return p;
1595}
1596
1597/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001598 * skip over digits and hex characters
1599 */
1600 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001601skiphex(q)
1602 char_u *q;
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001603{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001604 char_u *p = q;
1605
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001606 while (vim_isxdigit(*p)) /* skip to next non-digit */
1607 ++p;
1608 return p;
1609}
1610#endif
1611
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001612#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1613/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001614 * skip to bin digit (or NUL after the string)
1615 */
1616 char_u *
1617skiptobin(q)
1618 char_u *q;
1619{
1620 char_u *p = q;
1621
1622 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */
1623 ++p;
1624 return p;
1625}
1626
1627/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001628 * skip to digit (or NUL after the string)
1629 */
1630 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001631skiptodigit(q)
1632 char_u *q;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001633{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001634 char_u *p = q;
1635
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001636 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1637 ++p;
1638 return p;
1639}
1640
1641/*
1642 * skip to hex character (or NUL after the string)
1643 */
1644 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001645skiptohex(q)
1646 char_u *q;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001647{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001648 char_u *p = q;
1649
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001650 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1651 ++p;
1652 return p;
1653}
1654#endif
1655
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' to '9'.
 */
    int
vim_isdigit(int c)
{
    return (c >= '0' && c <= '9');
}
1668
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0'-'9', 'a'-'f' and 'A'-'F'.
 */
    int
vim_isxdigit(int c)
{
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}
1682
/*
 * Corollary of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100.
 * Returns non-zero only for the binary digits '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return (c == '0' || c == '1');
}
1693
Bram Moolenaar78622822005-08-23 21:00:13 +00001694#if defined(FEAT_MBYTE) || defined(PROTO)
1695/*
1696 * Vim's own character class functions. These exist because many library
1697 * islower()/toupper() etc. do not work properly: they crash when used with
1698 * invalid values or can't handle latin1 when the locale is C.
1699 * Speed is most important here.
1700 */
1701#define LATIN1LOWER 'l'
1702#define LATIN1UPPER 'U'
1703
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001704static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001705static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1706static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001707
1708 int
1709vim_islower(c)
1710 int c;
1711{
1712 if (c <= '@')
1713 return FALSE;
1714 if (c >= 0x80)
1715 {
1716 if (enc_utf8)
1717 return utf_islower(c);
1718 if (c >= 0x100)
1719 {
1720#ifdef HAVE_ISWLOWER
1721 if (has_mbyte)
1722 return iswlower(c);
1723#endif
1724 /* islower() can't handle these chars and may crash */
1725 return FALSE;
1726 }
1727 if (enc_latin1like)
1728 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1729 }
1730 return islower(c);
1731}
1732
1733 int
1734vim_isupper(c)
1735 int c;
1736{
1737 if (c <= '@')
1738 return FALSE;
1739 if (c >= 0x80)
1740 {
1741 if (enc_utf8)
1742 return utf_isupper(c);
1743 if (c >= 0x100)
1744 {
1745#ifdef HAVE_ISWUPPER
1746 if (has_mbyte)
1747 return iswupper(c);
1748#endif
1749 /* islower() can't handle these chars and may crash */
1750 return FALSE;
1751 }
1752 if (enc_latin1like)
1753 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1754 }
1755 return isupper(c);
1756}
1757
1758 int
1759vim_toupper(c)
1760 int c;
1761{
1762 if (c <= '@')
1763 return c;
1764 if (c >= 0x80)
1765 {
1766 if (enc_utf8)
1767 return utf_toupper(c);
1768 if (c >= 0x100)
1769 {
1770#ifdef HAVE_TOWUPPER
1771 if (has_mbyte)
1772 return towupper(c);
1773#endif
1774 /* toupper() can't handle these chars and may crash */
1775 return c;
1776 }
1777 if (enc_latin1like)
1778 return latin1upper[c];
1779 }
1780 return TOUPPER_LOC(c);
1781}
1782
1783 int
1784vim_tolower(c)
1785 int c;
1786{
1787 if (c <= '@')
1788 return c;
1789 if (c >= 0x80)
1790 {
1791 if (enc_utf8)
1792 return utf_tolower(c);
1793 if (c >= 0x100)
1794 {
1795#ifdef HAVE_TOWLOWER
1796 if (has_mbyte)
1797 return towlower(c);
1798#endif
1799 /* tolower() can't handle these chars and may crash */
1800 return c;
1801 }
1802 if (enc_latin1like)
1803 return latin1lower[c];
1804 }
1805 return TOLOWER_LOC(c);
1806}
1807#endif
1808
Bram Moolenaar071d4272004-06-13 20:20:40 +00001809/*
1810 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1811 */
1812 char_u *
1813skiptowhite(p)
1814 char_u *p;
1815{
1816 while (*p != ' ' && *p != '\t' && *p != NUL)
1817 ++p;
1818 return p;
1819}
1820
1821#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1822 || defined(PROTO)
1823/*
1824 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1825 */
1826 char_u *
1827skiptowhite_esc(p)
1828 char_u *p;
1829{
1830 while (*p != ' ' && *p != '\t' && *p != NUL)
1831 {
1832 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1833 ++p;
1834 ++p;
1835 }
1836 return p;
1837}
1838#endif
1839
1840/*
1841 * Getdigits: Get a number from a string and skip over it.
1842 * Note: the argument is a pointer to a char_u pointer!
1843 */
1844 long
1845getdigits(pp)
1846 char_u **pp;
1847{
1848 char_u *p;
1849 long retval;
1850
1851 p = *pp;
1852 retval = atol((char *)p);
1853 if (*p == '-') /* skip negative sign */
1854 ++p;
1855 p = skipdigits(p); /* skip to next non-digit */
1856 *pp = p;
1857 return retval;
1858}
1859
1860/*
1861 * Return TRUE if "lbuf" is empty or only contains blanks.
1862 */
1863 int
1864vim_isblankline(lbuf)
1865 char_u *lbuf;
1866{
1867 char_u *p;
1868
1869 p = skipwhite(lbuf);
1870 return (*p == NUL || *p == '\r' || *p == '\n');
1871}
1872
1873/*
1874 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001875 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1876 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001877 * 0 decimal
1878 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001879 * 'B' bin
1880 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881 * 'X' hex
1882 * 'x' hex
1883 * If "len" is not NULL, the length of the number in characters is returned.
1884 * If "nptr" is not NULL, the signed result is returned in it.
1885 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001886 * If "what" contains STR2NR_BIN recognize binary numbers
1887 * If "what" contains STR2NR_OCT recognize octal numbers
1888 * If "what" contains STR2NR_HEX recognize hex numbers
1889 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001890 * If maxlen > 0, check at a maximum maxlen chars
Bram Moolenaar071d4272004-06-13 20:20:40 +00001891 */
1892 void
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001893vim_str2nr(start, prep, len, what, nptr, unptr, maxlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001894 char_u *start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001895 int *prep; /* return: type of number 0 = decimal, 'x'
1896 or 'X' is hex, '0' = octal, 'b' or 'B'
1897 is bin */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001898 int *len; /* return: detected length of number */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001899 int what; /* what numbers to recognize */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001900 long *nptr; /* return: signed result */
1901 unsigned long *unptr; /* return: unsigned result */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001902 int maxlen; /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903{
1904 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001905 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001906 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001907 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001908 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909
1910 if (ptr[0] == '-')
1911 {
1912 negative = TRUE;
1913 ++ptr;
1914 }
1915
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001916 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001917 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1918 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001920 pre = ptr[1];
1921 if ((what & STR2NR_HEX)
1922 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1923 && (maxlen == 0 || maxlen > 2))
1924 /* hexadecimal */
1925 ptr += 2;
1926 else if ((what & STR2NR_BIN)
1927 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1928 && (maxlen == 0 || maxlen > 2))
1929 /* binary */
1930 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 else
1932 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001933 /* decimal or octal, default is decimal */
1934 pre = 0;
1935 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001936 {
1937 /* Don't interpret "0", "08" or "0129" as octal. */
1938 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1939 {
1940 if (ptr[n] > '7')
1941 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001942 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001943 break;
1944 }
Bram Moolenaar06af6022012-01-26 13:40:08 +01001945 if (ptr[n] >= '0')
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001946 pre = '0'; /* assume octal */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001947 if (n == maxlen)
1948 break;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001949 }
1950 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001951 }
1952 }
1953
1954 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001955 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1956 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001957 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001958 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1959 {
1960 /* bin */
1961 if (pre != 0)
1962 n += 2; /* skip over "0b" */
1963 while ('0' <= *ptr && *ptr <= '1')
1964 {
1965 un = 2 * un + (unsigned long)(*ptr - '0');
1966 ++ptr;
1967 if (n++ == maxlen)
1968 break;
1969 }
1970 }
1971 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001972 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001973 /* octal */
1974 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001975 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001976 un = 8 * un + (unsigned long)(*ptr - '0');
1977 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001978 if (n++ == maxlen)
1979 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001980 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001981 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001982 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001983 {
1984 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001985 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001986 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001987 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001988 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001989 un = 16 * un + (unsigned long)hex2nr(*ptr);
1990 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001991 if (n++ == maxlen)
1992 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001993 }
1994 }
1995 else
1996 {
1997 /* decimal */
1998 while (VIM_ISDIGIT(*ptr))
1999 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002000 un = 10 * un + (unsigned long)(*ptr - '0');
2001 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002002 if (n++ == maxlen)
2003 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002004 }
2005 }
2006
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002007 if (prep != NULL)
2008 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002009 if (len != NULL)
2010 *len = (int)(ptr - start);
2011 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002012 {
2013 if (negative) /* account for leading '-' for decimal numbers */
2014 *nptr = -(long)un;
2015 else
2016 *nptr = (long)un;
2017 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002018 if (unptr != NULL)
2019 *unptr = un;
2020}
2021
/*
 * Translate one hexadecimal digit character into its numeric value.
 * "c" must be one of '0' - '9', 'A' - 'F' or 'a' - 'f'; nothing is checked,
 * any other character is handled like a decimal digit and gives a
 * meaningless result.
 */
    int
hex2nr(c)
    int         c;
{
    int         first;      /* first character of the range "c" falls in */
    int         value;      /* numeric value of that first character */

    if (c >= 'a' && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    else
    {
        first = '0';
        value = 0;
    }
    return c - first + value;
}
2036
2037#if defined(FEAT_TERMRESPONSE) \
2038 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
2039/*
2040 * Convert two hex characters to a byte.
2041 * Return -1 if one of the characters is not hex.
2042 */
2043 int
2044hexhex2nr(p)
2045 char_u *p;
2046{
2047 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2048 return -1;
2049 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2050}
2051#endif
2052
2053/*
2054 * Return TRUE if "str" starts with a backslash that should be removed.
2055 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
2056 * backslash is not a normal file name character.
2057 * '$' is a valid file name character, we don't remove the backslash before
2058 * it. This means it is not possible to use an environment variable after a
2059 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2060 * Although "\ name" is valid, the backslash in "Program\ files" must be
2061 * removed. Assume a file name doesn't start with a space.
2062 * For multi-byte names, never remove a backslash before a non-ascii
2063 * character, assume that all multi-byte characters are valid file name
2064 * characters.
2065 */
2066 int
2067rem_backslash(str)
2068 char_u *str;
2069{
2070#ifdef BACKSLASH_IN_FILENAME
2071 return (str[0] == '\\'
2072# ifdef FEAT_MBYTE
2073 && str[1] < 0x80
2074# endif
2075 && (str[1] == ' '
2076 || (str[1] != NUL
2077 && str[1] != '*'
2078 && str[1] != '?'
2079 && !vim_isfilec(str[1]))));
2080#else
2081 return (str[0] == '\\' && str[1] != NUL);
2082#endif
2083}
2084
2085/*
2086 * Halve the number of backslashes in a file name argument.
2087 * For MS-DOS we only do this if the character after the backslash
2088 * is not a normal file character.
2089 */
2090 void
2091backslash_halve(p)
2092 char_u *p;
2093{
2094 for ( ; *p; ++p)
2095 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002096 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002097}
2098
2099/*
2100 * backslash_halve() plus save the result in allocated memory.
2101 */
2102 char_u *
2103backslash_halve_save(p)
2104 char_u *p;
2105{
2106 char_u *res;
2107
2108 res = vim_strsave(p);
2109 if (res == NULL)
2110 return p;
2111 backslash_halve(res);
2112 return res;
2113}
2114
2115#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed by the EBCDIC byte value (see ebcdic2ascii()) and
 * holds the replacement byte, written in octal.  There are 32 rows of eight
 * entries; the comment after each row gives the index of its first entry.
 * Note that a number of entries are above 0177, thus outside of 7-bit ASCII.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,     /* 0x00 */
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,     /* 0x08 */
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,     /* 0x10 */
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,     /* 0x18 */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,     /* 0x20 */
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,     /* 0x28 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0x30 */
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,     /* 0x38 */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,     /* 0x40 */
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,     /* 0x48 */
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,     /* 0x50 */
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,     /* 0x58 */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,     /* 0x60 */
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,     /* 0x68 */
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,     /* 0x70 */
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,     /* 0x78 */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,     /* 0x80 */
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,     /* 0x88 */
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,     /* 0x90 */
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,     /* 0x98 */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,     /* 0xa0 */
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,     /* 0xa8 */
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,     /* 0xb0 */
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,     /* 0xb8 */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,     /* 0xc0 */
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,     /* 0xc8 */
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,     /* 0xd0 */
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,     /* 0xd8 */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,     /* 0xe0 */
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,     /* 0xe8 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0xf0 */
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377      /* 0xf8 */
};
2156
2157/*
2158 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2159 * wanting 7-bit ASCII characters out the other end.
2160 */
2161 void
2162ebcdic2ascii(buffer, len)
2163 char_u *buffer;
2164 int len;
2165{
2166 int i;
2167
2168 for (i = 0; i < len; i++)
2169 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2170}
2171#endif