/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
Bram Moolenaard7b734a2010-08-12 20:17:02 +020017# if defined(HAVE_WCHAR_H)
18# include <wchar.h> /* for towupper() and towlower() */
19# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000020static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
21#endif
22
Bram Moolenaar0ab2a882009-05-13 10:51:08 +000023static unsigned nr2hex __ARGS((unsigned c));
Bram Moolenaar071d4272004-06-13 20:20:40 +000024
25static int chartab_initialized = FALSE;
26
27/* b_chartab[] is an array of 32 bytes, each bit representing one of the
28 * characters 0-255. */
29#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
30#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
31#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
32
33/*
34 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
35 * characters for current buffer.
36 *
37 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
38 * 'isprint' and 'encoding'.
39 *
40 * The index in chartab[] depends on 'encoding':
41 * - For non-multi-byte index with the byte (same as the character).
42 * - For DBCS index with the first byte.
43 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
44 * the same as the character, if the first byte is 0x80 and above it depends
45 * on further bytes).
46 *
47 * The contents of chartab[]:
48 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
49 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
50 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
51 * translate the character before displaying it). Note that only DBCS
52 * characters can have 2 display cells and still be printable.
53 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
54 * - CT_ID_CHAR bit is set when the character can be in an identifier.
55 *
56 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
57 * error, OK otherwise.
58 */
59 int
60init_chartab()
61{
62 return buf_init_chartab(curbuf, TRUE);
63}
64
65 int
66buf_init_chartab(buf, global)
67 buf_T *buf;
68 int global; /* FALSE: only set buf->b_chartab[] */
69{
70 int c;
71 int c2;
72 char_u *p;
73 int i;
74 int tilde;
75 int do_isalpha;
76
77 if (global)
78 {
79 /*
80 * Set the default size for printable characters:
81 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
82 * This also inits all 'isident' and 'isfname' flags to FALSE.
83 *
84 * EBCDIC: all chars below ' ' are not printable, all others are
85 * printable.
86 */
87 c = 0;
88 while (c < ' ')
89 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
90#ifdef EBCDIC
91 while (c < 255)
92#else
93 while (c <= '~')
94#endif
95 chartab[c++] = 1 + CT_PRINT_CHAR;
96#ifdef FEAT_FKMAP
97 if (p_altkeymap)
98 {
99 while (c < YE)
100 chartab[c++] = 1 + CT_PRINT_CHAR;
101 }
102#endif
103 while (c < 256)
104 {
105#ifdef FEAT_MBYTE
106 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
107 if (enc_utf8 && c >= 0xa0)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* euc-jp characters starting with 0x8e are single width */
110 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
111 chartab[c++] = CT_PRINT_CHAR + 1;
112 /* other double-byte chars can be printable AND double-width */
113 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
114 chartab[c++] = CT_PRINT_CHAR + 2;
115 else
116#endif
117 /* the rest is unprintable by default */
118 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
119 }
120
121#ifdef FEAT_MBYTE
122 /* Assume that every multi-byte char is a filename character. */
123 for (c = 1; c < 256; ++c)
124 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
125 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
126 || (enc_utf8 && c >= 0xa0))
127 chartab[c] |= CT_FNAME_CHAR;
128#endif
129 }
130
131 /*
132 * Init word char flags all to FALSE
133 */
134 vim_memset(buf->b_chartab, 0, (size_t)32);
135#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000136 if (enc_dbcs != 0)
137 for (c = 0; c < 256; ++c)
138 {
139 /* double-byte characters are probably word characters */
140 if (MB_BYTE2LEN(c) == 2)
141 SET_CHARTAB(buf, c);
142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143#endif
144
145#ifdef FEAT_LISP
146 /*
147 * In lisp mode the '-' character is included in keywords.
148 */
149 if (buf->b_p_lisp)
150 SET_CHARTAB(buf, '-');
151#endif
152
153 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
154 * options Each option is a list of characters, character numbers or
155 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
156 */
157 for (i = global ? 0 : 3; i <= 3; ++i)
158 {
159 if (i == 0)
160 p = p_isi; /* first round: 'isident' */
161 else if (i == 1)
162 p = p_isp; /* second round: 'isprint' */
163 else if (i == 2)
164 p = p_isf; /* third round: 'isfname' */
165 else /* i == 3 */
166 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
167
168 while (*p)
169 {
170 tilde = FALSE;
171 do_isalpha = FALSE;
172 if (*p == '^' && p[1] != NUL)
173 {
174 tilde = TRUE;
175 ++p;
176 }
177 if (VIM_ISDIGIT(*p))
178 c = getdigits(&p);
179 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000180#ifdef FEAT_MBYTE
181 if (has_mbyte)
182 c = mb_ptr2char_adv(&p);
183 else
184#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c = *p++;
186 c2 = -1;
187 if (*p == '-' && p[1] != NUL)
188 {
189 ++p;
190 if (VIM_ISDIGIT(*p))
191 c2 = getdigits(&p);
192 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000193#ifdef FEAT_MBYTE
194 if (has_mbyte)
195 c2 = mb_ptr2char_adv(&p);
196 else
197#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000198 c2 = *p++;
199 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000200 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000201 || !(*p == NUL || *p == ','))
202 return FAIL;
203
204 if (c2 == -1) /* not a range */
205 {
206 /*
207 * A single '@' (not "@-@"):
208 * Decide on letters being ID/printable/keyword chars with
209 * standard function isalpha(). This takes care of locale for
210 * single-byte characters).
211 */
212 if (c == '@')
213 {
214 do_isalpha = TRUE;
215 c = 1;
216 c2 = 255;
217 }
218 else
219 c2 = c;
220 }
221 while (c <= c2)
222 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000223 /* Use the MB_ functions here, because isalpha() doesn't
224 * work properly when 'encoding' is "latin1" and the locale is
225 * "C". */
226 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227#ifdef FEAT_FKMAP
228 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
229#endif
230 )
231 {
232 if (i == 0) /* (re)set ID flag */
233 {
234 if (tilde)
235 chartab[c] &= ~CT_ID_CHAR;
236 else
237 chartab[c] |= CT_ID_CHAR;
238 }
239 else if (i == 1) /* (re)set printable */
240 {
241 if ((c < ' '
242#ifndef EBCDIC
243 || c > '~'
244#endif
245#ifdef FEAT_FKMAP
246 || (p_altkeymap
247 && (F_isalpha(c) || F_isdigit(c)))
248#endif
249 )
250#ifdef FEAT_MBYTE
251 /* For double-byte we keep the cell width, so
252 * that we can detect it from the first byte. */
253 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
254#endif
255 )
256 {
257 if (tilde)
258 {
259 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
260 + ((dy_flags & DY_UHEX) ? 4 : 2);
261 chartab[c] &= ~CT_PRINT_CHAR;
262 }
263 else
264 {
265 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
266 chartab[c] |= CT_PRINT_CHAR;
267 }
268 }
269 }
270 else if (i == 2) /* (re)set fname flag */
271 {
272 if (tilde)
273 chartab[c] &= ~CT_FNAME_CHAR;
274 else
275 chartab[c] |= CT_FNAME_CHAR;
276 }
277 else /* i == 3 */ /* (re)set keyword flag */
278 {
279 if (tilde)
280 RESET_CHARTAB(buf, c);
281 else
282 SET_CHARTAB(buf, c);
283 }
284 }
285 ++c;
286 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100287
288 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000289 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100290 if (c == ',' && *p == NUL)
291 /* Trailing comma is not allowed. */
292 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000293 }
294 }
295 chartab_initialized = TRUE;
296 return OK;
297}
298
299/*
300 * Translate any special characters in buf[bufsize] in-place.
301 * The result is a string with only printable characters, but if there is not
302 * enough room, not all characters will be translated.
303 */
304 void
305trans_characters(buf, bufsize)
306 char_u *buf;
307 int bufsize;
308{
309 int len; /* length of string needing translation */
310 int room; /* room in buffer after string */
311 char_u *trs; /* translated character */
312 int trs_len; /* length of trs[] */
313
314 len = (int)STRLEN(buf);
315 room = bufsize - len;
316 while (*buf != 0)
317 {
318# ifdef FEAT_MBYTE
319 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000320 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 len -= trs_len;
322 else
323# endif
324 {
325 trs = transchar_byte(*buf);
326 trs_len = (int)STRLEN(trs);
327 if (trs_len > 1)
328 {
329 room -= trs_len - 1;
330 if (room <= 0)
331 return;
332 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
333 }
334 mch_memmove(buf, trs, (size_t)trs_len);
335 --len;
336 }
337 buf += trs_len;
338 }
339}
340
Bram Moolenaar7cc36e92007-03-27 10:42:05 +0000341#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
342 || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343/*
344 * Translate a string into allocated memory, replacing special chars with
345 * printable chars. Returns NULL when out of memory.
346 */
347 char_u *
348transstr(s)
349 char_u *s;
350{
351 char_u *res;
352 char_u *p;
353#ifdef FEAT_MBYTE
354 int l, len, c;
355 char_u hexbuf[11];
356#endif
357
358#ifdef FEAT_MBYTE
359 if (has_mbyte)
360 {
361 /* Compute the length of the result, taking account of unprintable
362 * multi-byte characters. */
363 len = 0;
364 p = s;
365 while (*p != NUL)
366 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000367 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368 {
369 c = (*mb_ptr2char)(p);
370 p += l;
371 if (vim_isprintc(c))
372 len += l;
373 else
374 {
375 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000376 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000377 }
378 }
379 else
380 {
381 l = byte2cells(*p++);
382 if (l > 0)
383 len += l;
384 else
385 len += 4; /* illegal byte sequence */
386 }
387 }
388 res = alloc((unsigned)(len + 1));
389 }
390 else
391#endif
392 res = alloc((unsigned)(vim_strsize(s) + 1));
393 if (res != NULL)
394 {
395 *res = NUL;
396 p = s;
397 while (*p != NUL)
398 {
399#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000400 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 {
402 c = (*mb_ptr2char)(p);
403 if (vim_isprintc(c))
404 STRNCAT(res, p, l); /* append printable multi-byte char */
405 else
406 transchar_hex(res + STRLEN(res), c);
407 p += l;
408 }
409 else
410#endif
411 STRCAT(res, transchar_byte(*p++));
412 }
413 }
414 return res;
415}
416#endif
417
418#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
419/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000420 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
421 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000422 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
423 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 */
425 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000426str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000427 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000428 int orglen;
429 char_u *buf;
430 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431{
432 garray_T ga;
433 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000434 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000435
436#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
437#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000438#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
439#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000440
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000441 /* Copy "str" into "buf" or allocated memory, unmodified. */
442 if (buf == NULL)
443 {
444 ga_init2(&ga, 1, 10);
445 if (ga_grow(&ga, len + 1) == FAIL)
446 return NULL;
447 mch_memmove(ga.ga_data, str, (size_t)len);
448 ga.ga_len = len;
449 }
450 else
451 {
452 if (len >= buflen) /* Ugly! */
453 len = buflen - 1;
454 mch_memmove(buf, str, (size_t)len);
455 }
456 if (buf == NULL)
457 GA_CHAR(len) = NUL;
458 else
459 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460
461 /* Make each character lower case. */
462 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000464 {
465#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000466 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000467 {
468 if (enc_utf8)
469 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000470 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100471 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000472 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000473
Bram Moolenaarb9839212008-06-28 11:03:50 +0000474 /* Only replace the character when it is not an invalid
475 * sequence (ASCII character or more than one byte) and
476 * utf_tolower() doesn't return the original character. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100477 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100479 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000480
481 /* If the byte length changes need to shift the following
482 * characters forward or backward. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100483 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000484 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100485 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000486 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100487 if (buf == NULL
488 ? ga_grow(&ga, nlen - olen + 1) == FAIL
489 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490 {
491 /* out of memory, keep old char */
492 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100493 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000495 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100496 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000497 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000498 if (buf == NULL)
499 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100500 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
501 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000502 }
503 else
504 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100505 STRMOVE(buf + i + nlen, buf + i + olen);
506 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000507 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 }
509 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000510 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 }
512 }
513 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000514 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 }
516 else
517#endif
518 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000519 if (buf == NULL)
520 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
521 else
522 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000523 ++i;
524 }
525 }
526
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000527 if (buf == NULL)
528 return (char_u *)ga.ga_data;
529 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000530}
531#endif
532
533/*
534 * Catch 22: chartab[] can't be initialized before the options are
535 * initialized, and initializing options may cause transchar() to be called!
536 * When chartab_initialized == FALSE don't use chartab[].
537 * Does NOT work for multi-byte characters, c must be <= 255.
538 * Also doesn't work for the first byte of a multi-byte, "c" must be a
539 * character!
540 */
541static char_u transchar_buf[7];
542
543 char_u *
544transchar(c)
545 int c;
546{
547 int i;
548
549 i = 0;
550 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
551 {
552 transchar_buf[0] = '~';
553 transchar_buf[1] = '@';
554 i = 2;
555 c = K_SECOND(c);
556 }
557
558 if ((!chartab_initialized && (
559#ifdef EBCDIC
560 (c >= 64 && c < 255)
561#else
562 (c >= ' ' && c <= '~')
563#endif
564#ifdef FEAT_FKMAP
565 || F_ischar(c)
566#endif
567 )) || (c < 256 && vim_isprintc_strict(c)))
568 {
569 /* printable character */
570 transchar_buf[i] = c;
571 transchar_buf[i + 1] = NUL;
572 }
573 else
574 transchar_nonprint(transchar_buf + i, c);
575 return transchar_buf;
576}
577
578#if defined(FEAT_MBYTE) || defined(PROTO)
579/*
580 * Like transchar(), but called with a byte instead of a character. Checks
581 * for an illegal UTF-8 byte.
582 */
583 char_u *
584transchar_byte(c)
585 int c;
586{
587 if (enc_utf8 && c >= 0x80)
588 {
589 transchar_nonprint(transchar_buf, c);
590 return transchar_buf;
591 }
592 return transchar(c);
593}
594#endif
595
596/*
597 * Convert non-printable character to two or more printable characters in
598 * "buf[]". "buf" needs to be able to hold five bytes.
599 * Does NOT work for multi-byte characters, c must be <= 255.
600 */
601 void
602transchar_nonprint(buf, c)
603 char_u *buf;
604 int c;
605{
606 if (c == NL)
607 c = NUL; /* we use newline in place of a NUL */
608 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
609 c = NL; /* we use CR in place of NL in this case */
610
611 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
612 transchar_hex(buf, c);
613
614#ifdef EBCDIC
615 /* For EBCDIC only the characters 0-63 and 255 are not printable */
616 else if (CtrlChar(c) != 0 || c == DEL)
617#else
618 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
619#endif
620 {
621 buf[0] = '^';
622#ifdef EBCDIC
623 if (c == DEL)
624 buf[1] = '?'; /* DEL displayed as ^? */
625 else
626 buf[1] = CtrlChar(c);
627#else
628 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
629#endif
630
631 buf[2] = NUL;
632 }
633#ifdef FEAT_MBYTE
634 else if (enc_utf8 && c >= 0x80)
635 {
636 transchar_hex(buf, c);
637 }
638#endif
639#ifndef EBCDIC
640 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
641 {
642 buf[0] = '|';
643 buf[1] = c - 0x80;
644 buf[2] = NUL;
645 }
646#else
647 else if (c < 64)
648 {
649 buf[0] = '~';
650 buf[1] = MetaChar(c);
651 buf[2] = NUL;
652 }
653#endif
654 else /* 0x80 - 0x9f and 0xff */
655 {
656 /*
657 * TODO: EBCDIC I don't know what to do with this chars, so I display
658 * them as '~?' for now
659 */
660 buf[0] = '~';
661#ifdef EBCDIC
662 buf[1] = '?'; /* 0xff displayed as ~? */
663#else
664 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
665#endif
666 buf[2] = NUL;
667 }
668}
669
670 void
671transchar_hex(buf, c)
672 char_u *buf;
673 int c;
674{
675 int i = 0;
676
677 buf[0] = '<';
678#ifdef FEAT_MBYTE
679 if (c > 255)
680 {
681 buf[++i] = nr2hex((unsigned)c >> 12);
682 buf[++i] = nr2hex((unsigned)c >> 8);
683 }
684#endif
685 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000686 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 buf[++i] = '>';
688 buf[++i] = NUL;
689}
690
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 * Bits above the lowest four are ignored.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    unsigned    nibble = c & 0xf;

    if (nibble <= 9)
        return nibble + '0';
    return nibble - 10 + 'a';
}
704
705/*
706 * Return number of display cells occupied by byte "b".
707 * Caller must make sure 0 <= b <= 255.
708 * For multi-byte mode "b" must be the first byte of a character.
709 * A TAB is counted as two cells: "^I".
710 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
711 * cells depends on further bytes.
712 */
713 int
714byte2cells(b)
715 int b;
716{
717#ifdef FEAT_MBYTE
718 if (enc_utf8 && b >= 0x80)
719 return 0;
720#endif
721 return (chartab[b] & CT_CELL_MASK);
722}
723
724/*
725 * Return number of display cells occupied by character "c".
726 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
727 * A TAB is counted as two cells: "^I" or four: "<09>".
728 */
729 int
730char2cells(c)
731 int c;
732{
733 if (IS_SPECIAL(c))
734 return char2cells(K_SECOND(c)) + 2;
735#ifdef FEAT_MBYTE
736 if (c >= 0x80)
737 {
738 /* UTF-8: above 0x80 need to check the value */
739 if (enc_utf8)
740 return utf_char2cells(c);
741 /* DBCS: double-byte means double-width, except for euc-jp with first
742 * byte 0x8e */
743 if (enc_dbcs != 0 && c >= 0x100)
744 {
745 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
746 return 1;
747 return 2;
748 }
749 }
750#endif
751 return (chartab[c & 0xff] & CT_CELL_MASK);
752}
753
754/*
755 * Return number of display cells occupied by character at "*p".
756 * A TAB is counted as two cells: "^I" or four: "<09>".
757 */
758 int
759ptr2cells(p)
760 char_u *p;
761{
762#ifdef FEAT_MBYTE
763 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
764 if (enc_utf8 && *p >= 0x80)
765 return utf_ptr2cells(p);
766 /* For DBCS we can tell the cell count from the first byte. */
767#endif
768 return (chartab[*p] & CT_CELL_MASK);
769}
770
771/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100772 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773 * counting TABs as two characters: "^I".
774 */
775 int
776vim_strsize(s)
777 char_u *s;
778{
779 return vim_strnsize(s, (int)MAXCOL);
780}
781
782/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100783 * Return the number of character cells string "s[len]" will take on the
784 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 */
786 int
787vim_strnsize(s, len)
788 char_u *s;
789 int len;
790{
791 int size = 0;
792
793 while (*s != NUL && --len >= 0)
794 {
795#ifdef FEAT_MBYTE
796 if (has_mbyte)
797 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000798 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000799
800 size += ptr2cells(s);
801 s += l;
802 len -= l - 1;
803 }
804 else
805#endif
806 size += byte2cells(*s++);
807 }
808 return size;
809}
810
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else that returns from the calling
 * function, so it must be the last statement and is used without a trailing
 * semicolon.  "p" is evaluated more than once.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
827
828#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
829 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
830 int
831chartabsize(p, col)
832 char_u *p;
833 colnr_T col;
834{
835 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
836}
837#endif
838
839#ifdef FEAT_LINEBREAK
840 static int
841win_chartabsize(wp, p, col)
842 win_T *wp;
843 char_u *p;
844 colnr_T col;
845{
846 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
847}
848#endif
849
850/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200851 * Return the number of characters the string 's' will take on the screen,
852 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000853 */
854 int
855linetabsize(s)
856 char_u *s;
857{
Bram Moolenaardc536092010-07-18 15:45:49 +0200858 return linetabsize_col(0, s);
859}
860
861/*
862 * Like linetabsize(), but starting at column "startcol".
863 */
864 int
865linetabsize_col(startcol, s)
866 int startcol;
867 char_u *s;
868{
869 colnr_T col = startcol;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200870 char_u *line = s; /* pointer to start of line, for breakindent */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871
872 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200873 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000874 return (int)col;
875}
876
877/*
878 * Like linetabsize(), but for a given window instead of the current one.
879 */
880 int
Bram Moolenaar597a4222014-06-25 14:39:50 +0200881win_linetabsize(wp, line, len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000882 win_T *wp;
Bram Moolenaar597a4222014-06-25 14:39:50 +0200883 char_u *line;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000884 colnr_T len;
885{
886 colnr_T col = 0;
887 char_u *s;
888
Bram Moolenaar597a4222014-06-25 14:39:50 +0200889 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
890 mb_ptr_adv(s))
891 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000892 return (int)col;
893}
894
895/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000896 * Return TRUE if 'c' is a normal identifier character:
897 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898 */
899 int
900vim_isIDc(c)
901 int c;
902{
903 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
904}
905
906/*
907 * return TRUE if 'c' is a keyword character: Letters and characters from
908 * 'iskeyword' option for current buffer.
909 * For multi-byte characters mb_get_class() is used (builtin rules).
910 */
911 int
912vim_iswordc(c)
913 int c;
914{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100915 return vim_iswordc_buf(c, curbuf);
916}
917
918 int
919vim_iswordc_buf(c, buf)
920 int c;
921 buf_T *buf;
922{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923#ifdef FEAT_MBYTE
924 if (c >= 0x100)
925 {
926 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000927 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000928 if (enc_utf8)
929 return utf_class(c) >= 2;
930 }
931#endif
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100932 return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000933}
934
935/*
936 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
937 */
938 int
939vim_iswordp(p)
940 char_u *p;
941{
942#ifdef FEAT_MBYTE
943 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
944 return mb_get_class(p) >= 2;
945#endif
946 return GET_CHARTAB(curbuf, *p) != 0;
947}
948
Bram Moolenaar071d4272004-06-13 20:20:40 +0000949 int
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100950vim_iswordp_buf(p, buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000951 char_u *p;
952 buf_T *buf;
953{
Bram Moolenaara50e5862013-01-30 17:30:17 +0100954#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +0000955 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
956 return mb_get_class(p) >= 2;
Bram Moolenaara50e5862013-01-30 17:30:17 +0100957#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 return (GET_CHARTAB(buf, *p) != 0);
959}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000960
961/*
962 * return TRUE if 'c' is a valid file-name character
963 * Assume characters above 0x100 are valid (multi-byte).
964 */
965 int
966vim_isfilec(c)
967 int c;
968{
969 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
970}
971
972/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000973 * return TRUE if 'c' is a valid file-name character or a wildcard character
974 * Assume characters above 0x100 are valid (multi-byte).
975 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
976 * returns false.
977 */
978 int
979vim_isfilec_or_wc(c)
980 int c;
981{
982 char_u buf[2];
983
984 buf[0] = (char_u)c;
985 buf[1] = NUL;
986 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
987}
988
989/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 * return TRUE if 'c' is a printable character
991 * Assume characters above 0x100 are printable (multi-byte), except for
992 * Unicode.
993 */
994 int
995vim_isprintc(c)
996 int c;
997{
998#ifdef FEAT_MBYTE
999 if (enc_utf8 && c >= 0x100)
1000 return utf_printable(c);
1001#endif
1002 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1003}
1004
1005/*
1006 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
1007 * byte of a double-byte character.
1008 */
1009 int
1010vim_isprintc_strict(c)
1011 int c;
1012{
1013#ifdef FEAT_MBYTE
1014 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
1015 return FALSE;
1016 if (enc_utf8 && c >= 0x100)
1017 return utf_printable(c);
1018#endif
1019 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1020}
1021
1022/*
1023 * like chartabsize(), but also check for line breaks on the screen
1024 */
1025 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001026lbr_chartabsize(line, s, col)
Bram Moolenaara0485492014-07-16 23:39:54 +02001027 char_u *line UNUSED; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001028 unsigned char *s;
1029 colnr_T col;
1030{
1031#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001032 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001033 {
1034#endif
1035#ifdef FEAT_MBYTE
1036 if (curwin->w_p_wrap)
1037 return win_nolbr_chartabsize(curwin, s, col, NULL);
1038#endif
1039 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1040#ifdef FEAT_LINEBREAK
1041 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001042 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001043#endif
1044}
1045
1046/*
1047 * Call lbr_chartabsize() and advance the pointer.
1048 */
1049 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001050lbr_chartabsize_adv(line, s, col)
1051 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052 char_u **s;
1053 colnr_T col;
1054{
1055 int retval;
1056
Bram Moolenaar597a4222014-06-25 14:39:50 +02001057 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001058 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 return retval;
1060}
1061
1062/*
1063 * This function is used very often, keep it fast!!!!
1064 *
1065 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1066 * string at start of line. Warning: *headp is only set if it's a non-zero
1067 * value, init to 0 before calling.
1068 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001069 int
Bram Moolenaar597a4222014-06-25 14:39:50 +02001070win_lbr_chartabsize(wp, line, s, col, headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001071 win_T *wp;
Bram Moolenaara0485492014-07-16 23:39:54 +02001072 char_u *line UNUSED; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001073 char_u *s;
1074 colnr_T col;
Bram Moolenaar0c094b92009-05-14 20:20:33 +00001075 int *headp UNUSED;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076{
1077#ifdef FEAT_LINEBREAK
1078 int c;
1079 int size;
1080 colnr_T col2;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001081 colnr_T col_adj = 0; /* col + screen size of tab */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001082 colnr_T colmax;
1083 int added;
1084# ifdef FEAT_MBYTE
1085 int mb_added = 0;
1086# else
1087# define mb_added 0
1088# endif
1089 int numberextra;
1090 char_u *ps;
1091 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001092 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001093
1094 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001095 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001097 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001098#endif
1099 {
1100#ifdef FEAT_MBYTE
1101 if (wp->w_p_wrap)
1102 return win_nolbr_chartabsize(wp, s, col, headp);
1103#endif
1104 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1105 }
1106
1107#ifdef FEAT_LINEBREAK
1108 /*
1109 * First get normal size, without 'linebreak'
1110 */
1111 size = win_chartabsize(wp, s, col);
1112 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001113 if (tab_corr)
1114 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115
1116 /*
1117 * If 'linebreak' set check at a blank before a non-blank if the line
1118 * needs a break here
1119 */
1120 if (wp->w_p_lbr
1121 && vim_isbreak(c)
1122 && !vim_isbreak(s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001123 && wp->w_p_wrap
1124# ifdef FEAT_VERTSPLIT
1125 && wp->w_width != 0
1126# endif
1127 )
1128 {
1129 /*
1130 * Count all characters from first non-blank after a blank up to next
1131 * non-blank after a blank.
1132 */
1133 numberextra = win_col_off(wp);
1134 col2 = col;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001135 colmax = (colnr_T)(W_WIDTH(wp) - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001137 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001138 colmax += col_adj;
1139 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001140 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001141 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001142 }
1143
Bram Moolenaar071d4272004-06-13 20:20:40 +00001144 for (;;)
1145 {
1146 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001147 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148 c = *s;
1149 if (!(c != NUL
1150 && (vim_isbreak(c)
1151 || (!vim_isbreak(c)
1152 && (col2 == col || !vim_isbreak(*ps))))))
1153 break;
1154
1155 col2 += win_chartabsize(wp, s, col2);
1156 if (col2 >= colmax) /* doesn't fit */
1157 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001158 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001159 tab_corr = FALSE;
1160 break;
1161 }
1162 }
1163 }
1164# ifdef FEAT_MBYTE
1165 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1166 && wp->w_p_wrap && in_win_border(wp, col))
1167 {
1168 ++size; /* Count the ">" in the last column. */
1169 mb_added = 1;
1170 }
1171# endif
1172
1173 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001174 * May have to add something for 'breakindent' and/or 'showbreak'
1175 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001176 * Set *headp to the size of what we add.
1177 */
1178 added = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001179 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001180 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001181 colnr_T sbrlen = 0;
1182 int numberwidth = win_col_off(wp);
1183
1184 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001185 col += numberextra + mb_added;
1186 if (col >= (colnr_T)W_WIDTH(wp))
1187 {
1188 col -= W_WIDTH(wp);
1189 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001190 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001191 col %= numberextra;
Bram Moolenaar1c852102014-10-15 21:26:40 +02001192 if (*p_sbr != NUL)
1193 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001194 sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001195 if (col >= sbrlen)
1196 col -= sbrlen;
1197 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001198 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001200 else if (col > 0 && numberextra > 0)
1201 col += numberwidth - win_col_off2(wp);
1202
1203 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001205 if (col == 0 || col + size + sbrlen > (colnr_T)W_WIDTH(wp))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001207 added = 0;
1208 if (*p_sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001209 {
1210 if (size + sbrlen + numberwidth > (colnr_T)W_WIDTH(wp))
1211 {
1212 /* calculate effective window width */
1213 int width = (colnr_T)W_WIDTH(wp) - sbrlen - numberwidth;
1214 int prev_width = col ? ((colnr_T)W_WIDTH(wp) - (sbrlen + col)) : 0;
1215 if (width == 0)
1216 width = (colnr_T)W_WIDTH(wp);
1217 added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1218 if ((size - prev_width) % width)
1219 /* wrapped, add another length of 'sbr' */
1220 added += vim_strsize(p_sbr);
1221 }
1222 else
1223 added += vim_strsize(p_sbr);
1224 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001225 if (wp->w_p_bri)
1226 added += get_breakindent_win(wp, line);
1227
Bram Moolenaar95765082014-08-24 21:19:25 +02001228 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229 if (col != 0)
1230 added = 0;
1231 }
1232 }
1233 if (headp != NULL)
1234 *headp = added + mb_added;
1235 return size;
1236#endif
1237}
1238
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
 * 'wrap' is on.  This means we need to check for a double-byte character that
 * doesn't fit at the end of the screen line.
 * When that happens and "headp" is not NULL, *headp is set to 1 and the
 * returned size is 3.
 */
    static int
win_nolbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
    int         n;

    /* A TAB fills up to the next multiple of 'tabstop', unless 'list' is set
     * and lcs_tab1 is not. */
    if (*s == TAB && (!wp->w_p_list || lcs_tab1))
    {
        n = wp->w_buffer->b_p_ts;
        return (int)(n - (col % n));
    }
    n = ptr2cells(s);
    /* Add one cell for a double-width character in the last column of the
     * window, displayed with a ">". */
    if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
    {
        if (headp != NULL)
            *headp = 1;
        return 3;
    }
    return n;
}

/*
 * Return TRUE if virtual column "vcol" is in the rightmost column of window
 * "wp".
 */
    int
in_win_border(wp, vcol)
    win_T       *wp;
    colnr_T     vcol;
{
    int         width1;         /* width of first line (after line number) */
    int         width2;         /* width of further lines */

#ifdef FEAT_VERTSPLIT
    if (wp->w_width == 0)       /* there is no border */
        return FALSE;
#endif
    width1 = W_WIDTH(wp) - win_col_off(wp);
    if ((int)vcol < width1 - 1)
        return FALSE;
    if ((int)vcol == width1 - 1)
        return TRUE;
    width2 = width1 + win_col_off2(wp);
    if (width2 <= 0)            /* avoid a modulo by zero or less below */
        return FALSE;
    return ((vcol - width1) % width2 == width2 - 1);
}
#endif /* FEAT_MBYTE */
1298
1299/*
1300 * Get virtual column number of pos.
1301 * start: on the first position of this character (TAB, ctrl)
1302 * cursor: where the cursor is on this character (first char, except for TAB)
1303 * end: on the last position of this character (TAB, ctrl)
1304 *
1305 * This is used very often, keep it fast!
1306 */
1307 void
1308getvcol(wp, pos, start, cursor, end)
1309 win_T *wp;
1310 pos_T *pos;
1311 colnr_T *start;
1312 colnr_T *cursor;
1313 colnr_T *end;
1314{
1315 colnr_T vcol;
1316 char_u *ptr; /* points to current char */
1317 char_u *posptr; /* points to char at pos->col */
Bram Moolenaar597a4222014-06-25 14:39:50 +02001318 char_u *line; /* start of the line */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319 int incr;
1320 int head;
1321 int ts = wp->w_buffer->b_p_ts;
1322 int c;
1323
1324 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001325 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001326 if (pos->col == MAXCOL)
1327 posptr = NULL; /* continue until the NUL */
1328 else
1329 posptr = ptr + pos->col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330
1331 /*
1332 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001333 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1334 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001335 * Also use this when 'list' is set but tabs take their normal size.
1336 */
1337 if ((!wp->w_p_list || lcs_tab1 != NUL)
1338#ifdef FEAT_LINEBREAK
Bram Moolenaar597a4222014-06-25 14:39:50 +02001339 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001340#endif
1341 )
1342 {
1343#ifndef FEAT_MBYTE
1344 head = 0;
1345#endif
1346 for (;;)
1347 {
1348#ifdef FEAT_MBYTE
1349 head = 0;
1350#endif
1351 c = *ptr;
1352 /* make sure we don't go past the end of the line */
1353 if (c == NUL)
1354 {
1355 incr = 1; /* NUL at end of line only takes one column */
1356 break;
1357 }
1358 /* A tab gets expanded, depending on the current column */
1359 if (c == TAB)
1360 incr = ts - (vcol % ts);
1361 else
1362 {
1363#ifdef FEAT_MBYTE
1364 if (has_mbyte)
1365 {
1366 /* For utf-8, if the byte is >= 0x80, need to look at
1367 * further bytes to find the cell width. */
1368 if (enc_utf8 && c >= 0x80)
1369 incr = utf_ptr2cells(ptr);
1370 else
1371 incr = CHARSIZE(c);
1372
1373 /* If a double-cell char doesn't fit at the end of a line
1374 * it wraps to the next line, it's like this char is three
1375 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001376 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1377 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001378 {
1379 ++incr;
1380 head = 1;
1381 }
1382 }
1383 else
1384#endif
1385 incr = CHARSIZE(c);
1386 }
1387
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001388 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001389 break;
1390
1391 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001392 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001393 }
1394 }
1395 else
1396 {
1397 for (;;)
1398 {
1399 /* A tab gets expanded, depending on the current column */
1400 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001401 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001402 /* make sure we don't go past the end of the line */
1403 if (*ptr == NUL)
1404 {
1405 incr = 1; /* NUL at end of line only takes one column */
1406 break;
1407 }
1408
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001409 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001410 break;
1411
1412 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001413 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001414 }
1415 }
1416 if (start != NULL)
1417 *start = vcol + head;
1418 if (end != NULL)
1419 *end = vcol + incr - 1;
1420 if (cursor != NULL)
1421 {
1422 if (*ptr == TAB
1423 && (State & NORMAL)
1424 && !wp->w_p_list
1425 && !virtual_active()
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001426 && !(VIsual_active && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001427 )
1428 *cursor = vcol + incr - 1; /* cursor at end */
1429 else
1430 *cursor = vcol + head; /* cursor at start */
1431 }
1432}
1433
1434/*
1435 * Get virtual cursor column in the current window, pretending 'list' is off.
1436 */
1437 colnr_T
1438getvcol_nolist(posp)
1439 pos_T *posp;
1440{
1441 int list_save = curwin->w_p_list;
1442 colnr_T vcol;
1443
1444 curwin->w_p_list = FALSE;
1445 getvcol(curwin, posp, NULL, &vcol, NULL);
1446 curwin->w_p_list = list_save;
1447 return vcol;
1448}
1449
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Get virtual column in virtual mode.
 * When virtual_active() is FALSE this is the same as getvcol().
 * Each of "start", "cursor" and "end" may be NULL.
 */
    void
getvvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     col;
    colnr_T     coladd;
    colnr_T     endadd;
# ifdef FEAT_MBYTE
    char_u      *ptr;
# endif

    if (virtual_active())
    {
        /* For virtual mode, only want one value */
        getvcol(wp, pos, &col, NULL, NULL);

        coladd = pos->coladd;
        endadd = 0;
# ifdef FEAT_MBYTE
        /* Cannot put the cursor on part of a wide character. */
        ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
        if (pos->col < (colnr_T)STRLEN(ptr))
        {
            int c = (*mb_ptr2char)(ptr + pos->col);

            if (c != TAB && vim_isprintc(c))
            {
                endadd = (colnr_T)(char2cells(c) - 1);
                if (coladd > endadd)    /* past end of line */
                    endadd = 0;
                else
                    coladd = 0;
            }
        }
# endif
        col += coladd;
        if (start != NULL)
            *start = col;
        if (cursor != NULL)
            *cursor = col;
        if (end != NULL)
            *end = col + endadd;
    }
    else
        getvcol(wp, pos, start, cursor, end);
}
#endif
1505
Bram Moolenaar071d4272004-06-13 20:20:40 +00001506/*
1507 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1508 * Used for Visual block mode.
1509 */
1510 void
1511getvcols(wp, pos1, pos2, left, right)
1512 win_T *wp;
1513 pos_T *pos1, *pos2;
1514 colnr_T *left, *right;
1515{
1516 colnr_T from1, from2, to1, to2;
1517
1518 if (ltp(pos1, pos2))
1519 {
1520 getvvcol(wp, pos1, &from1, NULL, &to1);
1521 getvvcol(wp, pos2, &from2, NULL, &to2);
1522 }
1523 else
1524 {
1525 getvvcol(wp, pos2, &from1, NULL, &to1);
1526 getvvcol(wp, pos1, &from2, NULL, &to2);
1527 }
1528 if (from2 < from1)
1529 *left = from2;
1530 else
1531 *left = from1;
1532 if (to2 > to1)
1533 {
1534 if (*p_sel == 'e' && from2 - 1 >= to1)
1535 *right = from2 - 1;
1536 else
1537 *right = to2;
1538 }
1539 else
1540 *right = to1;
1541}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001542
1543/*
1544 * skipwhite: skip over ' ' and '\t'.
1545 */
1546 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001547skipwhite(q)
1548 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001549{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001550 char_u *p = q;
1551
Bram Moolenaar071d4272004-06-13 20:20:40 +00001552 while (vim_iswhite(*p)) /* skip to next non-white */
1553 ++p;
1554 return p;
1555}
1556
1557/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001558 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001559 */
1560 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001561skipdigits(q)
1562 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001563{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001564 char_u *p = q;
1565
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1567 ++p;
1568 return p;
1569}
1570
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * skip over binary digits
 * Returns a pointer to the first character in "q" that is not '0' or '1'.
 */
    char_u *
skipbin(q)
    char_u      *q;
{
    char_u      *p = q;

    while (vim_isbdigit(*p))    /* skip to next non-binary-digit */
        ++p;
    return p;
}

/*
 * skip over digits and hex characters
 * Returns a pointer to the first character in "q" for which vim_isxdigit()
 * is false.
 */
    char_u *
skiphex(q)
    char_u      *q;
{
    char_u      *p = q;

    while (vim_isxdigit(*p))    /* skip to next non-hex-digit */
        ++p;
    return p;
}
#endif
1600
#if defined(FEAT_EX_EXTRA) || defined(PROTO)
/*
 * skip to bin digit (or NUL after the string)
 */
    char_u *
skiptobin(q)
    char_u      *q;
{
    char_u      *p = q;

    while (*p != NUL && !vim_isbdigit(*p))      /* skip to next digit */
        ++p;
    return p;
}

/*
 * skip to digit (or NUL after the string)
 */
    char_u *
skiptodigit(q)
    char_u      *q;
{
    char_u      *p = q;

    while (*p != NUL && !VIM_ISDIGIT(*p))       /* skip to next digit */
        ++p;
    return p;
}

/*
 * skip to hex character (or NUL after the string)
 */
    char_u *
skiptohex(q)
    char_u      *q;
{
    char_u      *p = q;

    while (*p != NUL && !vim_isxdigit(*p))      /* skip to next digit */
        ++p;
    return p;
}
#endif
1644
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' to '9'.
 */
    int
vim_isdigit(c)
    int         c;
{
    return (c >= '0' && c <= '9');
}
1657
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0' - '9', 'a' - 'f' and 'A' - 'F'.
 */
    int
vim_isxdigit(c)
    int         c;
{
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}
1671
/*
 * Corollary of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100.
 * Returns non-zero only for the binary digits '0' and '1'.
 */
    int
vim_isbdigit(c)
    int         c;
{
    return (c == '0' || c == '1');
}
1682
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Vim's own character class functions.  These exist because many library
 * islower()/toupper() etc. do not work properly: they crash when used with
 * invalid values or can't handle latin1 when the locale is C.
 * Speed is most important here.
 */
#define LATIN1LOWER 'l'
#define LATIN1UPPER 'U'

/*
 * The three tables below are indexed with the character value 0 - 255.
 * Each one holds 256 entries plus the terminating NUL and is written as 16
 * rows of 16 characters, so that the position of every entry can be checked.
 * The number of spaces inside the strings is significant.
 */

/* Case flag for each character: LATIN1UPPER, LATIN1LOWER or ' ' for none. */
static char_u latin1flags[257] =
    "                "      /* 0x00 - 0x0f */
    "                "      /* 0x10 - 0x1f */
    "                "      /* 0x20 - 0x2f */
    "                "      /* 0x30 - 0x3f */
    " UUUUUUUUUUUUUUU"      /* 0x40 - 0x4f */
    "UUUUUUUUUUU     "      /* 0x50 - 0x5f */
    " lllllllllllllll"      /* 0x60 - 0x6f */
    "lllllllllll     "      /* 0x70 - 0x7f */
    "                "      /* 0x80 - 0x8f */
    "                "      /* 0x90 - 0x9f */
    "                "      /* 0xa0 - 0xaf */
    "                "      /* 0xb0 - 0xbf */
    "UUUUUUUUUUUUUUUU"      /* 0xc0 - 0xcf */
    "UUUUUUU UUUUUUUl"      /* 0xd0 - 0xdf */
    "llllllllllllllll"      /* 0xe0 - 0xef */
    "lllllll llllllll";     /* 0xf0 - 0xff */

/* Upper case version of each character. */
static char_u latin1upper[257] =
    "                "      /* 0x00 - 0x0f */
    "                "      /* 0x10 - 0x1f */
    " !\"#$%&'()*+,-./"    /* 0x20 - 0x2f */
    "0123456789:;<=>?"      /* 0x30 - 0x3f */
    "@ABCDEFGHIJKLMNO"      /* 0x40 - 0x4f */
    "PQRSTUVWXYZ[\\]^_"     /* 0x50 - 0x5f */
    "`ABCDEFGHIJKLMNO"      /* 0x60 - 0x6f */
    "PQRSTUVWXYZ{|}~\x7f"   /* 0x70 - 0x7f */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";

/* Lower case version of each character. */
static char_u latin1lower[257] =
    "                "      /* 0x00 - 0x0f */
    "                "      /* 0x10 - 0x1f */
    " !\"#$%&'()*+,-./"    /* 0x20 - 0x2f */
    "0123456789:;<=>?"      /* 0x30 - 0x3f */
    "@abcdefghijklmno"      /* 0x40 - 0x4f */
    "pqrstuvwxyz[\\]^_"     /* 0x50 - 0x5f */
    "`abcdefghijklmno"      /* 0x60 - 0x6f */
    "pqrstuvwxyz{|}~\x7f"   /* 0x70 - 0x7f */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/*
 * Return TRUE if "c" is a lower case character.
 * Characters up to '@' are never lower case.  From 0x80 on the result
 * depends on the encoding, see the comments in the code.
 */
    int
vim_islower(c)
    int     c;
{
    if (c <= '@')
        return FALSE;
    if (c >= 0x80)
    {
        if (enc_utf8)
            return utf_islower(c);
        if (c >= 0x100)
        {
#ifdef HAVE_ISWLOWER
            if (has_mbyte)
                return iswlower(c);
#endif
            /* islower() can't handle these chars and may crash */
            return FALSE;
        }
        if (enc_latin1like)
            return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
    }
    return islower(c);
}

/*
 * Return TRUE if "c" is an upper case character.
 * Characters up to '@' are never upper case.  From 0x80 on the result
 * depends on the encoding, see the comments in the code.
 */
    int
vim_isupper(c)
    int     c;
{
    if (c <= '@')
        return FALSE;
    if (c >= 0x80)
    {
        if (enc_utf8)
            return utf_isupper(c);
        if (c >= 0x100)
        {
#ifdef HAVE_ISWUPPER
            if (has_mbyte)
                return iswupper(c);
#endif
            /* isupper() can't handle these chars and may crash */
            return FALSE;
        }
        if (enc_latin1like)
            return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
    }
    return isupper(c);
}

/*
 * Return the upper case version of "c".
 * Characters up to '@' are returned unchanged.
 */
    int
vim_toupper(c)
    int     c;
{
    if (c <= '@')
        return c;
    if (c >= 0x80)
    {
        if (enc_utf8)
            return utf_toupper(c);
        if (c >= 0x100)
        {
#ifdef HAVE_TOWUPPER
            if (has_mbyte)
                return towupper(c);
#endif
            /* toupper() can't handle these chars and may crash */
            return c;
        }
        if (enc_latin1like)
            return latin1upper[c];
    }
    return TOUPPER_LOC(c);
}

/*
 * Return the lower case version of "c".
 * Characters up to '@' are returned unchanged.
 */
    int
vim_tolower(c)
    int     c;
{
    if (c <= '@')
        return c;
    if (c >= 0x80)
    {
        if (enc_utf8)
            return utf_tolower(c);
        if (c >= 0x100)
        {
#ifdef HAVE_TOWLOWER
            if (has_mbyte)
                return towlower(c);
#endif
            /* tolower() can't handle these chars and may crash */
            return c;
        }
        if (enc_latin1like)
            return latin1lower[c];
    }
    return TOLOWER_LOC(c);
}
#endif
1797
Bram Moolenaar071d4272004-06-13 20:20:40 +00001798/*
1799 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1800 */
1801 char_u *
1802skiptowhite(p)
1803 char_u *p;
1804{
1805 while (*p != ' ' && *p != '\t' && *p != NUL)
1806 ++p;
1807 return p;
1808}
1809
#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
        || defined(PROTO)
/*
 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
 * A character is escaped when it follows a backslash or CTRL-V.
 */
    char_u *
skiptowhite_esc(p)
    char_u      *p;
{
    while (*p != ' ' && *p != '\t' && *p != NUL)
    {
        /* Step over the escape character, the escaped character is skipped
         * below.  A NUL is never skipped. */
        if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
            ++p;
        ++p;
    }
    return p;
}
#endif
1828
1829/*
1830 * Getdigits: Get a number from a string and skip over it.
1831 * Note: the argument is a pointer to a char_u pointer!
1832 */
1833 long
1834getdigits(pp)
1835 char_u **pp;
1836{
1837 char_u *p;
1838 long retval;
1839
1840 p = *pp;
1841 retval = atol((char *)p);
1842 if (*p == '-') /* skip negative sign */
1843 ++p;
1844 p = skipdigits(p); /* skip to next non-digit */
1845 *pp = p;
1846 return retval;
1847}
1848
1849/*
1850 * Return TRUE if "lbuf" is empty or only contains blanks.
1851 */
1852 int
1853vim_isblankline(lbuf)
1854 char_u *lbuf;
1855{
1856 char_u *p;
1857
1858 p = skipwhite(lbuf);
1859 return (*p == NUL || *p == '\r' || *p == '\n');
1860}
1861
1862/*
1863 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001864 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1865 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001866 * 0 decimal
1867 * '0' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001868 * 'B' bin
1869 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001870 * 'X' hex
1871 * 'x' hex
1872 * If "len" is not NULL, the length of the number in characters is returned.
1873 * If "nptr" is not NULL, the signed result is returned in it.
1874 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001875 * If "what" contains STR2NR_BIN recognize binary numbers
1876 * If "what" contains STR2NR_OCT recognize octal numbers
1877 * If "what" contains STR2NR_HEX recognize hex numbers
1878 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001879 * If maxlen > 0, check at a maximum maxlen chars
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880 */
1881 void
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001882vim_str2nr(start, prep, len, what, nptr, unptr, maxlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001883 char_u *start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001884 int *prep; /* return: type of number 0 = decimal, 'x'
1885 or 'X' is hex, '0' = octal, 'b' or 'B'
1886 is bin */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887 int *len; /* return: detected length of number */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001888 int what; /* what numbers to recognize */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001889 long *nptr; /* return: signed result */
1890 unsigned long *unptr; /* return: unsigned result */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001891 int maxlen; /* max length of string to check */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001892{
1893 char_u *ptr = start;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001894 int pre = 0; /* default is decimal */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001896 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001897 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001898
1899 if (ptr[0] == '-')
1900 {
1901 negative = TRUE;
1902 ++ptr;
1903 }
1904
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001905 /* Recognize hex, octal, and bin. */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001906 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1907 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001908 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001909 pre = ptr[1];
1910 if ((what & STR2NR_HEX)
1911 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1912 && (maxlen == 0 || maxlen > 2))
1913 /* hexadecimal */
1914 ptr += 2;
1915 else if ((what & STR2NR_BIN)
1916 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1917 && (maxlen == 0 || maxlen > 2))
1918 /* binary */
1919 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001920 else
1921 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001922 /* decimal or octal, default is decimal */
1923 pre = 0;
1924 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001925 {
1926 /* Don't interpret "0", "08" or "0129" as octal. */
1927 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1928 {
1929 if (ptr[n] > '7')
1930 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001931 pre = 0; /* can't be octal */
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001932 break;
1933 }
Bram Moolenaar06af6022012-01-26 13:40:08 +01001934 if (ptr[n] >= '0')
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001935 pre = '0'; /* assume octal */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001936 if (n == maxlen)
1937 break;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001938 }
1939 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001940 }
1941 }
1942
1943 /*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001944 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1945 */
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001946 n = 1;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001947 if (pre == 'B' || pre == 'b' || what == STR2NR_BIN + STR2NR_FORCE)
1948 {
1949 /* bin */
1950 if (pre != 0)
1951 n += 2; /* skip over "0b" */
1952 while ('0' <= *ptr && *ptr <= '1')
1953 {
1954 un = 2 * un + (unsigned long)(*ptr - '0');
1955 ++ptr;
1956 if (n++ == maxlen)
1957 break;
1958 }
1959 }
1960 else if (pre == '0' || what == STR2NR_OCT + STR2NR_FORCE)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001961 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001962 /* octal */
1963 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001964 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001965 un = 8 * un + (unsigned long)(*ptr - '0');
1966 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001967 if (n++ == maxlen)
1968 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001969 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001970 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001971 else if (pre != 0 || what == STR2NR_HEX + STR2NR_FORCE)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001972 {
1973 /* hex */
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001974 if (pre != 0)
Bram Moolenaar5adfea12015-09-01 18:51:39 +02001975 n += 2; /* skip over "0x" */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001976 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001977 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001978 un = 16 * un + (unsigned long)hex2nr(*ptr);
1979 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001980 if (n++ == maxlen)
1981 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001982 }
1983 }
1984 else
1985 {
1986 /* decimal */
1987 while (VIM_ISDIGIT(*ptr))
1988 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989 un = 10 * un + (unsigned long)(*ptr - '0');
1990 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001991 if (n++ == maxlen)
1992 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001993 }
1994 }
1995
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001996 if (prep != NULL)
1997 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001998 if (len != NULL)
1999 *len = (int)(ptr - start);
2000 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002001 {
2002 if (negative) /* account for leading '-' for decimal numbers */
2003 *nptr = -(long)un;
2004 else
2005 *nptr = (long)un;
2006 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002007 if (unptr != NULL)
2008 *unptr = un;
2009}
2010
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * For any other argument "c - '0'" is returned, which has no meaning.
 */
    int
hex2nr(c)
    int         c;
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - '0';
}
2025
#if defined(FEAT_TERMRESPONSE) \
        || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * p[0] is the high nibble, p[1] the low nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(p)
    char_u      *p;
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif
2041
2042/*
2043 * Return TRUE if "str" starts with a backslash that should be removed.
2044 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
2045 * backslash is not a normal file name character.
2046 * '$' is a valid file name character, we don't remove the backslash before
2047 * it. This means it is not possible to use an environment variable after a
2048 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2049 * Although "\ name" is valid, the backslash in "Program\ files" must be
2050 * removed. Assume a file name doesn't start with a space.
2051 * For multi-byte names, never remove a backslash before a non-ascii
2052 * character, assume that all multi-byte characters are valid file name
2053 * characters.
2054 */
2055 int
2056rem_backslash(str)
2057 char_u *str;
2058{
2059#ifdef BACKSLASH_IN_FILENAME
2060 return (str[0] == '\\'
2061# ifdef FEAT_MBYTE
2062 && str[1] < 0x80
2063# endif
2064 && (str[1] == ' '
2065 || (str[1] != NUL
2066 && str[1] != '*'
2067 && str[1] != '?'
2068 && !vim_isfilec(str[1]))));
2069#else
2070 return (str[0] == '\\' && str[1] != NUL);
2071#endif
2072}
2073
2074/*
2075 * Halve the number of backslashes in a file name argument.
2076 * For MS-DOS we only do this if the character after the backslash
2077 * is not a normal file character.
2078 */
2079 void
2080backslash_halve(p)
2081 char_u *p;
2082{
2083 for ( ; *p; ++p)
2084 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002085 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002086}
2087
2088/*
2089 * backslash_halve() plus save the result in allocated memory.
2090 */
2091 char_u *
2092backslash_halve_save(p)
2093 char_u *p;
2094{
2095 char_u *res;
2096
2097 res = vim_strsave(p);
2098 if (res == NULL)
2099 return p;
2100 backslash_halve(res);
2101 return res;
2102}
2103
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte value, the entry is the
 * converted byte value (written in octal).
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place, each byte by its
 * entry in ebcdic2ascii_tab[].  Nothing is done when "len" is zero or
 * negative.
 */
    void
ebcdic2ascii(buffer, len)
    char_u      *buffer;
    int         len;
{
    int         i;

    for (i = 0; i < len; i++)
        buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif