/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
22static int chartab_initialized = FALSE;
23
/*
 * Per-buffer keyword table: b_chartab[] is an array of 32 bytes, one bit for
 * each of the characters 0-255.  The byte is selected with "c >> 3", the bit
 * inside that byte with "c & 7".
 * SET_CHARTAB() switches the bit on, RESET_CHARTAB() switches it off and
 * GET_CHARTAB() is non-zero when the bit is on.
 */
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
133 for (c = 0; c < 256; ++c)
134 {
135 /* double-byte characters are probably word characters */
136 if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
137 SET_CHARTAB(buf, c);
138 }
139#endif
140
141#ifdef FEAT_LISP
142 /*
143 * In lisp mode the '-' character is included in keywords.
144 */
145 if (buf->b_p_lisp)
146 SET_CHARTAB(buf, '-');
147#endif
148
149 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
150 * options Each option is a list of characters, character numbers or
151 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
152 */
153 for (i = global ? 0 : 3; i <= 3; ++i)
154 {
155 if (i == 0)
156 p = p_isi; /* first round: 'isident' */
157 else if (i == 1)
158 p = p_isp; /* second round: 'isprint' */
159 else if (i == 2)
160 p = p_isf; /* third round: 'isfname' */
161 else /* i == 3 */
162 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
163
164 while (*p)
165 {
166 tilde = FALSE;
167 do_isalpha = FALSE;
168 if (*p == '^' && p[1] != NUL)
169 {
170 tilde = TRUE;
171 ++p;
172 }
173 if (VIM_ISDIGIT(*p))
174 c = getdigits(&p);
175 else
176 c = *p++;
177 c2 = -1;
178 if (*p == '-' && p[1] != NUL)
179 {
180 ++p;
181 if (VIM_ISDIGIT(*p))
182 c2 = getdigits(&p);
183 else
184 c2 = *p++;
185 }
186 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
187 || !(*p == NUL || *p == ','))
188 return FAIL;
189
190 if (c2 == -1) /* not a range */
191 {
192 /*
193 * A single '@' (not "@-@"):
194 * Decide on letters being ID/printable/keyword chars with
195 * standard function isalpha(). This takes care of locale for
196 * single-byte characters).
197 */
198 if (c == '@')
199 {
200 do_isalpha = TRUE;
201 c = 1;
202 c2 = 255;
203 }
204 else
205 c2 = c;
206 }
207 while (c <= c2)
208 {
209 if (!do_isalpha || isalpha(c)
210#ifdef FEAT_FKMAP
211 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
212#endif
213 )
214 {
215 if (i == 0) /* (re)set ID flag */
216 {
217 if (tilde)
218 chartab[c] &= ~CT_ID_CHAR;
219 else
220 chartab[c] |= CT_ID_CHAR;
221 }
222 else if (i == 1) /* (re)set printable */
223 {
224 if ((c < ' '
225#ifndef EBCDIC
226 || c > '~'
227#endif
228#ifdef FEAT_FKMAP
229 || (p_altkeymap
230 && (F_isalpha(c) || F_isdigit(c)))
231#endif
232 )
233#ifdef FEAT_MBYTE
234 /* For double-byte we keep the cell width, so
235 * that we can detect it from the first byte. */
236 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
237#endif
238 )
239 {
240 if (tilde)
241 {
242 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
243 + ((dy_flags & DY_UHEX) ? 4 : 2);
244 chartab[c] &= ~CT_PRINT_CHAR;
245 }
246 else
247 {
248 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
249 chartab[c] |= CT_PRINT_CHAR;
250 }
251 }
252 }
253 else if (i == 2) /* (re)set fname flag */
254 {
255 if (tilde)
256 chartab[c] &= ~CT_FNAME_CHAR;
257 else
258 chartab[c] |= CT_FNAME_CHAR;
259 }
260 else /* i == 3 */ /* (re)set keyword flag */
261 {
262 if (tilde)
263 RESET_CHARTAB(buf, c);
264 else
265 SET_CHARTAB(buf, c);
266 }
267 }
268 ++c;
269 }
270 p = skip_to_option_part(p);
271 }
272 }
273 chartab_initialized = TRUE;
274 return OK;
275}
276
277/*
278 * Translate any special characters in buf[bufsize] in-place.
279 * The result is a string with only printable characters, but if there is not
280 * enough room, not all characters will be translated.
281 */
282 void
283trans_characters(buf, bufsize)
284 char_u *buf;
285 int bufsize;
286{
287 int len; /* length of string needing translation */
288 int room; /* room in buffer after string */
289 char_u *trs; /* translated character */
290 int trs_len; /* length of trs[] */
291
292 len = (int)STRLEN(buf);
293 room = bufsize - len;
294 while (*buf != 0)
295 {
296# ifdef FEAT_MBYTE
297 /* Assume a multi-byte character doesn't need translation. */
298 if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
299 len -= trs_len;
300 else
301# endif
302 {
303 trs = transchar_byte(*buf);
304 trs_len = (int)STRLEN(trs);
305 if (trs_len > 1)
306 {
307 room -= trs_len - 1;
308 if (room <= 0)
309 return;
310 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
311 }
312 mch_memmove(buf, trs, (size_t)trs_len);
313 --len;
314 }
315 buf += trs_len;
316 }
317}
318
319#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
320/*
321 * Translate a string into allocated memory, replacing special chars with
322 * printable chars. Returns NULL when out of memory.
323 */
324 char_u *
325transstr(s)
326 char_u *s;
327{
328 char_u *res;
329 char_u *p;
330#ifdef FEAT_MBYTE
331 int l, len, c;
332 char_u hexbuf[11];
333#endif
334
335#ifdef FEAT_MBYTE
336 if (has_mbyte)
337 {
338 /* Compute the length of the result, taking account of unprintable
339 * multi-byte characters. */
340 len = 0;
341 p = s;
342 while (*p != NUL)
343 {
344 if ((l = (*mb_ptr2len_check)(p)) > 1)
345 {
346 c = (*mb_ptr2char)(p);
347 p += l;
348 if (vim_isprintc(c))
349 len += l;
350 else
351 {
352 transchar_hex(hexbuf, c);
353 len += STRLEN(hexbuf);
354 }
355 }
356 else
357 {
358 l = byte2cells(*p++);
359 if (l > 0)
360 len += l;
361 else
362 len += 4; /* illegal byte sequence */
363 }
364 }
365 res = alloc((unsigned)(len + 1));
366 }
367 else
368#endif
369 res = alloc((unsigned)(vim_strsize(s) + 1));
370 if (res != NULL)
371 {
372 *res = NUL;
373 p = s;
374 while (*p != NUL)
375 {
376#ifdef FEAT_MBYTE
377 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
378 {
379 c = (*mb_ptr2char)(p);
380 if (vim_isprintc(c))
381 STRNCAT(res, p, l); /* append printable multi-byte char */
382 else
383 transchar_hex(res + STRLEN(res), c);
384 p += l;
385 }
386 else
387#endif
388 STRCAT(res, transchar_byte(*p++));
389 }
390 }
391 return res;
392}
393#endif
394
395#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
396/*
397 * Convert the string "p[len]" to do ignore-case comparing. Uses the current
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000398 * locale.
399 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
400 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 */
402 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000403str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000405 int orglen;
406 char_u *buf;
407 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000408{
409 garray_T ga;
410 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000411 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000412
413#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
414#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000415#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
416#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000417
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000418 /* Copy "str" into "buf" or allocated memory, unmodified. */
419 if (buf == NULL)
420 {
421 ga_init2(&ga, 1, 10);
422 if (ga_grow(&ga, len + 1) == FAIL)
423 return NULL;
424 mch_memmove(ga.ga_data, str, (size_t)len);
425 ga.ga_len = len;
426 }
427 else
428 {
429 if (len >= buflen) /* Ugly! */
430 len = buflen - 1;
431 mch_memmove(buf, str, (size_t)len);
432 }
433 if (buf == NULL)
434 GA_CHAR(len) = NUL;
435 else
436 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437
438 /* Make each character lower case. */
439 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000440 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441 {
442#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000443 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000444 {
445 if (enc_utf8)
446 {
447 int c, lc;
448
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000449 c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000450 lc = utf_tolower(c);
451 if (c != lc)
452 {
453 int ol = utf_char2len(c);
454 int nl = utf_char2len(lc);
455
456 /* If the byte length changes need to shift the following
457 * characters forward or backward. */
458 if (ol != nl)
459 {
460 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000461 {
462 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
463 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000464 {
465 /* out of memory, keep old char */
466 lc = c;
467 nl = ol;
468 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000469 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000470 if (ol != nl)
471 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000472 if (buf == NULL)
473 {
474 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000475 STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000476 ga.ga_len += nl - ol;
477 }
478 else
479 {
480 mch_memmove(buf + i + nl, buf + i + ol,
481 STRLEN(buf + i + ol) + 1);
482 len += nl - ol;
483 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000484 }
485 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000486 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000487 }
488 }
489 /* skip to next multi-byte char */
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000490 i += (*mb_ptr2len_check)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000491 }
492 else
493#endif
494 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000495 if (buf == NULL)
496 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
497 else
498 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000499 ++i;
500 }
501 }
502
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000503 if (buf == NULL)
504 return (char_u *)ga.ga_data;
505 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506}
507#endif
508
509/*
510 * Catch 22: chartab[] can't be initialized before the options are
511 * initialized, and initializing options may cause transchar() to be called!
512 * When chartab_initialized == FALSE don't use chartab[].
513 * Does NOT work for multi-byte characters, c must be <= 255.
514 * Also doesn't work for the first byte of a multi-byte, "c" must be a
515 * character!
516 */
517static char_u transchar_buf[7];
518
519 char_u *
520transchar(c)
521 int c;
522{
523 int i;
524
525 i = 0;
526 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
527 {
528 transchar_buf[0] = '~';
529 transchar_buf[1] = '@';
530 i = 2;
531 c = K_SECOND(c);
532 }
533
534 if ((!chartab_initialized && (
535#ifdef EBCDIC
536 (c >= 64 && c < 255)
537#else
538 (c >= ' ' && c <= '~')
539#endif
540#ifdef FEAT_FKMAP
541 || F_ischar(c)
542#endif
543 )) || (c < 256 && vim_isprintc_strict(c)))
544 {
545 /* printable character */
546 transchar_buf[i] = c;
547 transchar_buf[i + 1] = NUL;
548 }
549 else
550 transchar_nonprint(transchar_buf + i, c);
551 return transchar_buf;
552}
553
554#if defined(FEAT_MBYTE) || defined(PROTO)
555/*
556 * Like transchar(), but called with a byte instead of a character. Checks
557 * for an illegal UTF-8 byte.
558 */
559 char_u *
560transchar_byte(c)
561 int c;
562{
563 if (enc_utf8 && c >= 0x80)
564 {
565 transchar_nonprint(transchar_buf, c);
566 return transchar_buf;
567 }
568 return transchar(c);
569}
570#endif
571
572/*
573 * Convert non-printable character to two or more printable characters in
574 * "buf[]". "buf" needs to be able to hold five bytes.
575 * Does NOT work for multi-byte characters, c must be <= 255.
576 */
577 void
578transchar_nonprint(buf, c)
579 char_u *buf;
580 int c;
581{
582 if (c == NL)
583 c = NUL; /* we use newline in place of a NUL */
584 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
585 c = NL; /* we use CR in place of NL in this case */
586
587 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
588 transchar_hex(buf, c);
589
590#ifdef EBCDIC
591 /* For EBCDIC only the characters 0-63 and 255 are not printable */
592 else if (CtrlChar(c) != 0 || c == DEL)
593#else
594 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
595#endif
596 {
597 buf[0] = '^';
598#ifdef EBCDIC
599 if (c == DEL)
600 buf[1] = '?'; /* DEL displayed as ^? */
601 else
602 buf[1] = CtrlChar(c);
603#else
604 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
605#endif
606
607 buf[2] = NUL;
608 }
609#ifdef FEAT_MBYTE
610 else if (enc_utf8 && c >= 0x80)
611 {
612 transchar_hex(buf, c);
613 }
614#endif
615#ifndef EBCDIC
616 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
617 {
618 buf[0] = '|';
619 buf[1] = c - 0x80;
620 buf[2] = NUL;
621 }
622#else
623 else if (c < 64)
624 {
625 buf[0] = '~';
626 buf[1] = MetaChar(c);
627 buf[2] = NUL;
628 }
629#endif
630 else /* 0x80 - 0x9f and 0xff */
631 {
632 /*
633 * TODO: EBCDIC I don't know what to do with this chars, so I display
634 * them as '~?' for now
635 */
636 buf[0] = '~';
637#ifdef EBCDIC
638 buf[1] = '?'; /* 0xff displayed as ~? */
639#else
640 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
641#endif
642 buf[2] = NUL;
643 }
644}
645
646 void
647transchar_hex(buf, c)
648 char_u *buf;
649 int c;
650{
651 int i = 0;
652
653 buf[0] = '<';
654#ifdef FEAT_MBYTE
655 if (c > 255)
656 {
657 buf[++i] = nr2hex((unsigned)c >> 12);
658 buf[++i] = nr2hex((unsigned)c >> 8);
659 }
660#endif
661 buf[++i] = nr2hex((unsigned)c >> 4);
662 buf[++i] = nr2hex(c);
663 buf[++i] = '>';
664 buf[++i] = NUL;
665}
666
/*
 * Return the hex digit character for the lower 4 bits of "c".
 * The letters are lower case, so that <f1> (byte 0xf1) can't be confused
 * with <F1> (function key 1).
 */
    static int
nr2hex(c)
    int         c;
{
    int         nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
680
681/*
682 * Return number of display cells occupied by byte "b".
683 * Caller must make sure 0 <= b <= 255.
684 * For multi-byte mode "b" must be the first byte of a character.
685 * A TAB is counted as two cells: "^I".
686 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
687 * cells depends on further bytes.
688 */
689 int
690byte2cells(b)
691 int b;
692{
693#ifdef FEAT_MBYTE
694 if (enc_utf8 && b >= 0x80)
695 return 0;
696#endif
697 return (chartab[b] & CT_CELL_MASK);
698}
699
700/*
701 * Return number of display cells occupied by character "c".
702 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
703 * A TAB is counted as two cells: "^I" or four: "<09>".
704 */
705 int
706char2cells(c)
707 int c;
708{
709 if (IS_SPECIAL(c))
710 return char2cells(K_SECOND(c)) + 2;
711#ifdef FEAT_MBYTE
712 if (c >= 0x80)
713 {
714 /* UTF-8: above 0x80 need to check the value */
715 if (enc_utf8)
716 return utf_char2cells(c);
717 /* DBCS: double-byte means double-width, except for euc-jp with first
718 * byte 0x8e */
719 if (enc_dbcs != 0 && c >= 0x100)
720 {
721 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
722 return 1;
723 return 2;
724 }
725 }
726#endif
727 return (chartab[c & 0xff] & CT_CELL_MASK);
728}
729
730/*
731 * Return number of display cells occupied by character at "*p".
732 * A TAB is counted as two cells: "^I" or four: "<09>".
733 */
734 int
735ptr2cells(p)
736 char_u *p;
737{
738#ifdef FEAT_MBYTE
739 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
740 if (enc_utf8 && *p >= 0x80)
741 return utf_ptr2cells(p);
742 /* For DBCS we can tell the cell count from the first byte. */
743#endif
744 return (chartab[*p] & CT_CELL_MASK);
745}
746
747/*
748 * Return the number of characters string "s" will take on the screen,
749 * counting TABs as two characters: "^I".
750 */
751 int
752vim_strsize(s)
753 char_u *s;
754{
755 return vim_strnsize(s, (int)MAXCOL);
756}
757
758/*
759 * Return the number of characters string "s[len]" will take on the screen,
760 * counting TABs as two characters: "^I".
761 */
762 int
763vim_strnsize(s, len)
764 char_u *s;
765 int len;
766{
767 int size = 0;
768
769 while (*s != NUL && --len >= 0)
770 {
771#ifdef FEAT_MBYTE
772 if (has_mbyte)
773 {
774 int l = (*mb_ptr2len_check)(s);
775
776 size += ptr2cells(s);
777 s += l;
778 len -= l - 1;
779 }
780 else
781#endif
782 size += byte2cells(*s++);
783 }
784 return size;
785}
786
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro is a complete if/else statement in which both branches return
 * from the calling function; it is used without a trailing semicolon.
 *   wp   window, its 'list' setting decides if a TAB is expanded
 *   buf  buffer, supplies 'tabstop'
 *   p    pointer to the character
 *   col  column of the character; may be any expression
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
803
804#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
805 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
806 int
807chartabsize(p, col)
808 char_u *p;
809 colnr_T col;
810{
811 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
812}
813#endif
814
815#ifdef FEAT_LINEBREAK
816 static int
817win_chartabsize(wp, p, col)
818 win_T *wp;
819 char_u *p;
820 colnr_T col;
821{
822 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
823}
824#endif
825
826/*
827 * return the number of characters the string 's' will take on the screen,
828 * taking into account the size of a tab
829 */
830 int
831linetabsize(s)
832 char_u *s;
833{
834 colnr_T col = 0;
835
836 while (*s != NUL)
837 col += lbr_chartabsize_adv(&s, col);
838 return (int)col;
839}
840
841/*
842 * Like linetabsize(), but for a given window instead of the current one.
843 */
844 int
845win_linetabsize(wp, p, len)
846 win_T *wp;
847 char_u *p;
848 colnr_T len;
849{
850 colnr_T col = 0;
851 char_u *s;
852
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000853 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000854 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 return (int)col;
856}
857
858/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000859 * Return TRUE if 'c' is a normal identifier character:
860 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000861 */
862 int
863vim_isIDc(c)
864 int c;
865{
866 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
867}
868
869/*
870 * return TRUE if 'c' is a keyword character: Letters and characters from
871 * 'iskeyword' option for current buffer.
872 * For multi-byte characters mb_get_class() is used (builtin rules).
873 */
874 int
875vim_iswordc(c)
876 int c;
877{
878#ifdef FEAT_MBYTE
879 if (c >= 0x100)
880 {
881 if (enc_dbcs != 0)
882 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
883 if (enc_utf8)
884 return utf_class(c) >= 2;
885 }
886#endif
887 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
888}
889
890/*
891 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
892 */
893 int
894vim_iswordp(p)
895 char_u *p;
896{
897#ifdef FEAT_MBYTE
898 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
899 return mb_get_class(p) >= 2;
900#endif
901 return GET_CHARTAB(curbuf, *p) != 0;
902}
903
904#if defined(FEAT_SYN_HL) || defined(PROTO)
905 int
906vim_iswordc_buf(p, buf)
907 char_u *p;
908 buf_T *buf;
909{
910# ifdef FEAT_MBYTE
911 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
912 return mb_get_class(p) >= 2;
913# endif
914 return (GET_CHARTAB(buf, *p) != 0);
915}
916#endif
917
918/*
919 * return TRUE if 'c' is a valid file-name character
920 * Assume characters above 0x100 are valid (multi-byte).
921 */
922 int
923vim_isfilec(c)
924 int c;
925{
926 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
927}
928
929/*
930 * return TRUE if 'c' is a printable character
931 * Assume characters above 0x100 are printable (multi-byte), except for
932 * Unicode.
933 */
934 int
935vim_isprintc(c)
936 int c;
937{
938#ifdef FEAT_MBYTE
939 if (enc_utf8 && c >= 0x100)
940 return utf_printable(c);
941#endif
942 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
943}
944
945/*
946 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
947 * byte of a double-byte character.
948 */
949 int
950vim_isprintc_strict(c)
951 int c;
952{
953#ifdef FEAT_MBYTE
954 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
955 return FALSE;
956 if (enc_utf8 && c >= 0x100)
957 return utf_printable(c);
958#endif
959 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
960}
961
962/*
963 * like chartabsize(), but also check for line breaks on the screen
964 */
965 int
966lbr_chartabsize(s, col)
967 unsigned char *s;
968 colnr_T col;
969{
970#ifdef FEAT_LINEBREAK
971 if (!curwin->w_p_lbr && *p_sbr == NUL)
972 {
973#endif
974#ifdef FEAT_MBYTE
975 if (curwin->w_p_wrap)
976 return win_nolbr_chartabsize(curwin, s, col, NULL);
977#endif
978 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
979#ifdef FEAT_LINEBREAK
980 }
981 return win_lbr_chartabsize(curwin, s, col, NULL);
982#endif
983}
984
985/*
986 * Call lbr_chartabsize() and advance the pointer.
987 */
988 int
989lbr_chartabsize_adv(s, col)
990 char_u **s;
991 colnr_T col;
992{
993 int retval;
994
995 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000996 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997 return retval;
998}
999
1000/*
1001 * This function is used very often, keep it fast!!!!
1002 *
1003 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1004 * string at start of line. Warning: *headp is only set if it's a non-zero
1005 * value, init to 0 before calling.
1006 */
1007/*ARGSUSED*/
1008 int
1009win_lbr_chartabsize(wp, s, col, headp)
1010 win_T *wp;
1011 char_u *s;
1012 colnr_T col;
1013 int *headp;
1014{
1015#ifdef FEAT_LINEBREAK
1016 int c;
1017 int size;
1018 colnr_T col2;
1019 colnr_T colmax;
1020 int added;
1021# ifdef FEAT_MBYTE
1022 int mb_added = 0;
1023# else
1024# define mb_added 0
1025# endif
1026 int numberextra;
1027 char_u *ps;
1028 int tab_corr = (*s == TAB);
1029
1030 /*
1031 * No 'linebreak' and 'showbreak': return quickly.
1032 */
1033 if (!wp->w_p_lbr && *p_sbr == NUL)
1034#endif
1035 {
1036#ifdef FEAT_MBYTE
1037 if (wp->w_p_wrap)
1038 return win_nolbr_chartabsize(wp, s, col, headp);
1039#endif
1040 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1041 }
1042
1043#ifdef FEAT_LINEBREAK
1044 /*
1045 * First get normal size, without 'linebreak'
1046 */
1047 size = win_chartabsize(wp, s, col);
1048 c = *s;
1049
1050 /*
1051 * If 'linebreak' set check at a blank before a non-blank if the line
1052 * needs a break here
1053 */
1054 if (wp->w_p_lbr
1055 && vim_isbreak(c)
1056 && !vim_isbreak(s[1])
1057 && !wp->w_p_list
1058 && wp->w_p_wrap
1059# ifdef FEAT_VERTSPLIT
1060 && wp->w_width != 0
1061# endif
1062 )
1063 {
1064 /*
1065 * Count all characters from first non-blank after a blank up to next
1066 * non-blank after a blank.
1067 */
1068 numberextra = win_col_off(wp);
1069 col2 = col;
1070 colmax = W_WIDTH(wp) - numberextra;
1071 if (col >= colmax)
1072 colmax += (((col - colmax)
1073 / (colmax + win_col_off2(wp))) + 1)
1074 * (colmax + win_col_off2(wp));
1075 for (;;)
1076 {
1077 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001078 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001079 c = *s;
1080 if (!(c != NUL
1081 && (vim_isbreak(c)
1082 || (!vim_isbreak(c)
1083 && (col2 == col || !vim_isbreak(*ps))))))
1084 break;
1085
1086 col2 += win_chartabsize(wp, s, col2);
1087 if (col2 >= colmax) /* doesn't fit */
1088 {
1089 size = colmax - col;
1090 tab_corr = FALSE;
1091 break;
1092 }
1093 }
1094 }
1095# ifdef FEAT_MBYTE
1096 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1097 && wp->w_p_wrap && in_win_border(wp, col))
1098 {
1099 ++size; /* Count the ">" in the last column. */
1100 mb_added = 1;
1101 }
1102# endif
1103
1104 /*
1105 * May have to add something for 'showbreak' string at start of line
1106 * Set *headp to the size of what we add.
1107 */
1108 added = 0;
1109 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1110 {
1111 numberextra = win_col_off(wp);
1112 col += numberextra + mb_added;
1113 if (col >= (colnr_T)W_WIDTH(wp))
1114 {
1115 col -= W_WIDTH(wp);
1116 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1117 if (numberextra > 0)
1118 col = col % numberextra;
1119 }
1120 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1121 {
1122 added = vim_strsize(p_sbr);
1123 if (tab_corr)
1124 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1125 else
1126 size += added;
1127 if (col != 0)
1128 added = 0;
1129 }
1130 }
1131 if (headp != NULL)
1132 *headp = added + mb_added;
1133 return size;
1134#endif
1135}
1136
1137#if defined(FEAT_MBYTE) || defined(PROTO)
1138/*
1139 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1140 * 'wrap' is on. This means we need to check for a double-byte character that
1141 * doesn't fit at the end of the screen line.
1142 */
1143 static int
1144win_nolbr_chartabsize(wp, s, col, headp)
1145 win_T *wp;
1146 char_u *s;
1147 colnr_T col;
1148 int *headp;
1149{
1150 int n;
1151
1152 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1153 {
1154 n = wp->w_buffer->b_p_ts;
1155 return (int)(n - (col % n));
1156 }
1157 n = ptr2cells(s);
1158 /* Add one cell for a double-width character in the last column of the
1159 * window, displayed with a ">". */
1160 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1161 {
1162 if (headp != NULL)
1163 *headp = 1;
1164 return 3;
1165 }
1166 return n;
1167}
1168
1169/*
1170 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1171 * "wp".
1172 */
1173 int
1174in_win_border(wp, vcol)
1175 win_T *wp;
1176 colnr_T vcol;
1177{
1178 colnr_T width1; /* width of first line (after line number) */
1179 colnr_T width2; /* width of further lines */
1180
1181#ifdef FEAT_VERTSPLIT
1182 if (wp->w_width == 0) /* there is no border */
1183 return FALSE;
1184#endif
1185 width1 = W_WIDTH(wp) - win_col_off(wp);
1186 if (vcol < width1 - 1)
1187 return FALSE;
1188 if (vcol == width1 - 1)
1189 return TRUE;
1190 width2 = width1 + win_col_off2(wp);
1191 return ((vcol - width1) % width2 == width2 - 1);
1192}
1193#endif /* FEAT_MBYTE */
1194
1195/*
1196 * Get virtual column number of pos.
1197 * start: on the first position of this character (TAB, ctrl)
1198 * cursor: where the cursor is on this character (first char, except for TAB)
1199 * end: on the last position of this character (TAB, ctrl)
1200 *
1201 * This is used very often, keep it fast!
1202 */
1203 void
1204getvcol(wp, pos, start, cursor, end)
1205 win_T *wp;
1206 pos_T *pos;
1207 colnr_T *start;
1208 colnr_T *cursor;
1209 colnr_T *end;
1210{
1211 colnr_T vcol;
1212 char_u *ptr; /* points to current char */
1213 char_u *posptr; /* points to char at pos->col */
1214 int incr;
1215 int head;
1216 int ts = wp->w_buffer->b_p_ts;
1217 int c;
1218
1219 vcol = 0;
1220 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1221 posptr = ptr + pos->col;
1222
1223 /*
1224 * This function is used very often, do some speed optimizations.
1225 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1226 * Also use this when 'list' is set but tabs take their normal size.
1227 */
1228 if ((!wp->w_p_list || lcs_tab1 != NUL)
1229#ifdef FEAT_LINEBREAK
1230 && !wp->w_p_lbr && *p_sbr == NUL
1231#endif
1232 )
1233 {
1234#ifndef FEAT_MBYTE
1235 head = 0;
1236#endif
1237 for (;;)
1238 {
1239#ifdef FEAT_MBYTE
1240 head = 0;
1241#endif
1242 c = *ptr;
1243 /* make sure we don't go past the end of the line */
1244 if (c == NUL)
1245 {
1246 incr = 1; /* NUL at end of line only takes one column */
1247 break;
1248 }
1249 /* A tab gets expanded, depending on the current column */
1250 if (c == TAB)
1251 incr = ts - (vcol % ts);
1252 else
1253 {
1254#ifdef FEAT_MBYTE
1255 if (has_mbyte)
1256 {
1257 /* For utf-8, if the byte is >= 0x80, need to look at
1258 * further bytes to find the cell width. */
1259 if (enc_utf8 && c >= 0x80)
1260 incr = utf_ptr2cells(ptr);
1261 else
1262 incr = CHARSIZE(c);
1263
1264 /* If a double-cell char doesn't fit at the end of a line
1265 * it wraps to the next line, it's like this char is three
1266 * cells wide. */
1267 if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1268 {
1269 ++incr;
1270 head = 1;
1271 }
1272 }
1273 else
1274#endif
1275 incr = CHARSIZE(c);
1276 }
1277
1278 if (ptr >= posptr) /* character at pos->col */
1279 break;
1280
1281 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001282 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001283 }
1284 }
1285 else
1286 {
1287 for (;;)
1288 {
1289 /* A tab gets expanded, depending on the current column */
1290 head = 0;
1291 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1292 /* make sure we don't go past the end of the line */
1293 if (*ptr == NUL)
1294 {
1295 incr = 1; /* NUL at end of line only takes one column */
1296 break;
1297 }
1298
1299 if (ptr >= posptr) /* character at pos->col */
1300 break;
1301
1302 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001303 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001304 }
1305 }
1306 if (start != NULL)
1307 *start = vcol + head;
1308 if (end != NULL)
1309 *end = vcol + incr - 1;
1310 if (cursor != NULL)
1311 {
1312 if (*ptr == TAB
1313 && (State & NORMAL)
1314 && !wp->w_p_list
1315 && !virtual_active()
1316#ifdef FEAT_VISUAL
1317 && !(VIsual_active
1318 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1319#endif
1320 )
1321 *cursor = vcol + incr - 1; /* cursor at end */
1322 else
1323 *cursor = vcol + head; /* cursor at start */
1324 }
1325}
1326
1327/*
1328 * Get virtual cursor column in the current window, pretending 'list' is off.
1329 */
1330 colnr_T
1331getvcol_nolist(posp)
1332 pos_T *posp;
1333{
1334 int list_save = curwin->w_p_list;
1335 colnr_T vcol;
1336
1337 curwin->w_p_list = FALSE;
1338 getvcol(curwin, posp, NULL, &vcol, NULL);
1339 curwin->w_p_list = list_save;
1340 return vcol;
1341}
1342
1343#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1344/*
1345 * Get virtual column in virtual mode.
1346 */
1347 void
1348getvvcol(wp, pos, start, cursor, end)
1349 win_T *wp;
1350 pos_T *pos;
1351 colnr_T *start;
1352 colnr_T *cursor;
1353 colnr_T *end;
1354{
1355 colnr_T col;
1356 colnr_T coladd;
1357 colnr_T endadd;
1358# ifdef FEAT_MBYTE
1359 char_u *ptr;
1360# endif
1361
1362 if (virtual_active())
1363 {
1364 /* For virtual mode, only want one value */
1365 getvcol(wp, pos, &col, NULL, NULL);
1366
1367 coladd = pos->coladd;
1368 endadd = 0;
1369# ifdef FEAT_MBYTE
1370 /* Cannot put the cursor on part of a wide character. */
1371 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1372 if (pos->col < STRLEN(ptr))
1373 {
1374 int c = (*mb_ptr2char)(ptr + pos->col);
1375
1376 if (c != TAB && vim_isprintc(c))
1377 {
1378 endadd = char2cells(c) - 1;
1379 if (coladd >= endadd)
1380 coladd -= endadd;
1381 else
1382 coladd = 0;
1383 }
1384 }
1385# endif
1386 col += coladd;
1387 if (start != NULL)
1388 *start = col;
1389 if (cursor != NULL)
1390 *cursor = col;
1391 if (end != NULL)
1392 *end = col + endadd;
1393 }
1394 else
1395 getvcol(wp, pos, start, cursor, end);
1396}
1397#endif
1398
1399#if defined(FEAT_VISUAL) || defined(PROTO)
1400/*
1401 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1402 * Used for Visual block mode.
1403 */
1404 void
1405getvcols(wp, pos1, pos2, left, right)
1406 win_T *wp;
1407 pos_T *pos1, *pos2;
1408 colnr_T *left, *right;
1409{
1410 colnr_T from1, from2, to1, to2;
1411
1412 if (ltp(pos1, pos2))
1413 {
1414 getvvcol(wp, pos1, &from1, NULL, &to1);
1415 getvvcol(wp, pos2, &from2, NULL, &to2);
1416 }
1417 else
1418 {
1419 getvvcol(wp, pos2, &from1, NULL, &to1);
1420 getvvcol(wp, pos1, &from2, NULL, &to2);
1421 }
1422 if (from2 < from1)
1423 *left = from2;
1424 else
1425 *left = from1;
1426 if (to2 > to1)
1427 {
1428 if (*p_sel == 'e' && from2 - 1 >= to1)
1429 *right = from2 - 1;
1430 else
1431 *right = to2;
1432 }
1433 else
1434 *right = to1;
1435}
1436#endif
1437
1438/*
1439 * skipwhite: skip over ' ' and '\t'.
1440 */
1441 char_u *
1442skipwhite(p)
1443 char_u *p;
1444{
1445 while (vim_iswhite(*p)) /* skip to next non-white */
1446 ++p;
1447 return p;
1448}
1449
1450/*
1451 * skipdigits: skip over digits;
1452 */
1453 char_u *
1454skipdigits(p)
1455 char_u *p;
1456{
1457 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1458 ++p;
1459 return p;
1460}
1461
/*
 * Replacement for isdigit() that also works for characters > 0x100.
 * isdigit() itself is not used, because some systems let it accept
 * superscript 1 as a digit.
 * For simple arguments the VIM_ISDIGIT() macro can be used instead.
 * Returns non-zero only for '0' to '9'.
 */
    int
vim_isdigit(c)
    int		c;
{
    return !(c < '0' || c > '9');
}
1474
/*
 * Replacement for isxdigit() that also works for characters > 0x100.
 * isxdigit() itself is not used, because some systems let it accept
 * superscript 1 as a digit.
 * Returns non-zero only for '0' to '9', 'a' to 'f' and 'A' to 'F'.
 */
    int
vim_isxdigit(c)
    int		c;
{
    int		is_decimal = ('0' <= c && c <= '9');
    int		is_lower_hex = ('a' <= c && c <= 'f');
    int		is_upper_hex = ('A' <= c && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1488
1489/*
1490 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1491 */
1492 char_u *
1493skiptowhite(p)
1494 char_u *p;
1495{
1496 while (*p != ' ' && *p != '\t' && *p != NUL)
1497 ++p;
1498 return p;
1499}
1500
1501#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1502 || defined(PROTO)
1503/*
1504 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1505 */
1506 char_u *
1507skiptowhite_esc(p)
1508 char_u *p;
1509{
1510 while (*p != ' ' && *p != '\t' && *p != NUL)
1511 {
1512 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1513 ++p;
1514 ++p;
1515 }
1516 return p;
1517}
1518#endif
1519
1520/*
1521 * Getdigits: Get a number from a string and skip over it.
1522 * Note: the argument is a pointer to a char_u pointer!
1523 */
1524 long
1525getdigits(pp)
1526 char_u **pp;
1527{
1528 char_u *p;
1529 long retval;
1530
1531 p = *pp;
1532 retval = atol((char *)p);
1533 if (*p == '-') /* skip negative sign */
1534 ++p;
1535 p = skipdigits(p); /* skip to next non-digit */
1536 *pp = p;
1537 return retval;
1538}
1539
1540/*
1541 * Return TRUE if "lbuf" is empty or only contains blanks.
1542 */
1543 int
1544vim_isblankline(lbuf)
1545 char_u *lbuf;
1546{
1547 char_u *p;
1548
1549 p = skipwhite(lbuf);
1550 return (*p == NUL || *p == '\r' || *p == '\n');
1551}
1552
1553/*
1554 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001555 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001556 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1557 * 0 decimal
1558 * '0' octal
1559 * 'X' hex
1560 * 'x' hex
1561 * If "len" is not NULL, the length of the number in characters is returned.
1562 * If "nptr" is not NULL, the signed result is returned in it.
1563 * If "unptr" is not NULL, the unsigned result is returned in it.
1564 */
1565 void
1566vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1567 char_u *start;
1568 int *hexp; /* return: type of number 0 = decimal, 'x'
1569 or 'X' is hex, '0' = octal */
1570 int *len; /* return: detected length of number */
1571 int dooct; /* recognize octal number */
1572 int dohex; /* recognize hex number */
1573 long *nptr; /* return: signed result */
1574 unsigned long *unptr; /* return: unsigned result */
1575{
1576 char_u *ptr = start;
1577 int hex = 0; /* default is decimal */
1578 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001579 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001580 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001581
1582 if (ptr[0] == '-')
1583 {
1584 negative = TRUE;
1585 ++ptr;
1586 }
1587
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001588 /* Recognize hex and octal. */
1589 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590 {
1591 hex = ptr[1];
1592 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1593 ptr += 2; /* hexadecimal */
1594 else
1595 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001596 hex = 0; /* default is decimal */
1597 if (dooct)
1598 {
1599 /* Don't interpret "0", "08" or "0129" as octal. */
1600 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1601 {
1602 if (ptr[n] > '7')
1603 {
1604 hex = 0; /* can't be octal */
1605 break;
1606 }
1607 if (ptr[n] > '0')
1608 hex = '0'; /* assume octal */
1609 }
1610 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001611 }
1612 }
1613
1614 /*
1615 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1616 */
1617 if (hex)
1618 {
1619 if (hex == '0')
1620 {
1621 /* octal */
1622 while ('0' <= *ptr && *ptr <= '7')
1623 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001624 un = 8 * un + (unsigned long)(*ptr - '0');
1625 ++ptr;
1626 }
1627 }
1628 else
1629 {
1630 /* hex */
1631 while (vim_isxdigit(*ptr))
1632 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001633 un = 16 * un + (unsigned long)hex2nr(*ptr);
1634 ++ptr;
1635 }
1636 }
1637 }
1638 else
1639 {
1640 /* decimal */
1641 while (VIM_ISDIGIT(*ptr))
1642 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001643 un = 10 * un + (unsigned long)(*ptr - '0');
1644 ++ptr;
1645 }
1646 }
1647
Bram Moolenaar071d4272004-06-13 20:20:40 +00001648 if (hexp != NULL)
1649 *hexp = hex;
1650 if (len != NULL)
1651 *len = (int)(ptr - start);
1652 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001653 {
1654 if (negative) /* account for leading '-' for decimal numbers */
1655 *nptr = -(long)un;
1656 else
1657 *nptr = (long)un;
1658 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001659 if (unptr != NULL)
1660 *unptr = un;
1661}
1662
/*
 * Return the number that the hex digit "c" stands for.
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'; this is not checked.
 */
    int
hex2nr(c)
    int		c;
{
    if ('a' <= c && c <= 'f')
	return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
	return 10 + (c - 'A');
    /* Everything else is handled like a decimal digit. */
    return c - '0';
}
1677
1678#if defined(FEAT_TERMRESPONSE) \
1679 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1680/*
1681 * Convert two hex characters to a byte.
1682 * Return -1 if one of the characters is not hex.
1683 */
1684 int
1685hexhex2nr(p)
1686 char_u *p;
1687{
1688 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1689 return -1;
1690 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1691}
1692#endif
1693
1694/*
1695 * Return TRUE if "str" starts with a backslash that should be removed.
1696 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1697 * backslash is not a normal file name character.
1698 * '$' is a valid file name character, we don't remove the backslash before
1699 * it. This means it is not possible to use an environment variable after a
1700 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1701 * Although "\ name" is valid, the backslash in "Program\ files" must be
1702 * removed. Assume a file name doesn't start with a space.
1703 * For multi-byte names, never remove a backslash before a non-ascii
1704 * character, assume that all multi-byte characters are valid file name
1705 * characters.
1706 */
1707 int
1708rem_backslash(str)
1709 char_u *str;
1710{
1711#ifdef BACKSLASH_IN_FILENAME
1712 return (str[0] == '\\'
1713# ifdef FEAT_MBYTE
1714 && str[1] < 0x80
1715# endif
1716 && (str[1] == ' '
1717 || (str[1] != NUL
1718 && str[1] != '*'
1719 && str[1] != '?'
1720 && !vim_isfilec(str[1]))));
1721#else
1722 return (str[0] == '\\' && str[1] != NUL);
1723#endif
1724}
1725
1726/*
1727 * Halve the number of backslashes in a file name argument.
1728 * For MS-DOS we only do this if the character after the backslash
1729 * is not a normal file character.
1730 */
1731 void
1732backslash_halve(p)
1733 char_u *p;
1734{
1735 for ( ; *p; ++p)
1736 if (rem_backslash(p))
1737 STRCPY(p, p + 1);
1738}
1739
1740/*
1741 * backslash_halve() plus save the result in allocated memory.
1742 */
1743 char_u *
1744backslash_halve_save(p)
1745 char_u *p;
1746{
1747 char_u *res;
1748
1749 res = vim_strsave(p);
1750 if (res == NULL)
1751 return p;
1752 backslash_halve(res);
1753 return res;
1754}
1755
1756#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value (0 - 255) and gives the
 * character to use instead.  The values are written in octal, eight entries
 * per row.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
1797
1798/*
1799 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1800 * wanting 7-bit ASCII characters out the other end.
1801 */
1802 void
1803ebcdic2ascii(buffer, len)
1804 char_u *buffer;
1805 int len;
1806{
1807 int i;
1808
1809 for (i = 0; i < len; i++)
1810 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1811}
1812#endif