blob: 5436bb40b0fe0fc506fab4985d822552add2d470 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
22static int chartab_initialized = FALSE;
23
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: byte (c >> 3) holds the flag for "c" in bit (c & 7).
 * Every expansion is fully parenthesized so it can be used safely inside a
 * larger expression.  "c" is evaluated twice: don't pass an expression with
 * side effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
210 if (!do_isalpha || isalpha(c)
211#ifdef FEAT_FKMAP
212 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
213#endif
214 )
215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
219 chartab[c] &= ~CT_ID_CHAR;
220 else
221 chartab[c] |= CT_ID_CHAR;
222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
229#ifdef FEAT_FKMAP
230 || (p_altkeymap
231 && (F_isalpha(c) || F_isdigit(c)))
232#endif
233 )
234#ifdef FEAT_MBYTE
235 /* For double-byte we keep the cell width, so
236 * that we can detect it from the first byte. */
237 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
238#endif
239 )
240 {
241 if (tilde)
242 {
243 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
244 + ((dy_flags & DY_UHEX) ? 4 : 2);
245 chartab[c] &= ~CT_PRINT_CHAR;
246 }
247 else
248 {
249 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
250 chartab[c] |= CT_PRINT_CHAR;
251 }
252 }
253 }
254 else if (i == 2) /* (re)set fname flag */
255 {
256 if (tilde)
257 chartab[c] &= ~CT_FNAME_CHAR;
258 else
259 chartab[c] |= CT_FNAME_CHAR;
260 }
261 else /* i == 3 */ /* (re)set keyword flag */
262 {
263 if (tilde)
264 RESET_CHARTAB(buf, c);
265 else
266 SET_CHARTAB(buf, c);
267 }
268 }
269 ++c;
270 }
271 p = skip_to_option_part(p);
272 }
273 }
274 chartab_initialized = TRUE;
275 return OK;
276}
277
278/*
279 * Translate any special characters in buf[bufsize] in-place.
280 * The result is a string with only printable characters, but if there is not
281 * enough room, not all characters will be translated.
282 */
283 void
284trans_characters(buf, bufsize)
285 char_u *buf;
286 int bufsize;
287{
288 int len; /* length of string needing translation */
289 int room; /* room in buffer after string */
290 char_u *trs; /* translated character */
291 int trs_len; /* length of trs[] */
292
293 len = (int)STRLEN(buf);
294 room = bufsize - len;
295 while (*buf != 0)
296 {
297# ifdef FEAT_MBYTE
298 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000299 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 len -= trs_len;
301 else
302# endif
303 {
304 trs = transchar_byte(*buf);
305 trs_len = (int)STRLEN(trs);
306 if (trs_len > 1)
307 {
308 room -= trs_len - 1;
309 if (room <= 0)
310 return;
311 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
312 }
313 mch_memmove(buf, trs, (size_t)trs_len);
314 --len;
315 }
316 buf += trs_len;
317 }
318}
319
Bram Moolenaar7cc36e92007-03-27 10:42:05 +0000320#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
321 || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322/*
323 * Translate a string into allocated memory, replacing special chars with
324 * printable chars. Returns NULL when out of memory.
325 */
326 char_u *
327transstr(s)
328 char_u *s;
329{
330 char_u *res;
331 char_u *p;
332#ifdef FEAT_MBYTE
333 int l, len, c;
334 char_u hexbuf[11];
335#endif
336
337#ifdef FEAT_MBYTE
338 if (has_mbyte)
339 {
340 /* Compute the length of the result, taking account of unprintable
341 * multi-byte characters. */
342 len = 0;
343 p = s;
344 while (*p != NUL)
345 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000346 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347 {
348 c = (*mb_ptr2char)(p);
349 p += l;
350 if (vim_isprintc(c))
351 len += l;
352 else
353 {
354 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000355 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356 }
357 }
358 else
359 {
360 l = byte2cells(*p++);
361 if (l > 0)
362 len += l;
363 else
364 len += 4; /* illegal byte sequence */
365 }
366 }
367 res = alloc((unsigned)(len + 1));
368 }
369 else
370#endif
371 res = alloc((unsigned)(vim_strsize(s) + 1));
372 if (res != NULL)
373 {
374 *res = NUL;
375 p = s;
376 while (*p != NUL)
377 {
378#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000379 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380 {
381 c = (*mb_ptr2char)(p);
382 if (vim_isprintc(c))
383 STRNCAT(res, p, l); /* append printable multi-byte char */
384 else
385 transchar_hex(res + STRLEN(res), c);
386 p += l;
387 }
388 else
389#endif
390 STRCAT(res, transchar_byte(*p++));
391 }
392 }
393 return res;
394}
395#endif
396
397#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
398/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000399 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
400 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000401 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
402 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000403 */
404 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000405str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000406 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000407 int orglen;
408 char_u *buf;
409 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410{
411 garray_T ga;
412 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000413 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000414
415#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
416#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000417#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
418#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000419
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000420 /* Copy "str" into "buf" or allocated memory, unmodified. */
421 if (buf == NULL)
422 {
423 ga_init2(&ga, 1, 10);
424 if (ga_grow(&ga, len + 1) == FAIL)
425 return NULL;
426 mch_memmove(ga.ga_data, str, (size_t)len);
427 ga.ga_len = len;
428 }
429 else
430 {
431 if (len >= buflen) /* Ugly! */
432 len = buflen - 1;
433 mch_memmove(buf, str, (size_t)len);
434 }
435 if (buf == NULL)
436 GA_CHAR(len) = NUL;
437 else
438 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439
440 /* Make each character lower case. */
441 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000442 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000443 {
444#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000445 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446 {
447 if (enc_utf8)
448 {
449 int c, lc;
450
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000451 c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000452 lc = utf_tolower(c);
453 if (c != lc)
454 {
455 int ol = utf_char2len(c);
456 int nl = utf_char2len(lc);
457
458 /* If the byte length changes need to shift the following
459 * characters forward or backward. */
460 if (ol != nl)
461 {
462 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 {
464 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
465 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000466 {
467 /* out of memory, keep old char */
468 lc = c;
469 nl = ol;
470 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 if (ol != nl)
473 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000474 if (buf == NULL)
475 {
476 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000477 STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000478 ga.ga_len += nl - ol;
479 }
480 else
481 {
482 mch_memmove(buf + i + nl, buf + i + ol,
483 STRLEN(buf + i + ol) + 1);
484 len += nl - ol;
485 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000486 }
487 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000488 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000489 }
490 }
491 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000492 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493 }
494 else
495#endif
496 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000497 if (buf == NULL)
498 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
499 else
500 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501 ++i;
502 }
503 }
504
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000505 if (buf == NULL)
506 return (char_u *)ga.ga_data;
507 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508}
509#endif
510
511/*
512 * Catch 22: chartab[] can't be initialized before the options are
513 * initialized, and initializing options may cause transchar() to be called!
514 * When chartab_initialized == FALSE don't use chartab[].
515 * Does NOT work for multi-byte characters, c must be <= 255.
516 * Also doesn't work for the first byte of a multi-byte, "c" must be a
517 * character!
518 */
519static char_u transchar_buf[7];
520
521 char_u *
522transchar(c)
523 int c;
524{
525 int i;
526
527 i = 0;
528 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
529 {
530 transchar_buf[0] = '~';
531 transchar_buf[1] = '@';
532 i = 2;
533 c = K_SECOND(c);
534 }
535
536 if ((!chartab_initialized && (
537#ifdef EBCDIC
538 (c >= 64 && c < 255)
539#else
540 (c >= ' ' && c <= '~')
541#endif
542#ifdef FEAT_FKMAP
543 || F_ischar(c)
544#endif
545 )) || (c < 256 && vim_isprintc_strict(c)))
546 {
547 /* printable character */
548 transchar_buf[i] = c;
549 transchar_buf[i + 1] = NUL;
550 }
551 else
552 transchar_nonprint(transchar_buf + i, c);
553 return transchar_buf;
554}
555
556#if defined(FEAT_MBYTE) || defined(PROTO)
557/*
558 * Like transchar(), but called with a byte instead of a character. Checks
559 * for an illegal UTF-8 byte.
560 */
561 char_u *
562transchar_byte(c)
563 int c;
564{
565 if (enc_utf8 && c >= 0x80)
566 {
567 transchar_nonprint(transchar_buf, c);
568 return transchar_buf;
569 }
570 return transchar(c);
571}
572#endif
573
574/*
575 * Convert non-printable character to two or more printable characters in
576 * "buf[]". "buf" needs to be able to hold five bytes.
577 * Does NOT work for multi-byte characters, c must be <= 255.
578 */
579 void
580transchar_nonprint(buf, c)
581 char_u *buf;
582 int c;
583{
584 if (c == NL)
585 c = NUL; /* we use newline in place of a NUL */
586 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
587 c = NL; /* we use CR in place of NL in this case */
588
589 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
590 transchar_hex(buf, c);
591
592#ifdef EBCDIC
593 /* For EBCDIC only the characters 0-63 and 255 are not printable */
594 else if (CtrlChar(c) != 0 || c == DEL)
595#else
596 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
597#endif
598 {
599 buf[0] = '^';
600#ifdef EBCDIC
601 if (c == DEL)
602 buf[1] = '?'; /* DEL displayed as ^? */
603 else
604 buf[1] = CtrlChar(c);
605#else
606 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
607#endif
608
609 buf[2] = NUL;
610 }
611#ifdef FEAT_MBYTE
612 else if (enc_utf8 && c >= 0x80)
613 {
614 transchar_hex(buf, c);
615 }
616#endif
617#ifndef EBCDIC
618 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
619 {
620 buf[0] = '|';
621 buf[1] = c - 0x80;
622 buf[2] = NUL;
623 }
624#else
625 else if (c < 64)
626 {
627 buf[0] = '~';
628 buf[1] = MetaChar(c);
629 buf[2] = NUL;
630 }
631#endif
632 else /* 0x80 - 0x9f and 0xff */
633 {
634 /*
635 * TODO: EBCDIC I don't know what to do with this chars, so I display
636 * them as '~?' for now
637 */
638 buf[0] = '~';
639#ifdef EBCDIC
640 buf[1] = '?'; /* 0xff displayed as ~? */
641#else
642 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
643#endif
644 buf[2] = NUL;
645 }
646}
647
648 void
649transchar_hex(buf, c)
650 char_u *buf;
651 int c;
652{
653 int i = 0;
654
655 buf[0] = '<';
656#ifdef FEAT_MBYTE
657 if (c > 255)
658 {
659 buf[++i] = nr2hex((unsigned)c >> 12);
660 buf[++i] = nr2hex((unsigned)c >> 8);
661 }
662#endif
663 buf[++i] = nr2hex((unsigned)c >> 4);
664 buf[++i] = nr2hex(c);
665 buf[++i] = '>';
666 buf[++i] = NUL;
667}
668
/*
 * Return the hex digit character for the lower 4 bits of "c".
 * Lower case letters are used, so that <F1> can't be confused: it could be
 * 0xf1 or function key 1.
 */
    static int
nr2hex(c)
    int		c;
{
    return "0123456789abcdef"[c & 0xf];
}
682
683/*
684 * Return number of display cells occupied by byte "b".
685 * Caller must make sure 0 <= b <= 255.
686 * For multi-byte mode "b" must be the first byte of a character.
687 * A TAB is counted as two cells: "^I".
688 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
689 * cells depends on further bytes.
690 */
691 int
692byte2cells(b)
693 int b;
694{
695#ifdef FEAT_MBYTE
696 if (enc_utf8 && b >= 0x80)
697 return 0;
698#endif
699 return (chartab[b] & CT_CELL_MASK);
700}
701
702/*
703 * Return number of display cells occupied by character "c".
704 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
705 * A TAB is counted as two cells: "^I" or four: "<09>".
706 */
707 int
708char2cells(c)
709 int c;
710{
711 if (IS_SPECIAL(c))
712 return char2cells(K_SECOND(c)) + 2;
713#ifdef FEAT_MBYTE
714 if (c >= 0x80)
715 {
716 /* UTF-8: above 0x80 need to check the value */
717 if (enc_utf8)
718 return utf_char2cells(c);
719 /* DBCS: double-byte means double-width, except for euc-jp with first
720 * byte 0x8e */
721 if (enc_dbcs != 0 && c >= 0x100)
722 {
723 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
724 return 1;
725 return 2;
726 }
727 }
728#endif
729 return (chartab[c & 0xff] & CT_CELL_MASK);
730}
731
732/*
733 * Return number of display cells occupied by character at "*p".
734 * A TAB is counted as two cells: "^I" or four: "<09>".
735 */
736 int
737ptr2cells(p)
738 char_u *p;
739{
740#ifdef FEAT_MBYTE
741 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
742 if (enc_utf8 && *p >= 0x80)
743 return utf_ptr2cells(p);
744 /* For DBCS we can tell the cell count from the first byte. */
745#endif
746 return (chartab[*p] & CT_CELL_MASK);
747}
748
749/*
750 * Return the number of characters string "s" will take on the screen,
751 * counting TABs as two characters: "^I".
752 */
753 int
754vim_strsize(s)
755 char_u *s;
756{
757 return vim_strnsize(s, (int)MAXCOL);
758}
759
760/*
761 * Return the number of characters string "s[len]" will take on the screen,
762 * counting TABs as two characters: "^I".
763 */
764 int
765vim_strnsize(s, len)
766 char_u *s;
767 int len;
768{
769 int size = 0;
770
771 while (*s != NUL && --len >= 0)
772 {
773#ifdef FEAT_MBYTE
774 if (has_mbyte)
775 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000776 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
778 size += ptr2cells(s);
779 s += l;
780 len -= l - 1;
781 }
782 else
783#endif
784 size += byte2cells(*s++);
785 }
786 return size;
787}
788
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return
 * from the calling function, it must be the last statement executed there.
 * A TAB uses up to the next multiple of 'tabstop' of "buf", unless 'list'
 * is set for "wp" and lcs_tab1 is zero; anything else uses ptr2cells().
 * "col" is parenthesized, so that an expression can be passed safely.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
805
806#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
807 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
808 int
809chartabsize(p, col)
810 char_u *p;
811 colnr_T col;
812{
813 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
814}
815#endif
816
817#ifdef FEAT_LINEBREAK
818 static int
819win_chartabsize(wp, p, col)
820 win_T *wp;
821 char_u *p;
822 colnr_T col;
823{
824 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
825}
826#endif
827
828/*
829 * return the number of characters the string 's' will take on the screen,
830 * taking into account the size of a tab
831 */
832 int
833linetabsize(s)
834 char_u *s;
835{
836 colnr_T col = 0;
837
838 while (*s != NUL)
839 col += lbr_chartabsize_adv(&s, col);
840 return (int)col;
841}
842
843/*
844 * Like linetabsize(), but for a given window instead of the current one.
845 */
846 int
847win_linetabsize(wp, p, len)
848 win_T *wp;
849 char_u *p;
850 colnr_T len;
851{
852 colnr_T col = 0;
853 char_u *s;
854
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000855 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000857 return (int)col;
858}
859
860/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000861 * Return TRUE if 'c' is a normal identifier character:
862 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 */
864 int
865vim_isIDc(c)
866 int c;
867{
868 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
869}
870
871/*
872 * return TRUE if 'c' is a keyword character: Letters and characters from
873 * 'iskeyword' option for current buffer.
874 * For multi-byte characters mb_get_class() is used (builtin rules).
875 */
876 int
877vim_iswordc(c)
878 int c;
879{
880#ifdef FEAT_MBYTE
881 if (c >= 0x100)
882 {
883 if (enc_dbcs != 0)
884 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
885 if (enc_utf8)
886 return utf_class(c) >= 2;
887 }
888#endif
889 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
890}
891
892/*
893 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
894 */
895 int
896vim_iswordp(p)
897 char_u *p;
898{
899#ifdef FEAT_MBYTE
900 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
901 return mb_get_class(p) >= 2;
902#endif
903 return GET_CHARTAB(curbuf, *p) != 0;
904}
905
906#if defined(FEAT_SYN_HL) || defined(PROTO)
907 int
908vim_iswordc_buf(p, buf)
909 char_u *p;
910 buf_T *buf;
911{
912# ifdef FEAT_MBYTE
913 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
914 return mb_get_class(p) >= 2;
915# endif
916 return (GET_CHARTAB(buf, *p) != 0);
917}
Bram Moolenaarc4956c82006-03-12 21:58:43 +0000918#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000919
920/*
921 * return TRUE if 'c' is a valid file-name character
922 * Assume characters above 0x100 are valid (multi-byte).
923 */
924 int
925vim_isfilec(c)
926 int c;
927{
928 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
929}
930
931/*
932 * return TRUE if 'c' is a printable character
933 * Assume characters above 0x100 are printable (multi-byte), except for
934 * Unicode.
935 */
936 int
937vim_isprintc(c)
938 int c;
939{
940#ifdef FEAT_MBYTE
941 if (enc_utf8 && c >= 0x100)
942 return utf_printable(c);
943#endif
944 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
945}
946
947/*
948 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
949 * byte of a double-byte character.
950 */
951 int
952vim_isprintc_strict(c)
953 int c;
954{
955#ifdef FEAT_MBYTE
956 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
957 return FALSE;
958 if (enc_utf8 && c >= 0x100)
959 return utf_printable(c);
960#endif
961 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
962}
963
964/*
965 * like chartabsize(), but also check for line breaks on the screen
966 */
967 int
968lbr_chartabsize(s, col)
969 unsigned char *s;
970 colnr_T col;
971{
972#ifdef FEAT_LINEBREAK
973 if (!curwin->w_p_lbr && *p_sbr == NUL)
974 {
975#endif
976#ifdef FEAT_MBYTE
977 if (curwin->w_p_wrap)
978 return win_nolbr_chartabsize(curwin, s, col, NULL);
979#endif
980 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
981#ifdef FEAT_LINEBREAK
982 }
983 return win_lbr_chartabsize(curwin, s, col, NULL);
984#endif
985}
986
987/*
988 * Call lbr_chartabsize() and advance the pointer.
989 */
990 int
991lbr_chartabsize_adv(s, col)
992 char_u **s;
993 colnr_T col;
994{
995 int retval;
996
997 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000998 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000999 return retval;
1000}
1001
1002/*
1003 * This function is used very often, keep it fast!!!!
1004 *
1005 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1006 * string at start of line. Warning: *headp is only set if it's a non-zero
1007 * value, init to 0 before calling.
1008 */
1009/*ARGSUSED*/
1010 int
1011win_lbr_chartabsize(wp, s, col, headp)
1012 win_T *wp;
1013 char_u *s;
1014 colnr_T col;
1015 int *headp;
1016{
1017#ifdef FEAT_LINEBREAK
1018 int c;
1019 int size;
1020 colnr_T col2;
1021 colnr_T colmax;
1022 int added;
1023# ifdef FEAT_MBYTE
1024 int mb_added = 0;
1025# else
1026# define mb_added 0
1027# endif
1028 int numberextra;
1029 char_u *ps;
1030 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001031 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001032
1033 /*
1034 * No 'linebreak' and 'showbreak': return quickly.
1035 */
1036 if (!wp->w_p_lbr && *p_sbr == NUL)
1037#endif
1038 {
1039#ifdef FEAT_MBYTE
1040 if (wp->w_p_wrap)
1041 return win_nolbr_chartabsize(wp, s, col, headp);
1042#endif
1043 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1044 }
1045
1046#ifdef FEAT_LINEBREAK
1047 /*
1048 * First get normal size, without 'linebreak'
1049 */
1050 size = win_chartabsize(wp, s, col);
1051 c = *s;
1052
1053 /*
1054 * If 'linebreak' set check at a blank before a non-blank if the line
1055 * needs a break here
1056 */
1057 if (wp->w_p_lbr
1058 && vim_isbreak(c)
1059 && !vim_isbreak(s[1])
1060 && !wp->w_p_list
1061 && wp->w_p_wrap
1062# ifdef FEAT_VERTSPLIT
1063 && wp->w_width != 0
1064# endif
1065 )
1066 {
1067 /*
1068 * Count all characters from first non-blank after a blank up to next
1069 * non-blank after a blank.
1070 */
1071 numberextra = win_col_off(wp);
1072 col2 = col;
1073 colmax = W_WIDTH(wp) - numberextra;
1074 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001075 {
1076 n = colmax + win_col_off2(wp);
1077 if (n > 0)
1078 colmax += (((col - colmax) / n) + 1) * n;
1079 }
1080
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 for (;;)
1082 {
1083 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001084 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001085 c = *s;
1086 if (!(c != NUL
1087 && (vim_isbreak(c)
1088 || (!vim_isbreak(c)
1089 && (col2 == col || !vim_isbreak(*ps))))))
1090 break;
1091
1092 col2 += win_chartabsize(wp, s, col2);
1093 if (col2 >= colmax) /* doesn't fit */
1094 {
1095 size = colmax - col;
1096 tab_corr = FALSE;
1097 break;
1098 }
1099 }
1100 }
1101# ifdef FEAT_MBYTE
1102 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1103 && wp->w_p_wrap && in_win_border(wp, col))
1104 {
1105 ++size; /* Count the ">" in the last column. */
1106 mb_added = 1;
1107 }
1108# endif
1109
1110 /*
1111 * May have to add something for 'showbreak' string at start of line
1112 * Set *headp to the size of what we add.
1113 */
1114 added = 0;
1115 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1116 {
1117 numberextra = win_col_off(wp);
1118 col += numberextra + mb_added;
1119 if (col >= (colnr_T)W_WIDTH(wp))
1120 {
1121 col -= W_WIDTH(wp);
1122 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1123 if (numberextra > 0)
1124 col = col % numberextra;
1125 }
1126 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1127 {
1128 added = vim_strsize(p_sbr);
1129 if (tab_corr)
1130 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1131 else
1132 size += added;
1133 if (col != 0)
1134 added = 0;
1135 }
1136 }
1137 if (headp != NULL)
1138 *headp = added + mb_added;
1139 return size;
1140#endif
1141}
1142
1143#if defined(FEAT_MBYTE) || defined(PROTO)
1144/*
1145 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1146 * 'wrap' is on. This means we need to check for a double-byte character that
1147 * doesn't fit at the end of the screen line.
1148 */
1149 static int
1150win_nolbr_chartabsize(wp, s, col, headp)
1151 win_T *wp;
1152 char_u *s;
1153 colnr_T col;
1154 int *headp;
1155{
1156 int n;
1157
1158 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1159 {
1160 n = wp->w_buffer->b_p_ts;
1161 return (int)(n - (col % n));
1162 }
1163 n = ptr2cells(s);
1164 /* Add one cell for a double-width character in the last column of the
1165 * window, displayed with a ">". */
1166 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1167 {
1168 if (headp != NULL)
1169 *headp = 1;
1170 return 3;
1171 }
1172 return n;
1173}
1174
1175/*
1176 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1177 * "wp".
1178 */
1179 int
1180in_win_border(wp, vcol)
1181 win_T *wp;
1182 colnr_T vcol;
1183{
1184 colnr_T width1; /* width of first line (after line number) */
1185 colnr_T width2; /* width of further lines */
1186
1187#ifdef FEAT_VERTSPLIT
1188 if (wp->w_width == 0) /* there is no border */
1189 return FALSE;
1190#endif
1191 width1 = W_WIDTH(wp) - win_col_off(wp);
1192 if (vcol < width1 - 1)
1193 return FALSE;
1194 if (vcol == width1 - 1)
1195 return TRUE;
1196 width2 = width1 + win_col_off2(wp);
1197 return ((vcol - width1) % width2 == width2 - 1);
1198}
1199#endif /* FEAT_MBYTE */
1200
1201/*
1202 * Get virtual column number of pos.
1203 * start: on the first position of this character (TAB, ctrl)
1204 * cursor: where the cursor is on this character (first char, except for TAB)
1205 * end: on the last position of this character (TAB, ctrl)
1206 *
1207 * This is used very often, keep it fast!
1208 */
1209 void
1210getvcol(wp, pos, start, cursor, end)
1211 win_T *wp;
1212 pos_T *pos;
1213 colnr_T *start;
1214 colnr_T *cursor;
1215 colnr_T *end;
1216{
1217 colnr_T vcol;
1218 char_u *ptr; /* points to current char */
1219 char_u *posptr; /* points to char at pos->col */
1220 int incr;
1221 int head;
1222 int ts = wp->w_buffer->b_p_ts;
1223 int c;
1224
1225 vcol = 0;
1226 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1227 posptr = ptr + pos->col;
1228
1229 /*
1230 * This function is used very often, do some speed optimizations.
1231 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1232 * Also use this when 'list' is set but tabs take their normal size.
1233 */
1234 if ((!wp->w_p_list || lcs_tab1 != NUL)
1235#ifdef FEAT_LINEBREAK
1236 && !wp->w_p_lbr && *p_sbr == NUL
1237#endif
1238 )
1239 {
1240#ifndef FEAT_MBYTE
1241 head = 0;
1242#endif
1243 for (;;)
1244 {
1245#ifdef FEAT_MBYTE
1246 head = 0;
1247#endif
1248 c = *ptr;
1249 /* make sure we don't go past the end of the line */
1250 if (c == NUL)
1251 {
1252 incr = 1; /* NUL at end of line only takes one column */
1253 break;
1254 }
1255 /* A tab gets expanded, depending on the current column */
1256 if (c == TAB)
1257 incr = ts - (vcol % ts);
1258 else
1259 {
1260#ifdef FEAT_MBYTE
1261 if (has_mbyte)
1262 {
1263 /* For utf-8, if the byte is >= 0x80, need to look at
1264 * further bytes to find the cell width. */
1265 if (enc_utf8 && c >= 0x80)
1266 incr = utf_ptr2cells(ptr);
1267 else
1268 incr = CHARSIZE(c);
1269
1270 /* If a double-cell char doesn't fit at the end of a line
1271 * it wraps to the next line, it's like this char is three
1272 * cells wide. */
1273 if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1274 {
1275 ++incr;
1276 head = 1;
1277 }
1278 }
1279 else
1280#endif
1281 incr = CHARSIZE(c);
1282 }
1283
1284 if (ptr >= posptr) /* character at pos->col */
1285 break;
1286
1287 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001288 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 }
1290 }
1291 else
1292 {
1293 for (;;)
1294 {
1295 /* A tab gets expanded, depending on the current column */
1296 head = 0;
1297 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1298 /* make sure we don't go past the end of the line */
1299 if (*ptr == NUL)
1300 {
1301 incr = 1; /* NUL at end of line only takes one column */
1302 break;
1303 }
1304
1305 if (ptr >= posptr) /* character at pos->col */
1306 break;
1307
1308 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001309 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 }
1311 }
1312 if (start != NULL)
1313 *start = vcol + head;
1314 if (end != NULL)
1315 *end = vcol + incr - 1;
1316 if (cursor != NULL)
1317 {
1318 if (*ptr == TAB
1319 && (State & NORMAL)
1320 && !wp->w_p_list
1321 && !virtual_active()
1322#ifdef FEAT_VISUAL
1323 && !(VIsual_active
1324 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1325#endif
1326 )
1327 *cursor = vcol + incr - 1; /* cursor at end */
1328 else
1329 *cursor = vcol + head; /* cursor at start */
1330 }
1331}
1332
1333/*
1334 * Get virtual cursor column in the current window, pretending 'list' is off.
1335 */
1336 colnr_T
1337getvcol_nolist(posp)
1338 pos_T *posp;
1339{
1340 int list_save = curwin->w_p_list;
1341 colnr_T vcol;
1342
1343 curwin->w_p_list = FALSE;
1344 getvcol(curwin, posp, NULL, &vcol, NULL);
1345 curwin->w_p_list = list_save;
1346 return vcol;
1347}
1348
1349#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1350/*
1351 * Get virtual column in virtual mode.
1352 */
1353 void
1354getvvcol(wp, pos, start, cursor, end)
1355 win_T *wp;
1356 pos_T *pos;
1357 colnr_T *start;
1358 colnr_T *cursor;
1359 colnr_T *end;
1360{
1361 colnr_T col;
1362 colnr_T coladd;
1363 colnr_T endadd;
1364# ifdef FEAT_MBYTE
1365 char_u *ptr;
1366# endif
1367
1368 if (virtual_active())
1369 {
1370 /* For virtual mode, only want one value */
1371 getvcol(wp, pos, &col, NULL, NULL);
1372
1373 coladd = pos->coladd;
1374 endadd = 0;
1375# ifdef FEAT_MBYTE
1376 /* Cannot put the cursor on part of a wide character. */
1377 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1378 if (pos->col < STRLEN(ptr))
1379 {
1380 int c = (*mb_ptr2char)(ptr + pos->col);
1381
1382 if (c != TAB && vim_isprintc(c))
1383 {
1384 endadd = char2cells(c) - 1;
Bram Moolenaara5792f52005-11-23 21:25:05 +00001385 if (coladd > endadd) /* past end of line */
1386 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387 else
1388 coladd = 0;
1389 }
1390 }
1391# endif
1392 col += coladd;
1393 if (start != NULL)
1394 *start = col;
1395 if (cursor != NULL)
1396 *cursor = col;
1397 if (end != NULL)
1398 *end = col + endadd;
1399 }
1400 else
1401 getvcol(wp, pos, start, cursor, end);
1402}
1403#endif
1404
1405#if defined(FEAT_VISUAL) || defined(PROTO)
1406/*
1407 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1408 * Used for Visual block mode.
1409 */
1410 void
1411getvcols(wp, pos1, pos2, left, right)
1412 win_T *wp;
1413 pos_T *pos1, *pos2;
1414 colnr_T *left, *right;
1415{
1416 colnr_T from1, from2, to1, to2;
1417
1418 if (ltp(pos1, pos2))
1419 {
1420 getvvcol(wp, pos1, &from1, NULL, &to1);
1421 getvvcol(wp, pos2, &from2, NULL, &to2);
1422 }
1423 else
1424 {
1425 getvvcol(wp, pos2, &from1, NULL, &to1);
1426 getvvcol(wp, pos1, &from2, NULL, &to2);
1427 }
1428 if (from2 < from1)
1429 *left = from2;
1430 else
1431 *left = from1;
1432 if (to2 > to1)
1433 {
1434 if (*p_sel == 'e' && from2 - 1 >= to1)
1435 *right = from2 - 1;
1436 else
1437 *right = to2;
1438 }
1439 else
1440 *right = to1;
1441}
1442#endif
1443
1444/*
1445 * skipwhite: skip over ' ' and '\t'.
1446 */
1447 char_u *
1448skipwhite(p)
1449 char_u *p;
1450{
1451 while (vim_iswhite(*p)) /* skip to next non-white */
1452 ++p;
1453 return p;
1454}
1455
1456/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001457 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001458 */
1459 char_u *
1460skipdigits(p)
1461 char_u *p;
1462{
1463 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1464 ++p;
1465 return p;
1466}
1467
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001468#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001469/*
1470 * skip over digits and hex characters
1471 */
1472 char_u *
1473skiphex(p)
1474 char_u *p;
1475{
1476 while (vim_isxdigit(*p)) /* skip to next non-digit */
1477 ++p;
1478 return p;
1479}
1480#endif
1481
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001482#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1483/*
1484 * skip to digit (or NUL after the string)
1485 */
1486 char_u *
1487skiptodigit(p)
1488 char_u *p;
1489{
1490 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1491 ++p;
1492 return p;
1493}
1494
1495/*
1496 * skip to hex character (or NUL after the string)
1497 */
1498 char_u *
1499skiptohex(p)
1500 char_u *p;
1501{
1502 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1503 ++p;
1504 return p;
1505}
1506#endif
1507
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only '0' to '9' are accepted.  Returns 1 for a digit, 0 otherwise.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1520
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Only '0' to '9', 'a' to 'f' and 'A' to 'F' are accepted.  Returns 1 for a
 * hex digit, 0 otherwise.
 */
    int
vim_isxdigit(c)
    int         c;
{
    int         is_decimal = !(c < '0' || c > '9');
    int         is_lower_hex = !(c < 'a' || c > 'f');
    int         is_upper_hex = !(c < 'A' || c > 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1534
Bram Moolenaar78622822005-08-23 21:00:13 +00001535#if defined(FEAT_MBYTE) || defined(PROTO)
1536/*
1537 * Vim's own character class functions. These exist because many library
1538 * islower()/toupper() etc. do not work properly: they crash when used with
1539 * invalid values or can't handle latin1 when the locale is C.
1540 * Speed is most important here.
1541 */
1542#define LATIN1LOWER 'l'
1543#define LATIN1UPPER 'U'
1544
1545/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001546static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1547static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1548static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
Bram Moolenaar78622822005-08-23 21:00:13 +00001549
1550 int
1551vim_islower(c)
1552 int c;
1553{
1554 if (c <= '@')
1555 return FALSE;
1556 if (c >= 0x80)
1557 {
1558 if (enc_utf8)
1559 return utf_islower(c);
1560 if (c >= 0x100)
1561 {
1562#ifdef HAVE_ISWLOWER
1563 if (has_mbyte)
1564 return iswlower(c);
1565#endif
1566 /* islower() can't handle these chars and may crash */
1567 return FALSE;
1568 }
1569 if (enc_latin1like)
1570 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1571 }
1572 return islower(c);
1573}
1574
1575 int
1576vim_isupper(c)
1577 int c;
1578{
1579 if (c <= '@')
1580 return FALSE;
1581 if (c >= 0x80)
1582 {
1583 if (enc_utf8)
1584 return utf_isupper(c);
1585 if (c >= 0x100)
1586 {
1587#ifdef HAVE_ISWUPPER
1588 if (has_mbyte)
1589 return iswupper(c);
1590#endif
1591 /* islower() can't handle these chars and may crash */
1592 return FALSE;
1593 }
1594 if (enc_latin1like)
1595 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1596 }
1597 return isupper(c);
1598}
1599
1600 int
1601vim_toupper(c)
1602 int c;
1603{
1604 if (c <= '@')
1605 return c;
1606 if (c >= 0x80)
1607 {
1608 if (enc_utf8)
1609 return utf_toupper(c);
1610 if (c >= 0x100)
1611 {
1612#ifdef HAVE_TOWUPPER
1613 if (has_mbyte)
1614 return towupper(c);
1615#endif
1616 /* toupper() can't handle these chars and may crash */
1617 return c;
1618 }
1619 if (enc_latin1like)
1620 return latin1upper[c];
1621 }
1622 return TOUPPER_LOC(c);
1623}
1624
1625 int
1626vim_tolower(c)
1627 int c;
1628{
1629 if (c <= '@')
1630 return c;
1631 if (c >= 0x80)
1632 {
1633 if (enc_utf8)
1634 return utf_tolower(c);
1635 if (c >= 0x100)
1636 {
1637#ifdef HAVE_TOWLOWER
1638 if (has_mbyte)
1639 return towlower(c);
1640#endif
1641 /* tolower() can't handle these chars and may crash */
1642 return c;
1643 }
1644 if (enc_latin1like)
1645 return latin1lower[c];
1646 }
1647 return TOLOWER_LOC(c);
1648}
1649#endif
1650
Bram Moolenaar071d4272004-06-13 20:20:40 +00001651/*
1652 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1653 */
1654 char_u *
1655skiptowhite(p)
1656 char_u *p;
1657{
1658 while (*p != ' ' && *p != '\t' && *p != NUL)
1659 ++p;
1660 return p;
1661}
1662
1663#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1664 || defined(PROTO)
1665/*
1666 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1667 */
1668 char_u *
1669skiptowhite_esc(p)
1670 char_u *p;
1671{
1672 while (*p != ' ' && *p != '\t' && *p != NUL)
1673 {
1674 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1675 ++p;
1676 ++p;
1677 }
1678 return p;
1679}
1680#endif
1681
1682/*
1683 * Getdigits: Get a number from a string and skip over it.
1684 * Note: the argument is a pointer to a char_u pointer!
1685 */
1686 long
1687getdigits(pp)
1688 char_u **pp;
1689{
1690 char_u *p;
1691 long retval;
1692
1693 p = *pp;
1694 retval = atol((char *)p);
1695 if (*p == '-') /* skip negative sign */
1696 ++p;
1697 p = skipdigits(p); /* skip to next non-digit */
1698 *pp = p;
1699 return retval;
1700}
1701
1702/*
1703 * Return TRUE if "lbuf" is empty or only contains blanks.
1704 */
1705 int
1706vim_isblankline(lbuf)
1707 char_u *lbuf;
1708{
1709 char_u *p;
1710
1711 p = skipwhite(lbuf);
1712 return (*p == NUL || *p == '\r' || *p == '\n');
1713}
1714
1715/*
1716 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001717 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001718 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1719 * 0 decimal
1720 * '0' octal
1721 * 'X' hex
1722 * 'x' hex
1723 * If "len" is not NULL, the length of the number in characters is returned.
1724 * If "nptr" is not NULL, the signed result is returned in it.
1725 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001726 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001727 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1728 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001729 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001730 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731 */
1732 void
1733vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1734 char_u *start;
1735 int *hexp; /* return: type of number 0 = decimal, 'x'
1736 or 'X' is hex, '0' = octal */
1737 int *len; /* return: detected length of number */
1738 int dooct; /* recognize octal number */
1739 int dohex; /* recognize hex number */
1740 long *nptr; /* return: signed result */
1741 unsigned long *unptr; /* return: unsigned result */
1742{
1743 char_u *ptr = start;
1744 int hex = 0; /* default is decimal */
1745 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001747 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748
1749 if (ptr[0] == '-')
1750 {
1751 negative = TRUE;
1752 ++ptr;
1753 }
1754
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001755 /* Recognize hex and octal. */
1756 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757 {
1758 hex = ptr[1];
1759 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1760 ptr += 2; /* hexadecimal */
1761 else
1762 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001763 hex = 0; /* default is decimal */
1764 if (dooct)
1765 {
1766 /* Don't interpret "0", "08" or "0129" as octal. */
1767 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1768 {
1769 if (ptr[n] > '7')
1770 {
1771 hex = 0; /* can't be octal */
1772 break;
1773 }
1774 if (ptr[n] > '0')
1775 hex = '0'; /* assume octal */
1776 }
1777 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001778 }
1779 }
1780
1781 /*
1782 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1783 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001784 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001786 /* octal */
1787 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001789 un = 8 * un + (unsigned long)(*ptr - '0');
1790 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001792 }
1793 else if (hex != 0 || dohex > 1)
1794 {
1795 /* hex */
1796 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001797 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001798 un = 16 * un + (unsigned long)hex2nr(*ptr);
1799 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 }
1801 }
1802 else
1803 {
1804 /* decimal */
1805 while (VIM_ISDIGIT(*ptr))
1806 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001807 un = 10 * un + (unsigned long)(*ptr - '0');
1808 ++ptr;
1809 }
1810 }
1811
Bram Moolenaar071d4272004-06-13 20:20:40 +00001812 if (hexp != NULL)
1813 *hexp = hex;
1814 if (len != NULL)
1815 *len = (int)(ptr - start);
1816 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001817 {
1818 if (negative) /* account for leading '-' for decimal numbers */
1819 *nptr = -(long)un;
1820 else
1821 *nptr = (long)un;
1822 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001823 if (unptr != NULL)
1824 *unptr = un;
1825}
1826
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other character is handled like a decimal digit: the result is the
 * offset from '0'.
 */
    int
hex2nr(c)
    int         c;
{
    int         first;          /* first character of the range "c" is in */
    int         value;          /* value of that first character */

    if (c >= 'a' && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    else
    {
        first = '0';
        value = 0;
    }
    return c - first + value;
}
1841
1842#if defined(FEAT_TERMRESPONSE) \
1843 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1844/*
1845 * Convert two hex characters to a byte.
1846 * Return -1 if one of the characters is not hex.
1847 */
1848 int
1849hexhex2nr(p)
1850 char_u *p;
1851{
1852 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1853 return -1;
1854 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1855}
1856#endif
1857
1858/*
1859 * Return TRUE if "str" starts with a backslash that should be removed.
1860 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1861 * backslash is not a normal file name character.
1862 * '$' is a valid file name character, we don't remove the backslash before
1863 * it. This means it is not possible to use an environment variable after a
1864 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1865 * Although "\ name" is valid, the backslash in "Program\ files" must be
1866 * removed. Assume a file name doesn't start with a space.
1867 * For multi-byte names, never remove a backslash before a non-ascii
1868 * character, assume that all multi-byte characters are valid file name
1869 * characters.
1870 */
1871 int
1872rem_backslash(str)
1873 char_u *str;
1874{
1875#ifdef BACKSLASH_IN_FILENAME
1876 return (str[0] == '\\'
1877# ifdef FEAT_MBYTE
1878 && str[1] < 0x80
1879# endif
1880 && (str[1] == ' '
1881 || (str[1] != NUL
1882 && str[1] != '*'
1883 && str[1] != '?'
1884 && !vim_isfilec(str[1]))));
1885#else
1886 return (str[0] == '\\' && str[1] != NUL);
1887#endif
1888}
1889
1890/*
1891 * Halve the number of backslashes in a file name argument.
1892 * For MS-DOS we only do this if the character after the backslash
1893 * is not a normal file character.
1894 */
1895 void
1896backslash_halve(p)
1897 char_u *p;
1898{
1899 for ( ; *p; ++p)
1900 if (rem_backslash(p))
1901 STRCPY(p, p + 1);
1902}
1903
1904/*
1905 * backslash_halve() plus save the result in allocated memory.
1906 */
1907 char_u *
1908backslash_halve_save(p)
1909 char_u *p;
1910{
1911 char_u *res;
1912
1913 res = vim_strsave(p);
1914 if (res == NULL)
1915 return p;
1916 backslash_halve(res);
1917 return res;
1918}
1919
1920#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value, the entry is the byte to
 * use instead (written in octal).  The comment in front of each row is the
 * index of the first entry of that row.
 */
static char_u ebcdic2ascii_tab[256] =
{
    /* 0x00 */ 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    /* 0x08 */ 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    /* 0x10 */ 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    /* 0x18 */ 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    /* 0x20 */ 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    /* 0x28 */ 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    /* 0x30 */ 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    /* 0x38 */ 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    /* 0x40 */ 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    /* 0x48 */ 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    /* 0x50 */ 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    /* 0x58 */ 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    /* 0x60 */ 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    /* 0x68 */ 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    /* 0x70 */ 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    /* 0x78 */ 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    /* 0x80 */ 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    /* 0x88 */ 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    /* 0x90 */ 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    /* 0x98 */ 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    /* 0xa0 */ 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    /* 0xa8 */ 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    /* 0xb0 */ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    /* 0xb8 */ 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    /* 0xc0 */ 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    /* 0xc8 */ 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    /* 0xd0 */ 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    /* 0xd8 */ 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    /* 0xe0 */ 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    /* 0xe8 */ 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    /* 0xf0 */ 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    /* 0xf8 */ 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
1961
1962/*
1963 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1964 * wanting 7-bit ASCII characters out the other end.
1965 */
1966 void
1967ebcdic2ascii(buffer, len)
1968 char_u *buffer;
1969 int len;
1970{
1971 int i;
1972
1973 for (i = 0; i < len; i++)
1974 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1975}
1976#endif