blob: 6ad8ae587656c895a8a857616b0883541a8b5ff1 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
#ifdef FEAT_LINEBREAK
/* Size of the character at "p" in window "wp" at column "col"; defined
 * further down with the RET_WIN_BUF_CHARTABSIZE() macro. */
static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
#endif

#ifdef FEAT_MBYTE
/* Like win_lbr_chartabsize(), for when 'linebreak' is off and 'wrap' is on. */
static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
#endif

/* Convert the lower 4 bits of "c" to its hex character. */
static int nr2hex __ARGS((int c));

/* Set to TRUE at the end of buf_init_chartab(); while it is FALSE
 * transchar() does not consult chartab[]. */
static int chartab_initialized = FALSE;
23
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the byte index is "c >> 3", the bit index is "c & 7".
 *
 * Every replacement list is fully parenthesized, so each macro expands to a
 * single expression that can be used as a statement or inside a larger
 * expression.  Note that "c" is evaluated twice: do not pass an argument
 * with side effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
210 if (!do_isalpha || isalpha(c)
211#ifdef FEAT_FKMAP
212 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
213#endif
214 )
215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
219 chartab[c] &= ~CT_ID_CHAR;
220 else
221 chartab[c] |= CT_ID_CHAR;
222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
229#ifdef FEAT_FKMAP
230 || (p_altkeymap
231 && (F_isalpha(c) || F_isdigit(c)))
232#endif
233 )
234#ifdef FEAT_MBYTE
235 /* For double-byte we keep the cell width, so
236 * that we can detect it from the first byte. */
237 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
238#endif
239 )
240 {
241 if (tilde)
242 {
243 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
244 + ((dy_flags & DY_UHEX) ? 4 : 2);
245 chartab[c] &= ~CT_PRINT_CHAR;
246 }
247 else
248 {
249 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
250 chartab[c] |= CT_PRINT_CHAR;
251 }
252 }
253 }
254 else if (i == 2) /* (re)set fname flag */
255 {
256 if (tilde)
257 chartab[c] &= ~CT_FNAME_CHAR;
258 else
259 chartab[c] |= CT_FNAME_CHAR;
260 }
261 else /* i == 3 */ /* (re)set keyword flag */
262 {
263 if (tilde)
264 RESET_CHARTAB(buf, c);
265 else
266 SET_CHARTAB(buf, c);
267 }
268 }
269 ++c;
270 }
271 p = skip_to_option_part(p);
272 }
273 }
274 chartab_initialized = TRUE;
275 return OK;
276}
277
278/*
279 * Translate any special characters in buf[bufsize] in-place.
280 * The result is a string with only printable characters, but if there is not
281 * enough room, not all characters will be translated.
282 */
283 void
284trans_characters(buf, bufsize)
285 char_u *buf;
286 int bufsize;
287{
288 int len; /* length of string needing translation */
289 int room; /* room in buffer after string */
290 char_u *trs; /* translated character */
291 int trs_len; /* length of trs[] */
292
293 len = (int)STRLEN(buf);
294 room = bufsize - len;
295 while (*buf != 0)
296 {
297# ifdef FEAT_MBYTE
298 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000299 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 len -= trs_len;
301 else
302# endif
303 {
304 trs = transchar_byte(*buf);
305 trs_len = (int)STRLEN(trs);
306 if (trs_len > 1)
307 {
308 room -= trs_len - 1;
309 if (room <= 0)
310 return;
311 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
312 }
313 mch_memmove(buf, trs, (size_t)trs_len);
314 --len;
315 }
316 buf += trs_len;
317 }
318}
319
320#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
321/*
322 * Translate a string into allocated memory, replacing special chars with
323 * printable chars. Returns NULL when out of memory.
324 */
325 char_u *
326transstr(s)
327 char_u *s;
328{
329 char_u *res;
330 char_u *p;
331#ifdef FEAT_MBYTE
332 int l, len, c;
333 char_u hexbuf[11];
334#endif
335
336#ifdef FEAT_MBYTE
337 if (has_mbyte)
338 {
339 /* Compute the length of the result, taking account of unprintable
340 * multi-byte characters. */
341 len = 0;
342 p = s;
343 while (*p != NUL)
344 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000345 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346 {
347 c = (*mb_ptr2char)(p);
348 p += l;
349 if (vim_isprintc(c))
350 len += l;
351 else
352 {
353 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000354 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355 }
356 }
357 else
358 {
359 l = byte2cells(*p++);
360 if (l > 0)
361 len += l;
362 else
363 len += 4; /* illegal byte sequence */
364 }
365 }
366 res = alloc((unsigned)(len + 1));
367 }
368 else
369#endif
370 res = alloc((unsigned)(vim_strsize(s) + 1));
371 if (res != NULL)
372 {
373 *res = NUL;
374 p = s;
375 while (*p != NUL)
376 {
377#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000378 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000379 {
380 c = (*mb_ptr2char)(p);
381 if (vim_isprintc(c))
382 STRNCAT(res, p, l); /* append printable multi-byte char */
383 else
384 transchar_hex(res + STRLEN(res), c);
385 p += l;
386 }
387 else
388#endif
389 STRCAT(res, transchar_byte(*p++));
390 }
391 }
392 return res;
393}
394#endif
395
396#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
397/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000398 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
399 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
401 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402 */
403 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000405 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 int orglen;
407 char_u *buf;
408 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000409{
410 garray_T ga;
411 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000412 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000413
414#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
415#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000416#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
417#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000418
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000419 /* Copy "str" into "buf" or allocated memory, unmodified. */
420 if (buf == NULL)
421 {
422 ga_init2(&ga, 1, 10);
423 if (ga_grow(&ga, len + 1) == FAIL)
424 return NULL;
425 mch_memmove(ga.ga_data, str, (size_t)len);
426 ga.ga_len = len;
427 }
428 else
429 {
430 if (len >= buflen) /* Ugly! */
431 len = buflen - 1;
432 mch_memmove(buf, str, (size_t)len);
433 }
434 if (buf == NULL)
435 GA_CHAR(len) = NUL;
436 else
437 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000438
439 /* Make each character lower case. */
440 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000441 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442 {
443#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000444 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
446 if (enc_utf8)
447 {
448 int c, lc;
449
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000450 c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 lc = utf_tolower(c);
452 if (c != lc)
453 {
454 int ol = utf_char2len(c);
455 int nl = utf_char2len(lc);
456
457 /* If the byte length changes need to shift the following
458 * characters forward or backward. */
459 if (ol != nl)
460 {
461 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000462 {
463 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
464 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000465 {
466 /* out of memory, keep old char */
467 lc = c;
468 nl = ol;
469 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000470 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000471 if (ol != nl)
472 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000473 if (buf == NULL)
474 {
475 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000477 ga.ga_len += nl - ol;
478 }
479 else
480 {
481 mch_memmove(buf + i + nl, buf + i + ol,
482 STRLEN(buf + i + ol) + 1);
483 len += nl - ol;
484 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000485 }
486 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000488 }
489 }
490 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000491 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492 }
493 else
494#endif
495 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000496 if (buf == NULL)
497 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
498 else
499 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 ++i;
501 }
502 }
503
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000504 if (buf == NULL)
505 return (char_u *)ga.ga_data;
506 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507}
508#endif
509
510/*
511 * Catch 22: chartab[] can't be initialized before the options are
512 * initialized, and initializing options may cause transchar() to be called!
513 * When chartab_initialized == FALSE don't use chartab[].
514 * Does NOT work for multi-byte characters, c must be <= 255.
515 * Also doesn't work for the first byte of a multi-byte, "c" must be a
516 * character!
517 */
518static char_u transchar_buf[7];
519
520 char_u *
521transchar(c)
522 int c;
523{
524 int i;
525
526 i = 0;
527 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
528 {
529 transchar_buf[0] = '~';
530 transchar_buf[1] = '@';
531 i = 2;
532 c = K_SECOND(c);
533 }
534
535 if ((!chartab_initialized && (
536#ifdef EBCDIC
537 (c >= 64 && c < 255)
538#else
539 (c >= ' ' && c <= '~')
540#endif
541#ifdef FEAT_FKMAP
542 || F_ischar(c)
543#endif
544 )) || (c < 256 && vim_isprintc_strict(c)))
545 {
546 /* printable character */
547 transchar_buf[i] = c;
548 transchar_buf[i + 1] = NUL;
549 }
550 else
551 transchar_nonprint(transchar_buf + i, c);
552 return transchar_buf;
553}
554
555#if defined(FEAT_MBYTE) || defined(PROTO)
556/*
557 * Like transchar(), but called with a byte instead of a character. Checks
558 * for an illegal UTF-8 byte.
559 */
560 char_u *
561transchar_byte(c)
562 int c;
563{
564 if (enc_utf8 && c >= 0x80)
565 {
566 transchar_nonprint(transchar_buf, c);
567 return transchar_buf;
568 }
569 return transchar(c);
570}
571#endif
572
573/*
574 * Convert non-printable character to two or more printable characters in
575 * "buf[]". "buf" needs to be able to hold five bytes.
576 * Does NOT work for multi-byte characters, c must be <= 255.
577 */
578 void
579transchar_nonprint(buf, c)
580 char_u *buf;
581 int c;
582{
583 if (c == NL)
584 c = NUL; /* we use newline in place of a NUL */
585 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
586 c = NL; /* we use CR in place of NL in this case */
587
588 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
589 transchar_hex(buf, c);
590
591#ifdef EBCDIC
592 /* For EBCDIC only the characters 0-63 and 255 are not printable */
593 else if (CtrlChar(c) != 0 || c == DEL)
594#else
595 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
596#endif
597 {
598 buf[0] = '^';
599#ifdef EBCDIC
600 if (c == DEL)
601 buf[1] = '?'; /* DEL displayed as ^? */
602 else
603 buf[1] = CtrlChar(c);
604#else
605 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
606#endif
607
608 buf[2] = NUL;
609 }
610#ifdef FEAT_MBYTE
611 else if (enc_utf8 && c >= 0x80)
612 {
613 transchar_hex(buf, c);
614 }
615#endif
616#ifndef EBCDIC
617 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
618 {
619 buf[0] = '|';
620 buf[1] = c - 0x80;
621 buf[2] = NUL;
622 }
623#else
624 else if (c < 64)
625 {
626 buf[0] = '~';
627 buf[1] = MetaChar(c);
628 buf[2] = NUL;
629 }
630#endif
631 else /* 0x80 - 0x9f and 0xff */
632 {
633 /*
634 * TODO: EBCDIC I don't know what to do with this chars, so I display
635 * them as '~?' for now
636 */
637 buf[0] = '~';
638#ifdef EBCDIC
639 buf[1] = '?'; /* 0xff displayed as ~? */
640#else
641 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
642#endif
643 buf[2] = NUL;
644 }
645}
646
647 void
648transchar_hex(buf, c)
649 char_u *buf;
650 int c;
651{
652 int i = 0;
653
654 buf[0] = '<';
655#ifdef FEAT_MBYTE
656 if (c > 255)
657 {
658 buf[++i] = nr2hex((unsigned)c >> 12);
659 buf[++i] = nr2hex((unsigned)c >> 8);
660 }
661#endif
662 buf[++i] = nr2hex((unsigned)c >> 4);
663 buf[++i] = nr2hex(c);
664 buf[++i] = '>';
665 buf[++i] = NUL;
666}
667
/*
 * Return the hex digit character for the lower 4 bits of "c"; all other bits
 * are ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static int
nr2hex(c)
    int         c;
{
    /* Index a digit table with the low nibble. */
    return "0123456789abcdef"[c & 0xf];
}
681
682/*
683 * Return number of display cells occupied by byte "b".
684 * Caller must make sure 0 <= b <= 255.
685 * For multi-byte mode "b" must be the first byte of a character.
686 * A TAB is counted as two cells: "^I".
687 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
688 * cells depends on further bytes.
689 */
690 int
691byte2cells(b)
692 int b;
693{
694#ifdef FEAT_MBYTE
695 if (enc_utf8 && b >= 0x80)
696 return 0;
697#endif
698 return (chartab[b] & CT_CELL_MASK);
699}
700
701/*
702 * Return number of display cells occupied by character "c".
703 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
704 * A TAB is counted as two cells: "^I" or four: "<09>".
705 */
706 int
707char2cells(c)
708 int c;
709{
710 if (IS_SPECIAL(c))
711 return char2cells(K_SECOND(c)) + 2;
712#ifdef FEAT_MBYTE
713 if (c >= 0x80)
714 {
715 /* UTF-8: above 0x80 need to check the value */
716 if (enc_utf8)
717 return utf_char2cells(c);
718 /* DBCS: double-byte means double-width, except for euc-jp with first
719 * byte 0x8e */
720 if (enc_dbcs != 0 && c >= 0x100)
721 {
722 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
723 return 1;
724 return 2;
725 }
726 }
727#endif
728 return (chartab[c & 0xff] & CT_CELL_MASK);
729}
730
731/*
732 * Return number of display cells occupied by character at "*p".
733 * A TAB is counted as two cells: "^I" or four: "<09>".
734 */
735 int
736ptr2cells(p)
737 char_u *p;
738{
739#ifdef FEAT_MBYTE
740 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
741 if (enc_utf8 && *p >= 0x80)
742 return utf_ptr2cells(p);
743 /* For DBCS we can tell the cell count from the first byte. */
744#endif
745 return (chartab[*p] & CT_CELL_MASK);
746}
747
748/*
749 * Return the number of characters string "s" will take on the screen,
750 * counting TABs as two characters: "^I".
751 */
752 int
753vim_strsize(s)
754 char_u *s;
755{
756 return vim_strnsize(s, (int)MAXCOL);
757}
758
759/*
760 * Return the number of characters string "s[len]" will take on the screen,
761 * counting TABs as two characters: "^I".
762 */
763 int
764vim_strnsize(s, len)
765 char_u *s;
766 int len;
767{
768 int size = 0;
769
770 while (*s != NUL && --len >= 0)
771 {
772#ifdef FEAT_MBYTE
773 if (has_mbyte)
774 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000775 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000776
777 size += ptr2cells(s);
778 s += l;
779 len -= l - 1;
780 }
781 else
782#endif
783 size += byte2cells(*s++);
784 }
785 return size;
786}
787
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return, so
 * it must be the last thing in the function body that uses it:
 * - a TAB, when 'list' is off for "wp" or lcs_tab1 is set, takes the cells
 *   up to the next multiple of "buf"'s 'tabstop';
 * - anything else takes ptr2cells(p) cells.
 * Every argument is parenthesized where it is used, so an expression such as
 * "a + b" can be passed for "col".
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
804
805#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
806 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
807 int
808chartabsize(p, col)
809 char_u *p;
810 colnr_T col;
811{
812 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
813}
814#endif
815
816#ifdef FEAT_LINEBREAK
817 static int
818win_chartabsize(wp, p, col)
819 win_T *wp;
820 char_u *p;
821 colnr_T col;
822{
823 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
824}
825#endif
826
827/*
828 * return the number of characters the string 's' will take on the screen,
829 * taking into account the size of a tab
830 */
831 int
832linetabsize(s)
833 char_u *s;
834{
835 colnr_T col = 0;
836
837 while (*s != NUL)
838 col += lbr_chartabsize_adv(&s, col);
839 return (int)col;
840}
841
842/*
843 * Like linetabsize(), but for a given window instead of the current one.
844 */
845 int
846win_linetabsize(wp, p, len)
847 win_T *wp;
848 char_u *p;
849 colnr_T len;
850{
851 colnr_T col = 0;
852 char_u *s;
853
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000854 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 return (int)col;
857}
858
859/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000860 * Return TRUE if 'c' is a normal identifier character:
861 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862 */
863 int
864vim_isIDc(c)
865 int c;
866{
867 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
868}
869
870/*
871 * return TRUE if 'c' is a keyword character: Letters and characters from
872 * 'iskeyword' option for current buffer.
873 * For multi-byte characters mb_get_class() is used (builtin rules).
874 */
875 int
876vim_iswordc(c)
877 int c;
878{
879#ifdef FEAT_MBYTE
880 if (c >= 0x100)
881 {
882 if (enc_dbcs != 0)
883 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
884 if (enc_utf8)
885 return utf_class(c) >= 2;
886 }
887#endif
888 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
889}
890
891/*
892 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
893 */
894 int
895vim_iswordp(p)
896 char_u *p;
897{
898#ifdef FEAT_MBYTE
899 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
900 return mb_get_class(p) >= 2;
901#endif
902 return GET_CHARTAB(curbuf, *p) != 0;
903}
904
905#if defined(FEAT_SYN_HL) || defined(PROTO)
906 int
907vim_iswordc_buf(p, buf)
908 char_u *p;
909 buf_T *buf;
910{
911# ifdef FEAT_MBYTE
912 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
913 return mb_get_class(p) >= 2;
914# endif
915 return (GET_CHARTAB(buf, *p) != 0);
916}
Bram Moolenaarc4956c82006-03-12 21:58:43 +0000917#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000918
919/*
920 * return TRUE if 'c' is a valid file-name character
921 * Assume characters above 0x100 are valid (multi-byte).
922 */
923 int
924vim_isfilec(c)
925 int c;
926{
927 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
928}
929
930/*
931 * return TRUE if 'c' is a printable character
932 * Assume characters above 0x100 are printable (multi-byte), except for
933 * Unicode.
934 */
935 int
936vim_isprintc(c)
937 int c;
938{
939#ifdef FEAT_MBYTE
940 if (enc_utf8 && c >= 0x100)
941 return utf_printable(c);
942#endif
943 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
944}
945
946/*
947 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
948 * byte of a double-byte character.
949 */
950 int
951vim_isprintc_strict(c)
952 int c;
953{
954#ifdef FEAT_MBYTE
955 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
956 return FALSE;
957 if (enc_utf8 && c >= 0x100)
958 return utf_printable(c);
959#endif
960 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
961}
962
963/*
964 * like chartabsize(), but also check for line breaks on the screen
965 */
966 int
967lbr_chartabsize(s, col)
968 unsigned char *s;
969 colnr_T col;
970{
971#ifdef FEAT_LINEBREAK
972 if (!curwin->w_p_lbr && *p_sbr == NUL)
973 {
974#endif
975#ifdef FEAT_MBYTE
976 if (curwin->w_p_wrap)
977 return win_nolbr_chartabsize(curwin, s, col, NULL);
978#endif
979 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
980#ifdef FEAT_LINEBREAK
981 }
982 return win_lbr_chartabsize(curwin, s, col, NULL);
983#endif
984}
985
986/*
987 * Call lbr_chartabsize() and advance the pointer.
988 */
989 int
990lbr_chartabsize_adv(s, col)
991 char_u **s;
992 colnr_T col;
993{
994 int retval;
995
996 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000997 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000998 return retval;
999}
1000
/*
 * Return the number of screen cells the character at "s" takes in window
 * "wp" when it starts in column "col", including what 'linebreak' and
 * 'showbreak' add to it.
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" is not NULL, *headp is set to the size of what is added for the
 * 'showbreak' string at the start of a line, plus one when a ">" was counted
 * for a double-wide character that does not fit.
 * Warning: on the quick path (no 'linebreak' and no 'showbreak') *headp is
 * only set if it's a non-zero value, init to 0 before calling.
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;           /* the result */
    colnr_T     col2;           /* column while scanning ahead */
    colnr_T     colmax;         /* first column that no longer fits */
    int         added;          /* cells added for 'showbreak' */
# ifdef FEAT_MBYTE
    int         mb_added = 0;   /* 1 when a ">" was counted */
# else
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;            /* character before "s" while scanning */
    int         tab_corr = (*s == TAB);
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
        /* Without FEAT_LINEBREAK this block is the whole function. */
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is past the first screen line: move "colmax" forward in
             * steps of "n" until it is beyond "col". */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the line, or at a non-break character that
             * follows a break character (except right after the start). */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* The character at "col" takes up the rest of the line. */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* Reduce "col" to a column within the screen line. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                /* for a TAB: add only whole multiples of 'tabstop' */
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* Only report a head when the character starts the line. */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1141
1142#if defined(FEAT_MBYTE) || defined(PROTO)
1143/*
1144 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1145 * 'wrap' is on. This means we need to check for a double-byte character that
1146 * doesn't fit at the end of the screen line.
1147 */
1148 static int
1149win_nolbr_chartabsize(wp, s, col, headp)
1150 win_T *wp;
1151 char_u *s;
1152 colnr_T col;
1153 int *headp;
1154{
1155 int n;
1156
1157 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1158 {
1159 n = wp->w_buffer->b_p_ts;
1160 return (int)(n - (col % n));
1161 }
1162 n = ptr2cells(s);
1163 /* Add one cell for a double-width character in the last column of the
1164 * window, displayed with a ">". */
1165 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1166 {
1167 if (headp != NULL)
1168 *headp = 1;
1169 return 3;
1170 }
1171 return n;
1172}
1173
1174/*
1175 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1176 * "wp".
1177 */
1178 int
1179in_win_border(wp, vcol)
1180 win_T *wp;
1181 colnr_T vcol;
1182{
1183 colnr_T width1; /* width of first line (after line number) */
1184 colnr_T width2; /* width of further lines */
1185
1186#ifdef FEAT_VERTSPLIT
1187 if (wp->w_width == 0) /* there is no border */
1188 return FALSE;
1189#endif
1190 width1 = W_WIDTH(wp) - win_col_off(wp);
1191 if (vcol < width1 - 1)
1192 return FALSE;
1193 if (vcol == width1 - 1)
1194 return TRUE;
1195 width2 = width1 + win_col_off2(wp);
1196 return ((vcol - width1) % width2 == width2 - 1);
1197}
1198#endif /* FEAT_MBYTE */
1199
1200/*
1201 * Get virtual column number of pos.
1202 * start: on the first position of this character (TAB, ctrl)
1203 * cursor: where the cursor is on this character (first char, except for TAB)
1204 * end: on the last position of this character (TAB, ctrl)
1205 *
1206 * This is used very often, keep it fast!
1207 */
1208 void
1209getvcol(wp, pos, start, cursor, end)
1210 win_T *wp;
1211 pos_T *pos;
1212 colnr_T *start;
1213 colnr_T *cursor;
1214 colnr_T *end;
1215{
1216 colnr_T vcol;
1217 char_u *ptr; /* points to current char */
1218 char_u *posptr; /* points to char at pos->col */
1219 int incr;
1220 int head;
1221 int ts = wp->w_buffer->b_p_ts;
1222 int c;
1223
1224 vcol = 0;
1225 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1226 posptr = ptr + pos->col;
1227
1228 /*
1229 * This function is used very often, do some speed optimizations.
1230 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1231 * Also use this when 'list' is set but tabs take their normal size.
1232 */
1233 if ((!wp->w_p_list || lcs_tab1 != NUL)
1234#ifdef FEAT_LINEBREAK
1235 && !wp->w_p_lbr && *p_sbr == NUL
1236#endif
1237 )
1238 {
1239#ifndef FEAT_MBYTE
1240 head = 0;
1241#endif
1242 for (;;)
1243 {
1244#ifdef FEAT_MBYTE
1245 head = 0;
1246#endif
1247 c = *ptr;
1248 /* make sure we don't go past the end of the line */
1249 if (c == NUL)
1250 {
1251 incr = 1; /* NUL at end of line only takes one column */
1252 break;
1253 }
1254 /* A tab gets expanded, depending on the current column */
1255 if (c == TAB)
1256 incr = ts - (vcol % ts);
1257 else
1258 {
1259#ifdef FEAT_MBYTE
1260 if (has_mbyte)
1261 {
1262 /* For utf-8, if the byte is >= 0x80, need to look at
1263 * further bytes to find the cell width. */
1264 if (enc_utf8 && c >= 0x80)
1265 incr = utf_ptr2cells(ptr);
1266 else
1267 incr = CHARSIZE(c);
1268
1269 /* If a double-cell char doesn't fit at the end of a line
1270 * it wraps to the next line, it's like this char is three
1271 * cells wide. */
1272 if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1273 {
1274 ++incr;
1275 head = 1;
1276 }
1277 }
1278 else
1279#endif
1280 incr = CHARSIZE(c);
1281 }
1282
1283 if (ptr >= posptr) /* character at pos->col */
1284 break;
1285
1286 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001287 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001288 }
1289 }
1290 else
1291 {
1292 for (;;)
1293 {
1294 /* A tab gets expanded, depending on the current column */
1295 head = 0;
1296 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1297 /* make sure we don't go past the end of the line */
1298 if (*ptr == NUL)
1299 {
1300 incr = 1; /* NUL at end of line only takes one column */
1301 break;
1302 }
1303
1304 if (ptr >= posptr) /* character at pos->col */
1305 break;
1306
1307 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001308 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001309 }
1310 }
1311 if (start != NULL)
1312 *start = vcol + head;
1313 if (end != NULL)
1314 *end = vcol + incr - 1;
1315 if (cursor != NULL)
1316 {
1317 if (*ptr == TAB
1318 && (State & NORMAL)
1319 && !wp->w_p_list
1320 && !virtual_active()
1321#ifdef FEAT_VISUAL
1322 && !(VIsual_active
1323 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1324#endif
1325 )
1326 *cursor = vcol + incr - 1; /* cursor at end */
1327 else
1328 *cursor = vcol + head; /* cursor at start */
1329 }
1330}
1331
1332/*
1333 * Get virtual cursor column in the current window, pretending 'list' is off.
1334 */
1335 colnr_T
1336getvcol_nolist(posp)
1337 pos_T *posp;
1338{
1339 int list_save = curwin->w_p_list;
1340 colnr_T vcol;
1341
1342 curwin->w_p_list = FALSE;
1343 getvcol(curwin, posp, NULL, &vcol, NULL);
1344 curwin->w_p_list = list_save;
1345 return vcol;
1346}
1347
1348#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1349/*
1350 * Get virtual column in virtual mode.
1351 */
1352 void
1353getvvcol(wp, pos, start, cursor, end)
1354 win_T *wp;
1355 pos_T *pos;
1356 colnr_T *start;
1357 colnr_T *cursor;
1358 colnr_T *end;
1359{
1360 colnr_T col;
1361 colnr_T coladd;
1362 colnr_T endadd;
1363# ifdef FEAT_MBYTE
1364 char_u *ptr;
1365# endif
1366
1367 if (virtual_active())
1368 {
1369 /* For virtual mode, only want one value */
1370 getvcol(wp, pos, &col, NULL, NULL);
1371
1372 coladd = pos->coladd;
1373 endadd = 0;
1374# ifdef FEAT_MBYTE
1375 /* Cannot put the cursor on part of a wide character. */
1376 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1377 if (pos->col < STRLEN(ptr))
1378 {
1379 int c = (*mb_ptr2char)(ptr + pos->col);
1380
1381 if (c != TAB && vim_isprintc(c))
1382 {
1383 endadd = char2cells(c) - 1;
Bram Moolenaara5792f52005-11-23 21:25:05 +00001384 if (coladd > endadd) /* past end of line */
1385 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386 else
1387 coladd = 0;
1388 }
1389 }
1390# endif
1391 col += coladd;
1392 if (start != NULL)
1393 *start = col;
1394 if (cursor != NULL)
1395 *cursor = col;
1396 if (end != NULL)
1397 *end = col + endadd;
1398 }
1399 else
1400 getvcol(wp, pos, start, cursor, end);
1401}
1402#endif
1403
1404#if defined(FEAT_VISUAL) || defined(PROTO)
1405/*
1406 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1407 * Used for Visual block mode.
1408 */
1409 void
1410getvcols(wp, pos1, pos2, left, right)
1411 win_T *wp;
1412 pos_T *pos1, *pos2;
1413 colnr_T *left, *right;
1414{
1415 colnr_T from1, from2, to1, to2;
1416
1417 if (ltp(pos1, pos2))
1418 {
1419 getvvcol(wp, pos1, &from1, NULL, &to1);
1420 getvvcol(wp, pos2, &from2, NULL, &to2);
1421 }
1422 else
1423 {
1424 getvvcol(wp, pos2, &from1, NULL, &to1);
1425 getvvcol(wp, pos1, &from2, NULL, &to2);
1426 }
1427 if (from2 < from1)
1428 *left = from2;
1429 else
1430 *left = from1;
1431 if (to2 > to1)
1432 {
1433 if (*p_sel == 'e' && from2 - 1 >= to1)
1434 *right = from2 - 1;
1435 else
1436 *right = to2;
1437 }
1438 else
1439 *right = to1;
1440}
1441#endif
1442
1443/*
1444 * skipwhite: skip over ' ' and '\t'.
1445 */
1446 char_u *
1447skipwhite(p)
1448 char_u *p;
1449{
1450 while (vim_iswhite(*p)) /* skip to next non-white */
1451 ++p;
1452 return p;
1453}
1454
1455/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001456 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001457 */
1458 char_u *
1459skipdigits(p)
1460 char_u *p;
1461{
1462 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1463 ++p;
1464 return p;
1465}
1466
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001467#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001468/*
1469 * skip over digits and hex characters
1470 */
1471 char_u *
1472skiphex(p)
1473 char_u *p;
1474{
1475 while (vim_isxdigit(*p)) /* skip to next non-digit */
1476 ++p;
1477 return p;
1478}
1479#endif
1480
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001481#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1482/*
1483 * skip to digit (or NUL after the string)
1484 */
1485 char_u *
1486skiptodigit(p)
1487 char_u *p;
1488{
1489 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1490 ++p;
1491 return p;
1492}
1493
1494/*
1495 * skip to hex character (or NUL after the string)
1496 */
1497 char_u *
1498skiptohex(p)
1499 char_u *p;
1500{
1501 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1502 ++p;
1503 return p;
1504}
1505#endif
1506
/*
 * Variant of isdigit() that accepts any int value, including characters
 * above 0xff.
 * The library isdigit() is not used, because on some systems it also
 * considers superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' through '9', 0 for everything else.
 */
    int
vim_isdigit(c)
    int	    c;
{
    return !(c < '0' || c > '9');
}
1519
/*
 * Variant of isxdigit() that accepts any int value, including characters
 * above 0xff.
 * The library isxdigit() is not used, because on some systems it also
 * considers superscript 1 to be a digit.
 * Returns 1 for '0'-'9', 'a'-'f' and 'A'-'F', 0 for everything else.
 */
    int
vim_isxdigit(c)
    int	    c;
{
    int	    is_decimal = (c >= '0' && c <= '9');
    int	    is_lower_hex = (c >= 'a' && c <= 'f');
    int	    is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1533
Bram Moolenaar78622822005-08-23 21:00:13 +00001534#if defined(FEAT_MBYTE) || defined(PROTO)
1535/*
1536 * Vim's own character class functions. These exist because many library
1537 * islower()/toupper() etc. do not work properly: they crash when used with
1538 * invalid values or can't handle latin1 when the locale is C.
1539 * Speed is most important here.
1540 */
1541#define LATIN1LOWER 'l'
1542#define LATIN1UPPER 'U'
1543
1544/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001545static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1546static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1547static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
Bram Moolenaar78622822005-08-23 21:00:13 +00001548
1549 int
1550vim_islower(c)
1551 int c;
1552{
1553 if (c <= '@')
1554 return FALSE;
1555 if (c >= 0x80)
1556 {
1557 if (enc_utf8)
1558 return utf_islower(c);
1559 if (c >= 0x100)
1560 {
1561#ifdef HAVE_ISWLOWER
1562 if (has_mbyte)
1563 return iswlower(c);
1564#endif
1565 /* islower() can't handle these chars and may crash */
1566 return FALSE;
1567 }
1568 if (enc_latin1like)
1569 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1570 }
1571 return islower(c);
1572}
1573
1574 int
1575vim_isupper(c)
1576 int c;
1577{
1578 if (c <= '@')
1579 return FALSE;
1580 if (c >= 0x80)
1581 {
1582 if (enc_utf8)
1583 return utf_isupper(c);
1584 if (c >= 0x100)
1585 {
1586#ifdef HAVE_ISWUPPER
1587 if (has_mbyte)
1588 return iswupper(c);
1589#endif
1590 /* islower() can't handle these chars and may crash */
1591 return FALSE;
1592 }
1593 if (enc_latin1like)
1594 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1595 }
1596 return isupper(c);
1597}
1598
1599 int
1600vim_toupper(c)
1601 int c;
1602{
1603 if (c <= '@')
1604 return c;
1605 if (c >= 0x80)
1606 {
1607 if (enc_utf8)
1608 return utf_toupper(c);
1609 if (c >= 0x100)
1610 {
1611#ifdef HAVE_TOWUPPER
1612 if (has_mbyte)
1613 return towupper(c);
1614#endif
1615 /* toupper() can't handle these chars and may crash */
1616 return c;
1617 }
1618 if (enc_latin1like)
1619 return latin1upper[c];
1620 }
1621 return TOUPPER_LOC(c);
1622}
1623
1624 int
1625vim_tolower(c)
1626 int c;
1627{
1628 if (c <= '@')
1629 return c;
1630 if (c >= 0x80)
1631 {
1632 if (enc_utf8)
1633 return utf_tolower(c);
1634 if (c >= 0x100)
1635 {
1636#ifdef HAVE_TOWLOWER
1637 if (has_mbyte)
1638 return towlower(c);
1639#endif
1640 /* tolower() can't handle these chars and may crash */
1641 return c;
1642 }
1643 if (enc_latin1like)
1644 return latin1lower[c];
1645 }
1646 return TOLOWER_LOC(c);
1647}
1648#endif
1649
Bram Moolenaar071d4272004-06-13 20:20:40 +00001650/*
1651 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1652 */
1653 char_u *
1654skiptowhite(p)
1655 char_u *p;
1656{
1657 while (*p != ' ' && *p != '\t' && *p != NUL)
1658 ++p;
1659 return p;
1660}
1661
1662#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1663 || defined(PROTO)
1664/*
1665 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1666 */
1667 char_u *
1668skiptowhite_esc(p)
1669 char_u *p;
1670{
1671 while (*p != ' ' && *p != '\t' && *p != NUL)
1672 {
1673 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1674 ++p;
1675 ++p;
1676 }
1677 return p;
1678}
1679#endif
1680
1681/*
1682 * Getdigits: Get a number from a string and skip over it.
1683 * Note: the argument is a pointer to a char_u pointer!
1684 */
1685 long
1686getdigits(pp)
1687 char_u **pp;
1688{
1689 char_u *p;
1690 long retval;
1691
1692 p = *pp;
1693 retval = atol((char *)p);
1694 if (*p == '-') /* skip negative sign */
1695 ++p;
1696 p = skipdigits(p); /* skip to next non-digit */
1697 *pp = p;
1698 return retval;
1699}
1700
1701/*
1702 * Return TRUE if "lbuf" is empty or only contains blanks.
1703 */
1704 int
1705vim_isblankline(lbuf)
1706 char_u *lbuf;
1707{
1708 char_u *p;
1709
1710 p = skipwhite(lbuf);
1711 return (*p == NUL || *p == '\r' || *p == '\n');
1712}
1713
1714/*
1715 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001716 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001717 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1718 * 0 decimal
1719 * '0' octal
1720 * 'X' hex
1721 * 'x' hex
1722 * If "len" is not NULL, the length of the number in characters is returned.
1723 * If "nptr" is not NULL, the signed result is returned in it.
1724 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001725 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001726 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1727 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001728 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001729 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001730 */
1731 void
1732vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1733 char_u *start;
1734 int *hexp; /* return: type of number 0 = decimal, 'x'
1735 or 'X' is hex, '0' = octal */
1736 int *len; /* return: detected length of number */
1737 int dooct; /* recognize octal number */
1738 int dohex; /* recognize hex number */
1739 long *nptr; /* return: signed result */
1740 unsigned long *unptr; /* return: unsigned result */
1741{
1742 char_u *ptr = start;
1743 int hex = 0; /* default is decimal */
1744 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001745 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001746 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001747
1748 if (ptr[0] == '-')
1749 {
1750 negative = TRUE;
1751 ++ptr;
1752 }
1753
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001754 /* Recognize hex and octal. */
1755 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 {
1757 hex = ptr[1];
1758 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1759 ptr += 2; /* hexadecimal */
1760 else
1761 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001762 hex = 0; /* default is decimal */
1763 if (dooct)
1764 {
1765 /* Don't interpret "0", "08" or "0129" as octal. */
1766 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1767 {
1768 if (ptr[n] > '7')
1769 {
1770 hex = 0; /* can't be octal */
1771 break;
1772 }
1773 if (ptr[n] > '0')
1774 hex = '0'; /* assume octal */
1775 }
1776 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777 }
1778 }
1779
1780 /*
1781 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1782 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001783 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001784 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001785 /* octal */
1786 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001787 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001788 un = 8 * un + (unsigned long)(*ptr - '0');
1789 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001790 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001791 }
1792 else if (hex != 0 || dohex > 1)
1793 {
1794 /* hex */
1795 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001796 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001797 un = 16 * un + (unsigned long)hex2nr(*ptr);
1798 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001799 }
1800 }
1801 else
1802 {
1803 /* decimal */
1804 while (VIM_ISDIGIT(*ptr))
1805 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806 un = 10 * un + (unsigned long)(*ptr - '0');
1807 ++ptr;
1808 }
1809 }
1810
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 if (hexp != NULL)
1812 *hexp = hex;
1813 if (len != NULL)
1814 *len = (int)(ptr - start);
1815 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001816 {
1817 if (negative) /* account for leading '-' for decimal numbers */
1818 *nptr = -(long)un;
1819 else
1820 *nptr = (long)un;
1821 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822 if (unptr != NULL)
1823 *unptr = un;
1824}
1825
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other argument is treated like a decimal digit, thus the result is
 * "c - '0'".
 */
    int
hex2nr(c)
    int	    c;
{
    int	    base;	/* character code that stands for value zero */

    if (c >= 'a' && c <= 'f')
	base = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
	base = 'A' - 10;
    else
	base = '0';
    return c - base;
}
1840
1841#if defined(FEAT_TERMRESPONSE) \
1842 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1843/*
1844 * Convert two hex characters to a byte.
1845 * Return -1 if one of the characters is not hex.
1846 */
1847 int
1848hexhex2nr(p)
1849 char_u *p;
1850{
1851 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1852 return -1;
1853 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1854}
1855#endif
1856
1857/*
1858 * Return TRUE if "str" starts with a backslash that should be removed.
1859 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1860 * backslash is not a normal file name character.
1861 * '$' is a valid file name character, we don't remove the backslash before
1862 * it. This means it is not possible to use an environment variable after a
1863 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1864 * Although "\ name" is valid, the backslash in "Program\ files" must be
1865 * removed. Assume a file name doesn't start with a space.
1866 * For multi-byte names, never remove a backslash before a non-ascii
1867 * character, assume that all multi-byte characters are valid file name
1868 * characters.
1869 */
1870 int
1871rem_backslash(str)
1872 char_u *str;
1873{
1874#ifdef BACKSLASH_IN_FILENAME
1875 return (str[0] == '\\'
1876# ifdef FEAT_MBYTE
1877 && str[1] < 0x80
1878# endif
1879 && (str[1] == ' '
1880 || (str[1] != NUL
1881 && str[1] != '*'
1882 && str[1] != '?'
1883 && !vim_isfilec(str[1]))));
1884#else
1885 return (str[0] == '\\' && str[1] != NUL);
1886#endif
1887}
1888
1889/*
1890 * Halve the number of backslashes in a file name argument.
1891 * For MS-DOS we only do this if the character after the backslash
1892 * is not a normal file character.
1893 */
1894 void
1895backslash_halve(p)
1896 char_u *p;
1897{
1898 for ( ; *p; ++p)
1899 if (rem_backslash(p))
1900 STRCPY(p, p + 1);
1901}
1902
1903/*
1904 * backslash_halve() plus save the result in allocated memory.
1905 */
1906 char_u *
1907backslash_halve_save(p)
1908 char_u *p;
1909{
1910 char_u *res;
1911
1912 res = vim_strsave(p);
1913 if (res == NULL)
1914 return p;
1915 backslash_halve(res);
1916 return res;
1917}
1918
1919#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1920/*
1921 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1922 * The first 64 entries have been added to map control characters defined in
1923 * ascii.h
1924 */
1925static char_u ebcdic2ascii_tab[256] =
1926{
1927 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1928 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1929 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1930 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1931 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1932 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1933 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1934 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1935 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1936 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1937 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1938 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1939 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1940 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1941 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1942 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1943 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1944 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1945 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1946 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1947 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1948 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1949 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1950 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1951 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1952 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1953 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1954 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1955 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1956 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1957 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1958 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1959};
1960
1961/*
1962 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1963 * wanting 7-bit ASCII characters out the other end.
1964 */
1965 void
1966ebcdic2ascii(buffer, len)
1967 char_u *buffer;
1968 int len;
1969{
1970 int i;
1971
1972 for (i = 0; i < len; i++)
1973 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1974}
1975#endif