blob: 5a2952cd6c4eb6e9ae24853ae7d61d8cb883ecde [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
22static int chartab_initialized = FALSE;
23
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the flag for character "c" is bit (c & 7) of byte
 * (c >> 3).
 * Every macro expands to a single, fully parenthesized expression so that it
 * can be used as part of a larger expression.
 * "c" is evaluated twice: do not pass an expression with side effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000210 /* Use the MB_ functions here, because isalpha() doesn't
211 * work properly when 'encoding' is "latin1" and the locale is
212 * "C". */
213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214#ifdef FEAT_FKMAP
215 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
216#endif
217 )
218 {
219 if (i == 0) /* (re)set ID flag */
220 {
221 if (tilde)
222 chartab[c] &= ~CT_ID_CHAR;
223 else
224 chartab[c] |= CT_ID_CHAR;
225 }
226 else if (i == 1) /* (re)set printable */
227 {
228 if ((c < ' '
229#ifndef EBCDIC
230 || c > '~'
231#endif
232#ifdef FEAT_FKMAP
233 || (p_altkeymap
234 && (F_isalpha(c) || F_isdigit(c)))
235#endif
236 )
237#ifdef FEAT_MBYTE
238 /* For double-byte we keep the cell width, so
239 * that we can detect it from the first byte. */
240 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
241#endif
242 )
243 {
244 if (tilde)
245 {
246 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
247 + ((dy_flags & DY_UHEX) ? 4 : 2);
248 chartab[c] &= ~CT_PRINT_CHAR;
249 }
250 else
251 {
252 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
253 chartab[c] |= CT_PRINT_CHAR;
254 }
255 }
256 }
257 else if (i == 2) /* (re)set fname flag */
258 {
259 if (tilde)
260 chartab[c] &= ~CT_FNAME_CHAR;
261 else
262 chartab[c] |= CT_FNAME_CHAR;
263 }
264 else /* i == 3 */ /* (re)set keyword flag */
265 {
266 if (tilde)
267 RESET_CHARTAB(buf, c);
268 else
269 SET_CHARTAB(buf, c);
270 }
271 }
272 ++c;
273 }
274 p = skip_to_option_part(p);
275 }
276 }
277 chartab_initialized = TRUE;
278 return OK;
279}
280
281/*
282 * Translate any special characters in buf[bufsize] in-place.
283 * The result is a string with only printable characters, but if there is not
284 * enough room, not all characters will be translated.
285 */
286 void
287trans_characters(buf, bufsize)
288 char_u *buf;
289 int bufsize;
290{
291 int len; /* length of string needing translation */
292 int room; /* room in buffer after string */
293 char_u *trs; /* translated character */
294 int trs_len; /* length of trs[] */
295
296 len = (int)STRLEN(buf);
297 room = bufsize - len;
298 while (*buf != 0)
299 {
300# ifdef FEAT_MBYTE
301 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000302 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000303 len -= trs_len;
304 else
305# endif
306 {
307 trs = transchar_byte(*buf);
308 trs_len = (int)STRLEN(trs);
309 if (trs_len > 1)
310 {
311 room -= trs_len - 1;
312 if (room <= 0)
313 return;
314 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
315 }
316 mch_memmove(buf, trs, (size_t)trs_len);
317 --len;
318 }
319 buf += trs_len;
320 }
321}
322
Bram Moolenaar7cc36e92007-03-27 10:42:05 +0000323#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
324 || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325/*
326 * Translate a string into allocated memory, replacing special chars with
327 * printable chars. Returns NULL when out of memory.
328 */
329 char_u *
330transstr(s)
331 char_u *s;
332{
333 char_u *res;
334 char_u *p;
335#ifdef FEAT_MBYTE
336 int l, len, c;
337 char_u hexbuf[11];
338#endif
339
340#ifdef FEAT_MBYTE
341 if (has_mbyte)
342 {
343 /* Compute the length of the result, taking account of unprintable
344 * multi-byte characters. */
345 len = 0;
346 p = s;
347 while (*p != NUL)
348 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000349 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000350 {
351 c = (*mb_ptr2char)(p);
352 p += l;
353 if (vim_isprintc(c))
354 len += l;
355 else
356 {
357 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000358 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 }
360 }
361 else
362 {
363 l = byte2cells(*p++);
364 if (l > 0)
365 len += l;
366 else
367 len += 4; /* illegal byte sequence */
368 }
369 }
370 res = alloc((unsigned)(len + 1));
371 }
372 else
373#endif
374 res = alloc((unsigned)(vim_strsize(s) + 1));
375 if (res != NULL)
376 {
377 *res = NUL;
378 p = s;
379 while (*p != NUL)
380 {
381#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000382 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000383 {
384 c = (*mb_ptr2char)(p);
385 if (vim_isprintc(c))
386 STRNCAT(res, p, l); /* append printable multi-byte char */
387 else
388 transchar_hex(res + STRLEN(res), c);
389 p += l;
390 }
391 else
392#endif
393 STRCAT(res, transchar_byte(*p++));
394 }
395 }
396 return res;
397}
398#endif
399
400#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
401/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000402 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
403 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
405 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000406 */
407 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000408str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000409 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000410 int orglen;
411 char_u *buf;
412 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000413{
414 garray_T ga;
415 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000416 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000417
418#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
419#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000420#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
421#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000423 /* Copy "str" into "buf" or allocated memory, unmodified. */
424 if (buf == NULL)
425 {
426 ga_init2(&ga, 1, 10);
427 if (ga_grow(&ga, len + 1) == FAIL)
428 return NULL;
429 mch_memmove(ga.ga_data, str, (size_t)len);
430 ga.ga_len = len;
431 }
432 else
433 {
434 if (len >= buflen) /* Ugly! */
435 len = buflen - 1;
436 mch_memmove(buf, str, (size_t)len);
437 }
438 if (buf == NULL)
439 GA_CHAR(len) = NUL;
440 else
441 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442
443 /* Make each character lower case. */
444 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000445 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446 {
447#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000448 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000449 {
450 if (enc_utf8)
451 {
452 int c, lc;
453
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000454 c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 lc = utf_tolower(c);
456 if (c != lc)
457 {
458 int ol = utf_char2len(c);
459 int nl = utf_char2len(lc);
460
461 /* If the byte length changes need to shift the following
462 * characters forward or backward. */
463 if (ol != nl)
464 {
465 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000466 {
467 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
468 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000469 {
470 /* out of memory, keep old char */
471 lc = c;
472 nl = ol;
473 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000474 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000475 if (ol != nl)
476 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000477 if (buf == NULL)
478 {
479 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000480 STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000481 ga.ga_len += nl - ol;
482 }
483 else
484 {
485 mch_memmove(buf + i + nl, buf + i + ol,
486 STRLEN(buf + i + ol) + 1);
487 len += nl - ol;
488 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000489 }
490 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000491 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492 }
493 }
494 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000495 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 }
497 else
498#endif
499 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000500 if (buf == NULL)
501 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
502 else
503 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 ++i;
505 }
506 }
507
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000508 if (buf == NULL)
509 return (char_u *)ga.ga_data;
510 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511}
512#endif
513
514/*
515 * Catch 22: chartab[] can't be initialized before the options are
516 * initialized, and initializing options may cause transchar() to be called!
517 * When chartab_initialized == FALSE don't use chartab[].
518 * Does NOT work for multi-byte characters, c must be <= 255.
519 * Also doesn't work for the first byte of a multi-byte, "c" must be a
520 * character!
521 */
522static char_u transchar_buf[7];
523
524 char_u *
525transchar(c)
526 int c;
527{
528 int i;
529
530 i = 0;
531 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
532 {
533 transchar_buf[0] = '~';
534 transchar_buf[1] = '@';
535 i = 2;
536 c = K_SECOND(c);
537 }
538
539 if ((!chartab_initialized && (
540#ifdef EBCDIC
541 (c >= 64 && c < 255)
542#else
543 (c >= ' ' && c <= '~')
544#endif
545#ifdef FEAT_FKMAP
546 || F_ischar(c)
547#endif
548 )) || (c < 256 && vim_isprintc_strict(c)))
549 {
550 /* printable character */
551 transchar_buf[i] = c;
552 transchar_buf[i + 1] = NUL;
553 }
554 else
555 transchar_nonprint(transchar_buf + i, c);
556 return transchar_buf;
557}
558
559#if defined(FEAT_MBYTE) || defined(PROTO)
560/*
561 * Like transchar(), but called with a byte instead of a character. Checks
562 * for an illegal UTF-8 byte.
563 */
564 char_u *
565transchar_byte(c)
566 int c;
567{
568 if (enc_utf8 && c >= 0x80)
569 {
570 transchar_nonprint(transchar_buf, c);
571 return transchar_buf;
572 }
573 return transchar(c);
574}
575#endif
576
/*
 * Convert non-printable character to two or more printable characters in
 * "buf[]".  "buf" needs to be able to hold five bytes.
 * Does NOT work for multi-byte characters, c must be <= 255.
 *
 * The form that is used, first match wins:
 * - 'display' contains "uhex": hex form, see transchar_hex()
 * - non-EBCDIC: c <= 0x7f: "^" followed by "c ^ 0x40" ("^?" for DEL)
 *   EBCDIC: CtrlChar(c) != 0 or DEL: "^" followed by CtrlChar(c) or '?'
 * - UTF-8 and c >= 0x80: hex form
 * - non-EBCDIC: 0xa0 - 0xfe: "|" followed by "c - 0x80"
 *   EBCDIC: c < 64: "~" followed by MetaChar(c)
 * - everything else: "~" followed by "(c - 0x80) ^ 0x40" (EBCDIC: "~?")
 *
 * Before that a NL is turned into NUL (displayed as "^@") and, when the
 * fileformat of the current buffer is "mac", a CR is turned into NL.
 */
    void
transchar_nonprint(buf, c)
    char_u      *buf;
    int         c;
{
    if (c == NL)
        c = NUL;                /* we use newline in place of a NUL */
    else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
        c = NL;                 /* we use CR in place of NL in this case */

    if (dy_flags & DY_UHEX)             /* 'display' has "uhex" */
        transchar_hex(buf, c);

#ifdef EBCDIC
    /* For EBCDIC only the characters 0-63 and 255 are not printable */
    else if (CtrlChar(c) != 0 || c == DEL)
#else
    else if (c <= 0x7f)                         /* 0x00 - 0x1f and 0x7f */
#endif
    {
        buf[0] = '^';
#ifdef EBCDIC
        if (c == DEL)
            buf[1] = '?';               /* DEL displayed as ^? */
        else
            buf[1] = CtrlChar(c);
#else
        buf[1] = c ^ 0x40;              /* DEL displayed as ^? */
#endif

        buf[2] = NUL;
    }
#ifdef FEAT_MBYTE
    /* UTF-8: a byte >= 0x80 is always displayed in hex form */
    else if (enc_utf8 && c >= 0x80)
    {
        transchar_hex(buf, c);
    }
#endif
#ifndef EBCDIC
    else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
    {
        /* "|x": "x" is the character with the 0x80 bit removed */
        buf[0] = '|';
        buf[1] = c - 0x80;
        buf[2] = NUL;
    }
#else
    else if (c < 64)
    {
        buf[0] = '~';
        buf[1] = MetaChar(c);
        buf[2] = NUL;
    }
#endif
    else                                            /* 0x80 - 0x9f and 0xff */
    {
        /*
         * TODO: EBCDIC I don't know what to do with this chars, so I display
         * them as '~?' for now
         */
        buf[0] = '~';
#ifdef EBCDIC
        buf[1] = '?';                   /* 0xff displayed as ~? */
#else
        buf[1] = (c - 0x80) ^ 0x40;     /* 0xff displayed as ~? */
#endif
        buf[2] = NUL;
    }
}
650
651 void
652transchar_hex(buf, c)
653 char_u *buf;
654 int c;
655{
656 int i = 0;
657
658 buf[0] = '<';
659#ifdef FEAT_MBYTE
660 if (c > 255)
661 {
662 buf[++i] = nr2hex((unsigned)c >> 12);
663 buf[++i] = nr2hex((unsigned)c >> 8);
664 }
665#endif
666 buf[++i] = nr2hex((unsigned)c >> 4);
667 buf[++i] = nr2hex(c);
668 buf[++i] = '>';
669 buf[++i] = NUL;
670}
671
/*
 * Return the hex digit for the lower 4 bits of "c", the other bits are
 * ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static int
nr2hex(c)
    int         c;
{
    int         nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
685
686/*
687 * Return number of display cells occupied by byte "b".
688 * Caller must make sure 0 <= b <= 255.
689 * For multi-byte mode "b" must be the first byte of a character.
690 * A TAB is counted as two cells: "^I".
691 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
692 * cells depends on further bytes.
693 */
694 int
695byte2cells(b)
696 int b;
697{
698#ifdef FEAT_MBYTE
699 if (enc_utf8 && b >= 0x80)
700 return 0;
701#endif
702 return (chartab[b] & CT_CELL_MASK);
703}
704
705/*
706 * Return number of display cells occupied by character "c".
707 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
708 * A TAB is counted as two cells: "^I" or four: "<09>".
709 */
710 int
711char2cells(c)
712 int c;
713{
714 if (IS_SPECIAL(c))
715 return char2cells(K_SECOND(c)) + 2;
716#ifdef FEAT_MBYTE
717 if (c >= 0x80)
718 {
719 /* UTF-8: above 0x80 need to check the value */
720 if (enc_utf8)
721 return utf_char2cells(c);
722 /* DBCS: double-byte means double-width, except for euc-jp with first
723 * byte 0x8e */
724 if (enc_dbcs != 0 && c >= 0x100)
725 {
726 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
727 return 1;
728 return 2;
729 }
730 }
731#endif
732 return (chartab[c & 0xff] & CT_CELL_MASK);
733}
734
735/*
736 * Return number of display cells occupied by character at "*p".
737 * A TAB is counted as two cells: "^I" or four: "<09>".
738 */
739 int
740ptr2cells(p)
741 char_u *p;
742{
743#ifdef FEAT_MBYTE
744 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
745 if (enc_utf8 && *p >= 0x80)
746 return utf_ptr2cells(p);
747 /* For DBCS we can tell the cell count from the first byte. */
748#endif
749 return (chartab[*p] & CT_CELL_MASK);
750}
751
752/*
753 * Return the number of characters string "s" will take on the screen,
754 * counting TABs as two characters: "^I".
755 */
756 int
757vim_strsize(s)
758 char_u *s;
759{
760 return vim_strnsize(s, (int)MAXCOL);
761}
762
763/*
764 * Return the number of characters string "s[len]" will take on the screen,
765 * counting TABs as two characters: "^I".
766 */
767 int
768vim_strnsize(s, len)
769 char_u *s;
770 int len;
771{
772 int size = 0;
773
774 while (*s != NUL && --len >= 0)
775 {
776#ifdef FEAT_MBYTE
777 if (has_mbyte)
778 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000779 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000780
781 size += ptr2cells(s);
782 s += l;
783 len -= l - 1;
784 }
785 else
786#endif
787 size += byte2cells(*s++);
788 }
789 return size;
790}
791
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro is a complete if/else statement that returns from the calling
 * function in both branches, it must be the last statement there:
 * - for a TAB that is displayed as white space (no 'list', or 'listchars'
 *   has a "tab" item): the number of cells up to the next tab stop of "buf";
 * - otherwise: ptr2cells(p).
 * All arguments are parenthesized, so that expressions can be passed.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);

#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col" in the current window.
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif

#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
830
831/*
832 * return the number of characters the string 's' will take on the screen,
833 * taking into account the size of a tab
834 */
835 int
836linetabsize(s)
837 char_u *s;
838{
839 colnr_T col = 0;
840
841 while (*s != NUL)
842 col += lbr_chartabsize_adv(&s, col);
843 return (int)col;
844}
845
846/*
847 * Like linetabsize(), but for a given window instead of the current one.
848 */
849 int
850win_linetabsize(wp, p, len)
851 win_T *wp;
852 char_u *p;
853 colnr_T len;
854{
855 colnr_T col = 0;
856 char_u *s;
857
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000858 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860 return (int)col;
861}
862
863/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000864 * Return TRUE if 'c' is a normal identifier character:
865 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 */
867 int
868vim_isIDc(c)
869 int c;
870{
871 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
872}
873
874/*
875 * return TRUE if 'c' is a keyword character: Letters and characters from
876 * 'iskeyword' option for current buffer.
877 * For multi-byte characters mb_get_class() is used (builtin rules).
878 */
879 int
880vim_iswordc(c)
881 int c;
882{
883#ifdef FEAT_MBYTE
884 if (c >= 0x100)
885 {
886 if (enc_dbcs != 0)
887 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
888 if (enc_utf8)
889 return utf_class(c) >= 2;
890 }
891#endif
892 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
893}
894
895/*
896 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
897 */
898 int
899vim_iswordp(p)
900 char_u *p;
901{
902#ifdef FEAT_MBYTE
903 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
904 return mb_get_class(p) >= 2;
905#endif
906 return GET_CHARTAB(curbuf, *p) != 0;
907}
908
909#if defined(FEAT_SYN_HL) || defined(PROTO)
910 int
911vim_iswordc_buf(p, buf)
912 char_u *p;
913 buf_T *buf;
914{
915# ifdef FEAT_MBYTE
916 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
917 return mb_get_class(p) >= 2;
918# endif
919 return (GET_CHARTAB(buf, *p) != 0);
920}
Bram Moolenaarc4956c82006-03-12 21:58:43 +0000921#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922
923/*
924 * return TRUE if 'c' is a valid file-name character
925 * Assume characters above 0x100 are valid (multi-byte).
926 */
927 int
928vim_isfilec(c)
929 int c;
930{
931 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
932}
933
934/*
935 * return TRUE if 'c' is a printable character
936 * Assume characters above 0x100 are printable (multi-byte), except for
937 * Unicode.
938 */
939 int
940vim_isprintc(c)
941 int c;
942{
943#ifdef FEAT_MBYTE
944 if (enc_utf8 && c >= 0x100)
945 return utf_printable(c);
946#endif
947 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
948}
949
950/*
951 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
952 * byte of a double-byte character.
953 */
954 int
955vim_isprintc_strict(c)
956 int c;
957{
958#ifdef FEAT_MBYTE
959 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
960 return FALSE;
961 if (enc_utf8 && c >= 0x100)
962 return utf_printable(c);
963#endif
964 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
965}
966
967/*
968 * like chartabsize(), but also check for line breaks on the screen
969 */
970 int
971lbr_chartabsize(s, col)
972 unsigned char *s;
973 colnr_T col;
974{
975#ifdef FEAT_LINEBREAK
976 if (!curwin->w_p_lbr && *p_sbr == NUL)
977 {
978#endif
979#ifdef FEAT_MBYTE
980 if (curwin->w_p_wrap)
981 return win_nolbr_chartabsize(curwin, s, col, NULL);
982#endif
983 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
984#ifdef FEAT_LINEBREAK
985 }
986 return win_lbr_chartabsize(curwin, s, col, NULL);
987#endif
988}
989
990/*
991 * Call lbr_chartabsize() and advance the pointer.
992 */
993 int
994lbr_chartabsize_adv(s, col)
995 char_u **s;
996 colnr_T col;
997{
998 int retval;
999
1000 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001001 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001002 return retval;
1003}
1004
/*
 * Return the number of cells the character at "s" takes when displayed at
 * virtual column "col" in window "wp", taking 'linebreak' and 'showbreak'
 * into account.
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     colmax;
    int         added;
# ifdef FEAT_MBYTE
    int         mb_added = 0;
# else
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
        /* Without FEAT_LINEBREAK this block is the whole function. */
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is past the first screen line: move "colmax" forward in
             * steps of "n" until it is beyond "col". */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            /* Stop at the end of the text, or at a non-break character that
             * follows a break character (except directly after the
             * character we started with). */
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* let this character fill the rest of the screen line */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        /* Reduce "col" to the column within the screen line. */
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                /* for a TAB only whole multiples of 'tabstop' are added */
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            /* "added" is only reported through "headp" when "col" is zero */
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1145
1146#if defined(FEAT_MBYTE) || defined(PROTO)
1147/*
1148 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1149 * 'wrap' is on. This means we need to check for a double-byte character that
1150 * doesn't fit at the end of the screen line.
1151 */
1152 static int
1153win_nolbr_chartabsize(wp, s, col, headp)
1154 win_T *wp;
1155 char_u *s;
1156 colnr_T col;
1157 int *headp;
1158{
1159 int n;
1160
1161 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1162 {
1163 n = wp->w_buffer->b_p_ts;
1164 return (int)(n - (col % n));
1165 }
1166 n = ptr2cells(s);
1167 /* Add one cell for a double-width character in the last column of the
1168 * window, displayed with a ">". */
1169 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1170 {
1171 if (headp != NULL)
1172 *headp = 1;
1173 return 3;
1174 }
1175 return n;
1176}
1177
1178/*
1179 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1180 * "wp".
1181 */
1182 int
1183in_win_border(wp, vcol)
1184 win_T *wp;
1185 colnr_T vcol;
1186{
1187 colnr_T width1; /* width of first line (after line number) */
1188 colnr_T width2; /* width of further lines */
1189
1190#ifdef FEAT_VERTSPLIT
1191 if (wp->w_width == 0) /* there is no border */
1192 return FALSE;
1193#endif
1194 width1 = W_WIDTH(wp) - win_col_off(wp);
1195 if (vcol < width1 - 1)
1196 return FALSE;
1197 if (vcol == width1 - 1)
1198 return TRUE;
1199 width2 = width1 + win_col_off2(wp);
1200 return ((vcol - width1) % width2 == width2 - 1);
1201}
1202#endif /* FEAT_MBYTE */
1203
1204/*
1205 * Get virtual column number of pos.
1206 * start: on the first position of this character (TAB, ctrl)
1207 * cursor: where the cursor is on this character (first char, except for TAB)
1208 * end: on the last position of this character (TAB, ctrl)
1209 *
1210 * This is used very often, keep it fast!
1211 */
1212 void
1213getvcol(wp, pos, start, cursor, end)
1214 win_T *wp;
1215 pos_T *pos;
1216 colnr_T *start;
1217 colnr_T *cursor;
1218 colnr_T *end;
1219{
1220 colnr_T vcol;
1221 char_u *ptr; /* points to current char */
1222 char_u *posptr; /* points to char at pos->col */
1223 int incr;
1224 int head;
1225 int ts = wp->w_buffer->b_p_ts;
1226 int c;
1227
1228 vcol = 0;
1229 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1230 posptr = ptr + pos->col;
1231
1232 /*
1233 * This function is used very often, do some speed optimizations.
1234 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1235 * Also use this when 'list' is set but tabs take their normal size.
1236 */
1237 if ((!wp->w_p_list || lcs_tab1 != NUL)
1238#ifdef FEAT_LINEBREAK
1239 && !wp->w_p_lbr && *p_sbr == NUL
1240#endif
1241 )
1242 {
1243#ifndef FEAT_MBYTE
1244 head = 0;
1245#endif
1246 for (;;)
1247 {
1248#ifdef FEAT_MBYTE
1249 head = 0;
1250#endif
1251 c = *ptr;
1252 /* make sure we don't go past the end of the line */
1253 if (c == NUL)
1254 {
1255 incr = 1; /* NUL at end of line only takes one column */
1256 break;
1257 }
1258 /* A tab gets expanded, depending on the current column */
1259 if (c == TAB)
1260 incr = ts - (vcol % ts);
1261 else
1262 {
1263#ifdef FEAT_MBYTE
1264 if (has_mbyte)
1265 {
1266 /* For utf-8, if the byte is >= 0x80, need to look at
1267 * further bytes to find the cell width. */
1268 if (enc_utf8 && c >= 0x80)
1269 incr = utf_ptr2cells(ptr);
1270 else
1271 incr = CHARSIZE(c);
1272
1273 /* If a double-cell char doesn't fit at the end of a line
1274 * it wraps to the next line, it's like this char is three
1275 * cells wide. */
1276 if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1277 {
1278 ++incr;
1279 head = 1;
1280 }
1281 }
1282 else
1283#endif
1284 incr = CHARSIZE(c);
1285 }
1286
1287 if (ptr >= posptr) /* character at pos->col */
1288 break;
1289
1290 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001291 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292 }
1293 }
1294 else
1295 {
1296 for (;;)
1297 {
1298 /* A tab gets expanded, depending on the current column */
1299 head = 0;
1300 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1301 /* make sure we don't go past the end of the line */
1302 if (*ptr == NUL)
1303 {
1304 incr = 1; /* NUL at end of line only takes one column */
1305 break;
1306 }
1307
1308 if (ptr >= posptr) /* character at pos->col */
1309 break;
1310
1311 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001312 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313 }
1314 }
1315 if (start != NULL)
1316 *start = vcol + head;
1317 if (end != NULL)
1318 *end = vcol + incr - 1;
1319 if (cursor != NULL)
1320 {
1321 if (*ptr == TAB
1322 && (State & NORMAL)
1323 && !wp->w_p_list
1324 && !virtual_active()
1325#ifdef FEAT_VISUAL
1326 && !(VIsual_active
1327 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1328#endif
1329 )
1330 *cursor = vcol + incr - 1; /* cursor at end */
1331 else
1332 *cursor = vcol + head; /* cursor at start */
1333 }
1334}
1335
1336/*
1337 * Get virtual cursor column in the current window, pretending 'list' is off.
1338 */
1339 colnr_T
1340getvcol_nolist(posp)
1341 pos_T *posp;
1342{
1343 int list_save = curwin->w_p_list;
1344 colnr_T vcol;
1345
1346 curwin->w_p_list = FALSE;
1347 getvcol(curwin, posp, NULL, &vcol, NULL);
1348 curwin->w_p_list = list_save;
1349 return vcol;
1350}
1351
1352#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1353/*
1354 * Get virtual column in virtual mode.
1355 */
1356 void
1357getvvcol(wp, pos, start, cursor, end)
1358 win_T *wp;
1359 pos_T *pos;
1360 colnr_T *start;
1361 colnr_T *cursor;
1362 colnr_T *end;
1363{
1364 colnr_T col;
1365 colnr_T coladd;
1366 colnr_T endadd;
1367# ifdef FEAT_MBYTE
1368 char_u *ptr;
1369# endif
1370
1371 if (virtual_active())
1372 {
1373 /* For virtual mode, only want one value */
1374 getvcol(wp, pos, &col, NULL, NULL);
1375
1376 coladd = pos->coladd;
1377 endadd = 0;
1378# ifdef FEAT_MBYTE
1379 /* Cannot put the cursor on part of a wide character. */
1380 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1381 if (pos->col < STRLEN(ptr))
1382 {
1383 int c = (*mb_ptr2char)(ptr + pos->col);
1384
1385 if (c != TAB && vim_isprintc(c))
1386 {
1387 endadd = char2cells(c) - 1;
Bram Moolenaara5792f52005-11-23 21:25:05 +00001388 if (coladd > endadd) /* past end of line */
1389 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 else
1391 coladd = 0;
1392 }
1393 }
1394# endif
1395 col += coladd;
1396 if (start != NULL)
1397 *start = col;
1398 if (cursor != NULL)
1399 *cursor = col;
1400 if (end != NULL)
1401 *end = col + endadd;
1402 }
1403 else
1404 getvcol(wp, pos, start, cursor, end);
1405}
1406#endif
1407
1408#if defined(FEAT_VISUAL) || defined(PROTO)
1409/*
1410 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1411 * Used for Visual block mode.
1412 */
1413 void
1414getvcols(wp, pos1, pos2, left, right)
1415 win_T *wp;
1416 pos_T *pos1, *pos2;
1417 colnr_T *left, *right;
1418{
1419 colnr_T from1, from2, to1, to2;
1420
1421 if (ltp(pos1, pos2))
1422 {
1423 getvvcol(wp, pos1, &from1, NULL, &to1);
1424 getvvcol(wp, pos2, &from2, NULL, &to2);
1425 }
1426 else
1427 {
1428 getvvcol(wp, pos2, &from1, NULL, &to1);
1429 getvvcol(wp, pos1, &from2, NULL, &to2);
1430 }
1431 if (from2 < from1)
1432 *left = from2;
1433 else
1434 *left = from1;
1435 if (to2 > to1)
1436 {
1437 if (*p_sel == 'e' && from2 - 1 >= to1)
1438 *right = from2 - 1;
1439 else
1440 *right = to2;
1441 }
1442 else
1443 *right = to1;
1444}
1445#endif
1446
1447/*
1448 * skipwhite: skip over ' ' and '\t'.
1449 */
1450 char_u *
1451skipwhite(p)
1452 char_u *p;
1453{
1454 while (vim_iswhite(*p)) /* skip to next non-white */
1455 ++p;
1456 return p;
1457}
1458
1459/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001460 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001461 */
1462 char_u *
1463skipdigits(p)
1464 char_u *p;
1465{
1466 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1467 ++p;
1468 return p;
1469}
1470
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001471#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001472/*
1473 * skip over digits and hex characters
1474 */
1475 char_u *
1476skiphex(p)
1477 char_u *p;
1478{
1479 while (vim_isxdigit(*p)) /* skip to next non-digit */
1480 ++p;
1481 return p;
1482}
1483#endif
1484
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001485#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1486/*
1487 * skip to digit (or NUL after the string)
1488 */
1489 char_u *
1490skiptodigit(p)
1491 char_u *p;
1492{
1493 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1494 ++p;
1495 return p;
1496}
1497
1498/*
1499 * skip to hex character (or NUL after the string)
1500 */
1501 char_u *
1502skiptohex(p)
1503 char_u *p;
1504{
1505 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1506 ++p;
1507 return p;
1508}
1509#endif
1510
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * The library isdigit() is not used, because on some systems it also
 * considers superscript 1 to be a digit.
 * Returns 1 for '0' - '9' and 0 for anything else.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1523
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * The library isxdigit() is not used, because on some systems it also
 * considers superscript 1 to be a digit.
 * Returns 1 for '0' - '9', 'a' - 'f' and 'A' - 'F', 0 for anything else.
 */
    int
vim_isxdigit(c)
    int         c;
{
    int         decimal = (c >= '0' && c <= '9');
    int         lower = (c >= 'a' && c <= 'f');
    int         upper = (c >= 'A' && c <= 'F');

    return (decimal || lower || upper);
}
1537
Bram Moolenaar78622822005-08-23 21:00:13 +00001538#if defined(FEAT_MBYTE) || defined(PROTO)
1539/*
1540 * Vim's own character class functions. These exist because many library
1541 * islower()/toupper() etc. do not work properly: they crash when used with
1542 * invalid values or can't handle latin1 when the locale is C.
1543 * Speed is most important here.
1544 */
1545#define LATIN1LOWER 'l'
1546#define LATIN1UPPER 'U'
1547
1548/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001549static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1550static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1551static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
Bram Moolenaar78622822005-08-23 21:00:13 +00001552
1553 int
1554vim_islower(c)
1555 int c;
1556{
1557 if (c <= '@')
1558 return FALSE;
1559 if (c >= 0x80)
1560 {
1561 if (enc_utf8)
1562 return utf_islower(c);
1563 if (c >= 0x100)
1564 {
1565#ifdef HAVE_ISWLOWER
1566 if (has_mbyte)
1567 return iswlower(c);
1568#endif
1569 /* islower() can't handle these chars and may crash */
1570 return FALSE;
1571 }
1572 if (enc_latin1like)
1573 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1574 }
1575 return islower(c);
1576}
1577
1578 int
1579vim_isupper(c)
1580 int c;
1581{
1582 if (c <= '@')
1583 return FALSE;
1584 if (c >= 0x80)
1585 {
1586 if (enc_utf8)
1587 return utf_isupper(c);
1588 if (c >= 0x100)
1589 {
1590#ifdef HAVE_ISWUPPER
1591 if (has_mbyte)
1592 return iswupper(c);
1593#endif
1594 /* islower() can't handle these chars and may crash */
1595 return FALSE;
1596 }
1597 if (enc_latin1like)
1598 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1599 }
1600 return isupper(c);
1601}
1602
1603 int
1604vim_toupper(c)
1605 int c;
1606{
1607 if (c <= '@')
1608 return c;
1609 if (c >= 0x80)
1610 {
1611 if (enc_utf8)
1612 return utf_toupper(c);
1613 if (c >= 0x100)
1614 {
1615#ifdef HAVE_TOWUPPER
1616 if (has_mbyte)
1617 return towupper(c);
1618#endif
1619 /* toupper() can't handle these chars and may crash */
1620 return c;
1621 }
1622 if (enc_latin1like)
1623 return latin1upper[c];
1624 }
1625 return TOUPPER_LOC(c);
1626}
1627
1628 int
1629vim_tolower(c)
1630 int c;
1631{
1632 if (c <= '@')
1633 return c;
1634 if (c >= 0x80)
1635 {
1636 if (enc_utf8)
1637 return utf_tolower(c);
1638 if (c >= 0x100)
1639 {
1640#ifdef HAVE_TOWLOWER
1641 if (has_mbyte)
1642 return towlower(c);
1643#endif
1644 /* tolower() can't handle these chars and may crash */
1645 return c;
1646 }
1647 if (enc_latin1like)
1648 return latin1lower[c];
1649 }
1650 return TOLOWER_LOC(c);
1651}
1652#endif
1653
Bram Moolenaar071d4272004-06-13 20:20:40 +00001654/*
1655 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1656 */
1657 char_u *
1658skiptowhite(p)
1659 char_u *p;
1660{
1661 while (*p != ' ' && *p != '\t' && *p != NUL)
1662 ++p;
1663 return p;
1664}
1665
1666#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1667 || defined(PROTO)
1668/*
1669 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1670 */
1671 char_u *
1672skiptowhite_esc(p)
1673 char_u *p;
1674{
1675 while (*p != ' ' && *p != '\t' && *p != NUL)
1676 {
1677 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1678 ++p;
1679 ++p;
1680 }
1681 return p;
1682}
1683#endif
1684
1685/*
1686 * Getdigits: Get a number from a string and skip over it.
1687 * Note: the argument is a pointer to a char_u pointer!
1688 */
1689 long
1690getdigits(pp)
1691 char_u **pp;
1692{
1693 char_u *p;
1694 long retval;
1695
1696 p = *pp;
1697 retval = atol((char *)p);
1698 if (*p == '-') /* skip negative sign */
1699 ++p;
1700 p = skipdigits(p); /* skip to next non-digit */
1701 *pp = p;
1702 return retval;
1703}
1704
1705/*
1706 * Return TRUE if "lbuf" is empty or only contains blanks.
1707 */
1708 int
1709vim_isblankline(lbuf)
1710 char_u *lbuf;
1711{
1712 char_u *p;
1713
1714 p = skipwhite(lbuf);
1715 return (*p == NUL || *p == '\r' || *p == '\n');
1716}
1717
1718/*
1719 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001720 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1722 * 0 decimal
1723 * '0' octal
1724 * 'X' hex
1725 * 'x' hex
1726 * If "len" is not NULL, the length of the number in characters is returned.
1727 * If "nptr" is not NULL, the signed result is returned in it.
1728 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001729 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001730 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1731 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001732 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001733 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734 */
1735 void
1736vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1737 char_u *start;
1738 int *hexp; /* return: type of number 0 = decimal, 'x'
1739 or 'X' is hex, '0' = octal */
1740 int *len; /* return: detected length of number */
1741 int dooct; /* recognize octal number */
1742 int dohex; /* recognize hex number */
1743 long *nptr; /* return: signed result */
1744 unsigned long *unptr; /* return: unsigned result */
1745{
1746 char_u *ptr = start;
1747 int hex = 0; /* default is decimal */
1748 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001750 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751
1752 if (ptr[0] == '-')
1753 {
1754 negative = TRUE;
1755 ++ptr;
1756 }
1757
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001758 /* Recognize hex and octal. */
1759 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001760 {
1761 hex = ptr[1];
1762 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1763 ptr += 2; /* hexadecimal */
1764 else
1765 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001766 hex = 0; /* default is decimal */
1767 if (dooct)
1768 {
1769 /* Don't interpret "0", "08" or "0129" as octal. */
1770 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1771 {
1772 if (ptr[n] > '7')
1773 {
1774 hex = 0; /* can't be octal */
1775 break;
1776 }
1777 if (ptr[n] > '0')
1778 hex = '0'; /* assume octal */
1779 }
1780 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001781 }
1782 }
1783
1784 /*
1785 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1786 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001787 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001789 /* octal */
1790 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001792 un = 8 * un + (unsigned long)(*ptr - '0');
1793 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001794 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001795 }
1796 else if (hex != 0 || dohex > 1)
1797 {
1798 /* hex */
1799 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001801 un = 16 * un + (unsigned long)hex2nr(*ptr);
1802 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 }
1804 }
1805 else
1806 {
1807 /* decimal */
1808 while (VIM_ISDIGIT(*ptr))
1809 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001810 un = 10 * un + (unsigned long)(*ptr - '0');
1811 ++ptr;
1812 }
1813 }
1814
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 if (hexp != NULL)
1816 *hexp = hex;
1817 if (len != NULL)
1818 *len = (int)(ptr - start);
1819 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001820 {
1821 if (negative) /* account for leading '-' for decimal numbers */
1822 *nptr = -(long)un;
1823 else
1824 *nptr = (long)un;
1825 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001826 if (unptr != NULL)
1827 *unptr = un;
1828}
1829
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other character is handled like a decimal digit, which gives a meaningless
 * result.
 */
    int
hex2nr(c)
    int         c;
{
    int         zero;           /* the character that would have value zero */

    if (c >= 'a' && c <= 'f')
        zero = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
        zero = 'A' - 10;
    else
        zero = '0';
    return c - zero;
}
1844
1845#if defined(FEAT_TERMRESPONSE) \
1846 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1847/*
1848 * Convert two hex characters to a byte.
1849 * Return -1 if one of the characters is not hex.
1850 */
1851 int
1852hexhex2nr(p)
1853 char_u *p;
1854{
1855 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1856 return -1;
1857 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1858}
1859#endif
1860
1861/*
1862 * Return TRUE if "str" starts with a backslash that should be removed.
1863 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1864 * backslash is not a normal file name character.
1865 * '$' is a valid file name character, we don't remove the backslash before
1866 * it. This means it is not possible to use an environment variable after a
1867 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1868 * Although "\ name" is valid, the backslash in "Program\ files" must be
1869 * removed. Assume a file name doesn't start with a space.
1870 * For multi-byte names, never remove a backslash before a non-ascii
1871 * character, assume that all multi-byte characters are valid file name
1872 * characters.
1873 */
1874 int
1875rem_backslash(str)
1876 char_u *str;
1877{
1878#ifdef BACKSLASH_IN_FILENAME
1879 return (str[0] == '\\'
1880# ifdef FEAT_MBYTE
1881 && str[1] < 0x80
1882# endif
1883 && (str[1] == ' '
1884 || (str[1] != NUL
1885 && str[1] != '*'
1886 && str[1] != '?'
1887 && !vim_isfilec(str[1]))));
1888#else
1889 return (str[0] == '\\' && str[1] != NUL);
1890#endif
1891}
1892
1893/*
1894 * Halve the number of backslashes in a file name argument.
1895 * For MS-DOS we only do this if the character after the backslash
1896 * is not a normal file character.
1897 */
1898 void
1899backslash_halve(p)
1900 char_u *p;
1901{
1902 for ( ; *p; ++p)
1903 if (rem_backslash(p))
Bram Moolenaar452a81b2007-08-06 20:28:43 +00001904 mch_memmove(p, p + 1, STRLEN(p));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001905}
1906
1907/*
1908 * backslash_halve() plus save the result in allocated memory.
1909 */
1910 char_u *
1911backslash_halve_save(p)
1912 char_u *p;
1913{
1914 char_u *res;
1915
1916 res = vim_strsave(p);
1917 if (res == NULL)
1918 return p;
1919 backslash_halve(res);
1920 return res;
1921}
1922
1923#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value, each entry is the byte it
 * is replaced with by ebcdic2ascii().  There are eight entries per row, thus
 * row N starts at index 8 * N.  Note that all values are octal constants.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    /* index 0100 (octal) and up: the part taken from xxd.c */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
1964
1965/*
1966 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1967 * wanting 7-bit ASCII characters out the other end.
1968 */
1969 void
1970ebcdic2ascii(buffer, len)
1971 char_u *buffer;
1972 int len;
1973{
1974 int i;
1975
1976 for (i = 0; i < len; i++)
1977 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1978}
1979#endif