blob: a82a7264ba76e498554320d042210778bcdad8b5 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
22static int chartab_initialized = FALSE;
23
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the flag for "c" lives in byte (c >> 3), bit (c & 7).
 *
 * Every macro expands to one fully parenthesized expression, so it can be
 * used inside a larger expression without precedence surprises.
 * "c" is evaluated twice: do not pass an argument with side effects.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))

29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
210 if (!do_isalpha || isalpha(c)
211#ifdef FEAT_FKMAP
212 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
213#endif
214 )
215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
219 chartab[c] &= ~CT_ID_CHAR;
220 else
221 chartab[c] |= CT_ID_CHAR;
222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
229#ifdef FEAT_FKMAP
230 || (p_altkeymap
231 && (F_isalpha(c) || F_isdigit(c)))
232#endif
233 )
234#ifdef FEAT_MBYTE
235 /* For double-byte we keep the cell width, so
236 * that we can detect it from the first byte. */
237 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
238#endif
239 )
240 {
241 if (tilde)
242 {
243 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
244 + ((dy_flags & DY_UHEX) ? 4 : 2);
245 chartab[c] &= ~CT_PRINT_CHAR;
246 }
247 else
248 {
249 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
250 chartab[c] |= CT_PRINT_CHAR;
251 }
252 }
253 }
254 else if (i == 2) /* (re)set fname flag */
255 {
256 if (tilde)
257 chartab[c] &= ~CT_FNAME_CHAR;
258 else
259 chartab[c] |= CT_FNAME_CHAR;
260 }
261 else /* i == 3 */ /* (re)set keyword flag */
262 {
263 if (tilde)
264 RESET_CHARTAB(buf, c);
265 else
266 SET_CHARTAB(buf, c);
267 }
268 }
269 ++c;
270 }
271 p = skip_to_option_part(p);
272 }
273 }
274 chartab_initialized = TRUE;
275 return OK;
276}
277
278/*
279 * Translate any special characters in buf[bufsize] in-place.
280 * The result is a string with only printable characters, but if there is not
281 * enough room, not all characters will be translated.
282 */
283 void
284trans_characters(buf, bufsize)
285 char_u *buf;
286 int bufsize;
287{
288 int len; /* length of string needing translation */
289 int room; /* room in buffer after string */
290 char_u *trs; /* translated character */
291 int trs_len; /* length of trs[] */
292
293 len = (int)STRLEN(buf);
294 room = bufsize - len;
295 while (*buf != 0)
296 {
297# ifdef FEAT_MBYTE
298 /* Assume a multi-byte character doesn't need translation. */
299 if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
300 len -= trs_len;
301 else
302# endif
303 {
304 trs = transchar_byte(*buf);
305 trs_len = (int)STRLEN(trs);
306 if (trs_len > 1)
307 {
308 room -= trs_len - 1;
309 if (room <= 0)
310 return;
311 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
312 }
313 mch_memmove(buf, trs, (size_t)trs_len);
314 --len;
315 }
316 buf += trs_len;
317 }
318}
319
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
/*
 * Return a copy of "s" in allocated memory, in which special characters have
 * been replaced with printable characters.
 * Returns NULL when out of memory.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
#ifdef FEAT_MBYTE
    int         l, len, c;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* First pass: compute the length of the result, taking account of
         * unprintable multi-byte characters. */
        len = 0;
        for (p = s; *p != NUL; )
        {
            l = (*mb_ptr2len_check)(p);
            if (l > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (vim_isprintc(c))
                    len += l;
                else
                {
                    transchar_hex(hexbuf, c);
                    len += STRLEN(hexbuf);
                }
            }
            else
            {
                l = byte2cells(*p++);
                len += (l > 0) ? l : 4;     /* 4: illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));

    if (res == NULL)
        return NULL;

    /* Second pass: build the result by appending one character at a time. */
    *res = NUL;
    for (p = s; *p != NUL; )
    {
#ifdef FEAT_MBYTE
        if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(res, p, l);     /* append printable multi-byte char */
            else
                transchar_hex(res + STRLEN(res), c);
            p += l;
        }
        else
#endif
            STRCAT(res, transchar_byte(*p++));
    }
    return res;
}
#endif
395
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Make a lower-case copy of "str[orglen]", to be used for comparing while
 * ignoring case.  Uses the current locale.
 * When "buf" is NULL the result is an allocated string (NULL is returned
 * when out of memory).
 * Otherwise the result is put in "buf[buflen]", truncating it when it does
 * not fit, and "buf" is returned.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;
    int         i;
    int         len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)  (buf == NULL ? GA_PTR(i) : buf + i)

    /* Start with an unmodified, NUL terminated copy of "str", either in
     * allocated memory or in "buf". */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
        GA_CHAR(len) = NUL;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
        buf[len] = NUL;
    }

    /* Now make each character lower case. */
    for (i = 0; STR_CHAR(i) != NUL; )
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c, lc;

                c = utf_ptr2char(STR_PTR(i));
                lc = utf_tolower(c);
                if (c != lc)
                {
                    int     ol = utf_char2len(c);
                    int     nl = utf_char2len(lc);

                    /* When the lower case character takes more bytes there
                     * must be room for it.  If there isn't, keep the old
                     * character. */
                    if (nl > ol
                            && (buf == NULL
                                ? ga_grow(&ga, nl - ol + 1) == FAIL
                                : len + nl - ol >= buflen))
                    {
                        lc = c;
                        nl = ol;
                    }

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        mch_memmove(STR_PTR(i) + nl, STR_PTR(i) + ol,
                                              STRLEN(STR_PTR(i) + ol) + 1);
                        if (buf == NULL)
                            ga.ga_len += nl - ol;
                        else
                            len += nl - ol;
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len_check)(STR_PTR(i));
        }
        else
#endif
        {
            *STR_PTR(i) = TOLOWER_LOC(STR_CHAR(i));
            ++i;
        }
    }

    return buf == NULL ? (char_u *)ga.ga_data : buf;
}
#endif
509
510/*
511 * Catch 22: chartab[] can't be initialized before the options are
512 * initialized, and initializing options may cause transchar() to be called!
513 * When chartab_initialized == FALSE don't use chartab[].
514 * Does NOT work for multi-byte characters, c must be <= 255.
515 * Also doesn't work for the first byte of a multi-byte, "c" must be a
516 * character!
517 */
518static char_u transchar_buf[7];
519
520 char_u *
521transchar(c)
522 int c;
523{
524 int i;
525
526 i = 0;
527 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
528 {
529 transchar_buf[0] = '~';
530 transchar_buf[1] = '@';
531 i = 2;
532 c = K_SECOND(c);
533 }
534
535 if ((!chartab_initialized && (
536#ifdef EBCDIC
537 (c >= 64 && c < 255)
538#else
539 (c >= ' ' && c <= '~')
540#endif
541#ifdef FEAT_FKMAP
542 || F_ischar(c)
543#endif
544 )) || (c < 256 && vim_isprintc_strict(c)))
545 {
546 /* printable character */
547 transchar_buf[i] = c;
548 transchar_buf[i + 1] = NUL;
549 }
550 else
551 transchar_nonprint(transchar_buf + i, c);
552 return transchar_buf;
553}
554
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Like transchar(), but "c" is a byte instead of a character.
 * With UTF-8 a byte of 0x80 or above is always translated as non-printable;
 * this takes care of an illegal UTF-8 byte.
 */
    char_u *
transchar_byte(c)
    int         c;
{
    if (!enc_utf8 || c < 0x80)
        return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
572
573/*
574 * Convert non-printable character to two or more printable characters in
575 * "buf[]". "buf" needs to be able to hold five bytes.
576 * Does NOT work for multi-byte characters, c must be <= 255.
577 */
578 void
579transchar_nonprint(buf, c)
580 char_u *buf;
581 int c;
582{
583 if (c == NL)
584 c = NUL; /* we use newline in place of a NUL */
585 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
586 c = NL; /* we use CR in place of NL in this case */
587
588 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
589 transchar_hex(buf, c);
590
591#ifdef EBCDIC
592 /* For EBCDIC only the characters 0-63 and 255 are not printable */
593 else if (CtrlChar(c) != 0 || c == DEL)
594#else
595 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
596#endif
597 {
598 buf[0] = '^';
599#ifdef EBCDIC
600 if (c == DEL)
601 buf[1] = '?'; /* DEL displayed as ^? */
602 else
603 buf[1] = CtrlChar(c);
604#else
605 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
606#endif
607
608 buf[2] = NUL;
609 }
610#ifdef FEAT_MBYTE
611 else if (enc_utf8 && c >= 0x80)
612 {
613 transchar_hex(buf, c);
614 }
615#endif
616#ifndef EBCDIC
617 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
618 {
619 buf[0] = '|';
620 buf[1] = c - 0x80;
621 buf[2] = NUL;
622 }
623#else
624 else if (c < 64)
625 {
626 buf[0] = '~';
627 buf[1] = MetaChar(c);
628 buf[2] = NUL;
629 }
630#endif
631 else /* 0x80 - 0x9f and 0xff */
632 {
633 /*
634 * TODO: EBCDIC I don't know what to do with this chars, so I display
635 * them as '~?' for now
636 */
637 buf[0] = '~';
638#ifdef EBCDIC
639 buf[1] = '?'; /* 0xff displayed as ~? */
640#else
641 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
642#endif
643 buf[2] = NUL;
644 }
645}
646
647 void
648transchar_hex(buf, c)
649 char_u *buf;
650 int c;
651{
652 int i = 0;
653
654 buf[0] = '<';
655#ifdef FEAT_MBYTE
656 if (c > 255)
657 {
658 buf[++i] = nr2hex((unsigned)c >> 12);
659 buf[++i] = nr2hex((unsigned)c >> 8);
660 }
661#endif
662 buf[++i] = nr2hex((unsigned)c >> 4);
663 buf[++i] = nr2hex(c);
664 buf[++i] = '>';
665 buf[++i] = NUL;
666}
667
/*
 * Return the hex digit for the lower 4 bits of "c"; the other bits are
 * ignored.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static int
nr2hex(c)
    int         c;
{
    int         nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
681
682/*
683 * Return number of display cells occupied by byte "b".
684 * Caller must make sure 0 <= b <= 255.
685 * For multi-byte mode "b" must be the first byte of a character.
686 * A TAB is counted as two cells: "^I".
687 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
688 * cells depends on further bytes.
689 */
690 int
691byte2cells(b)
692 int b;
693{
694#ifdef FEAT_MBYTE
695 if (enc_utf8 && b >= 0x80)
696 return 0;
697#endif
698 return (chartab[b] & CT_CELL_MASK);
699}
700
701/*
702 * Return number of display cells occupied by character "c".
703 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
704 * A TAB is counted as two cells: "^I" or four: "<09>".
705 */
706 int
707char2cells(c)
708 int c;
709{
710 if (IS_SPECIAL(c))
711 return char2cells(K_SECOND(c)) + 2;
712#ifdef FEAT_MBYTE
713 if (c >= 0x80)
714 {
715 /* UTF-8: above 0x80 need to check the value */
716 if (enc_utf8)
717 return utf_char2cells(c);
718 /* DBCS: double-byte means double-width, except for euc-jp with first
719 * byte 0x8e */
720 if (enc_dbcs != 0 && c >= 0x100)
721 {
722 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
723 return 1;
724 return 2;
725 }
726 }
727#endif
728 return (chartab[c & 0xff] & CT_CELL_MASK);
729}
730
731/*
732 * Return number of display cells occupied by character at "*p".
733 * A TAB is counted as two cells: "^I" or four: "<09>".
734 */
735 int
736ptr2cells(p)
737 char_u *p;
738{
739#ifdef FEAT_MBYTE
740 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
741 if (enc_utf8 && *p >= 0x80)
742 return utf_ptr2cells(p);
743 /* For DBCS we can tell the cell count from the first byte. */
744#endif
745 return (chartab[*p] & CT_CELL_MASK);
746}
747
748/*
749 * Return the number of characters string "s" will take on the screen,
750 * counting TABs as two characters: "^I".
751 */
752 int
753vim_strsize(s)
754 char_u *s;
755{
756 return vim_strnsize(s, (int)MAXCOL);
757}
758
759/*
760 * Return the number of characters string "s[len]" will take on the screen,
761 * counting TABs as two characters: "^I".
762 */
763 int
764vim_strnsize(s, len)
765 char_u *s;
766 int len;
767{
768 int size = 0;
769
770 while (*s != NUL && --len >= 0)
771 {
772#ifdef FEAT_MBYTE
773 if (has_mbyte)
774 {
775 int l = (*mb_ptr2len_check)(s);
776
777 size += ptr2cells(s);
778 s += l;
779 len -= l - 1;
780 }
781 else
782#endif
783 size += byte2cells(*s++);
784 }
785 return size;
786}
787
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to an if/else in which both branches return from the
 * enclosing function, thus it must be the last statement of that function:
 * - For a TAB that is displayed as white space ('list' is off or
 *   "lcs_tab1" is set) return the distance to the next tab stop of "buf".
 * - Otherwise return ptr2cells(p).
 * Every argument is parenthesized in the expansion, so an expression such as
 * "col + 1" can be passed safely.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
804
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col" in the current window, taking the size of a tab
 * into account.
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    /* The macro returns from the function in both of its branches. */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
815
#ifdef FEAT_LINEBREAK
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col" in window "wp", taking the size of a tab in the
 * buffer of that window into account.
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    /* The macro returns from the function in both of its branches. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
826
827/*
828 * return the number of characters the string 's' will take on the screen,
829 * taking into account the size of a tab
830 */
831 int
832linetabsize(s)
833 char_u *s;
834{
835 colnr_T col = 0;
836
837 while (*s != NUL)
838 col += lbr_chartabsize_adv(&s, col);
839 return (int)col;
840}
841
842/*
843 * Like linetabsize(), but for a given window instead of the current one.
844 */
845 int
846win_linetabsize(wp, p, len)
847 win_T *wp;
848 char_u *p;
849 colnr_T len;
850{
851 colnr_T col = 0;
852 char_u *s;
853
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000854 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 return (int)col;
857}
858
859/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000860 * Return TRUE if 'c' is a normal identifier character:
861 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862 */
863 int
864vim_isIDc(c)
865 int c;
866{
867 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
868}
869
870/*
871 * return TRUE if 'c' is a keyword character: Letters and characters from
872 * 'iskeyword' option for current buffer.
873 * For multi-byte characters mb_get_class() is used (builtin rules).
874 */
875 int
876vim_iswordc(c)
877 int c;
878{
879#ifdef FEAT_MBYTE
880 if (c >= 0x100)
881 {
882 if (enc_dbcs != 0)
883 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
884 if (enc_utf8)
885 return utf_class(c) >= 2;
886 }
887#endif
888 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
889}
890
891/*
892 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
893 */
894 int
895vim_iswordp(p)
896 char_u *p;
897{
898#ifdef FEAT_MBYTE
899 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
900 return mb_get_class(p) >= 2;
901#endif
902 return GET_CHARTAB(curbuf, *p) != 0;
903}
904
905#if defined(FEAT_SYN_HL) || defined(PROTO)
906 int
907vim_iswordc_buf(p, buf)
908 char_u *p;
909 buf_T *buf;
910{
911# ifdef FEAT_MBYTE
912 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
913 return mb_get_class(p) >= 2;
914# endif
915 return (GET_CHARTAB(buf, *p) != 0);
916}
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000917
918static char spell_chartab[256];
919
920/*
921 * Init the chartab used for spelling. Only depends on 'encoding'.
922 * Called once while starting up and when 'encoding' was changed.
923 * Unfortunately, we can't use isalpha() here, since the current locale may
924 * differ from 'encoding'.
925 */
926 void
927init_spell_chartab()
928{
929 int i;
930
931 /* ASCII is always the same, no matter what 'encoding' is used.
932 * EBCDIC is not supported! */
933 for (i = 0; i < '0'; ++i)
934 spell_chartab[i] = FALSE;
935 /* We include numbers. A word shouldn't start with a number, but handling
936 * that is done separately. */
937 for ( ; i <= '9'; ++i)
938 spell_chartab[i] = TRUE;
939 for ( ; i < 'A'; ++i)
940 spell_chartab[i] = FALSE;
941 for ( ; i <= 'Z'; ++i)
942 spell_chartab[i] = TRUE;
943 for ( ; i < 'a'; ++i)
944 spell_chartab[i] = FALSE;
945 for ( ; i <= 'z'; ++i)
946 spell_chartab[i] = TRUE;
947#ifdef FEAT_MBYTE
948 if (enc_dbcs)
949 {
950 /* DBCS: assume double-wide characters are word characters. */
951 for ( ; i <= 255; ++i)
952 if (MB_BYTE2LEN(i) == 2)
953 spell_chartab[i] = TRUE;
954 else
955 spell_chartab[i] = FALSE;
956 }
957 else if (STRCMP(p_enc, "cp850") == 0)
958#endif
959#if defined(MSDOS) || defined(FEAT_MBYTE)
960 {
961 /* cp850, MS-DOS */
962 for ( ; i < 128; ++i)
963 spell_chartab[i] = FALSE;
964 for ( ; i <= 0x9a; ++i)
965 spell_chartab[i] = TRUE;
966 for ( ; i < 0xa0; ++i)
967 spell_chartab[i] = FALSE;
968 for ( ; i <= 0xa5; ++i)
969 spell_chartab[i] = TRUE;
970 for ( ; i <= 255; ++i)
971 spell_chartab[i] = FALSE;
972 }
973#endif
974#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000975 else if (STRCMP(p_enc, "iso-8859-2") == 0)
976 {
977 /* latin2 */
978 for ( ; i <= 0xa0; ++i)
979 spell_chartab[i] = FALSE;
980 for ( ; i <= 255; ++i)
981 spell_chartab[i] = TRUE;
982 spell_chartab[0xa4] = FALSE; /* currency sign */
983 spell_chartab[0xa7] = FALSE; /* paragraph sign */
984 spell_chartab[0xad] = FALSE; /* dash */
985 spell_chartab[0xb0] = FALSE; /* degrees */
986 spell_chartab[0xf7] = FALSE; /* divide-by */
987 }
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000988 else
989#endif
990#if defined(FEAT_MBYTE) || !defined(MSDOS)
991 {
992 /* Rough guess: anything we don't recognize assumes word characters
993 * like latin1. */
994 for ( ; i < 0xc0; ++i)
995 spell_chartab[i] = FALSE;
996 for ( ; i <= 255; ++i)
997 spell_chartab[i] = TRUE;
998# ifdef FEAT_MBYTE
999 if (STRCMP(p_enc, "latin1") == 0)
1000# endif
1001 spell_chartab[0xf7] = FALSE; /* divide-by */
1002 }
1003#endif
1004}
1005
1006/*
1007 * Return TRUE if "p" points to a word character.
1008 * This only depends on 'encoding', not on 'iskeyword'.
1009 */
1010 int
1011spell_iswordc(p)
1012 char_u *p;
1013{
1014# ifdef FEAT_MBYTE
1015 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
1016 return mb_get_class(p) >= 2;
1017# endif
1018 return spell_chartab[*p];
1019}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001020#endif
1021
1022/*
1023 * return TRUE if 'c' is a valid file-name character
1024 * Assume characters above 0x100 are valid (multi-byte).
1025 */
1026 int
1027vim_isfilec(c)
1028 int c;
1029{
1030 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
1031}
1032
1033/*
1034 * return TRUE if 'c' is a printable character
1035 * Assume characters above 0x100 are printable (multi-byte), except for
1036 * Unicode.
1037 */
1038 int
1039vim_isprintc(c)
1040 int c;
1041{
1042#ifdef FEAT_MBYTE
1043 if (enc_utf8 && c >= 0x100)
1044 return utf_printable(c);
1045#endif
1046 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1047}
1048
1049/*
1050 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
1051 * byte of a double-byte character.
1052 */
1053 int
1054vim_isprintc_strict(c)
1055 int c;
1056{
1057#ifdef FEAT_MBYTE
1058 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
1059 return FALSE;
1060 if (enc_utf8 && c >= 0x100)
1061 return utf_printable(c);
1062#endif
1063 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1064}
1065
1066/*
1067 * like chartabsize(), but also check for line breaks on the screen
1068 */
1069 int
1070lbr_chartabsize(s, col)
1071 unsigned char *s;
1072 colnr_T col;
1073{
1074#ifdef FEAT_LINEBREAK
1075 if (!curwin->w_p_lbr && *p_sbr == NUL)
1076 {
1077#endif
1078#ifdef FEAT_MBYTE
1079 if (curwin->w_p_wrap)
1080 return win_nolbr_chartabsize(curwin, s, col, NULL);
1081#endif
1082 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1083#ifdef FEAT_LINEBREAK
1084 }
1085 return win_lbr_chartabsize(curwin, s, col, NULL);
1086#endif
1087}
1088
1089/*
1090 * Call lbr_chartabsize() and advance the pointer.
1091 */
1092 int
1093lbr_chartabsize_adv(s, col)
1094 char_u **s;
1095 colnr_T col;
1096{
1097 int retval;
1098
1099 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001100 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001101 return retval;
1102}
1103
1104/*
1105 * This function is used very often, keep it fast!!!!
1106 *
1107 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1108 * string at start of line. Warning: *headp is only set if it's a non-zero
1109 * value, init to 0 before calling.
1110 */
1111/*ARGSUSED*/
1112 int
1113win_lbr_chartabsize(wp, s, col, headp)
1114 win_T *wp;
1115 char_u *s;
1116 colnr_T col;
1117 int *headp;
1118{
1119#ifdef FEAT_LINEBREAK
1120 int c;
1121 int size;
1122 colnr_T col2;
1123 colnr_T colmax;
1124 int added;
1125# ifdef FEAT_MBYTE
1126 int mb_added = 0;
1127# else
1128# define mb_added 0
1129# endif
1130 int numberextra;
1131 char_u *ps;
1132 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001133 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001134
1135 /*
1136 * No 'linebreak' and 'showbreak': return quickly.
1137 */
1138 if (!wp->w_p_lbr && *p_sbr == NUL)
1139#endif
1140 {
1141#ifdef FEAT_MBYTE
1142 if (wp->w_p_wrap)
1143 return win_nolbr_chartabsize(wp, s, col, headp);
1144#endif
1145 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1146 }
1147
1148#ifdef FEAT_LINEBREAK
1149 /*
1150 * First get normal size, without 'linebreak'
1151 */
1152 size = win_chartabsize(wp, s, col);
1153 c = *s;
1154
1155 /*
1156 * If 'linebreak' set check at a blank before a non-blank if the line
1157 * needs a break here
1158 */
1159 if (wp->w_p_lbr
1160 && vim_isbreak(c)
1161 && !vim_isbreak(s[1])
1162 && !wp->w_p_list
1163 && wp->w_p_wrap
1164# ifdef FEAT_VERTSPLIT
1165 && wp->w_width != 0
1166# endif
1167 )
1168 {
1169 /*
1170 * Count all characters from first non-blank after a blank up to next
1171 * non-blank after a blank.
1172 */
1173 numberextra = win_col_off(wp);
1174 col2 = col;
1175 colmax = W_WIDTH(wp) - numberextra;
1176 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001177 {
1178 n = colmax + win_col_off2(wp);
1179 if (n > 0)
1180 colmax += (((col - colmax) / n) + 1) * n;
1181 }
1182
Bram Moolenaar071d4272004-06-13 20:20:40 +00001183 for (;;)
1184 {
1185 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001186 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187 c = *s;
1188 if (!(c != NUL
1189 && (vim_isbreak(c)
1190 || (!vim_isbreak(c)
1191 && (col2 == col || !vim_isbreak(*ps))))))
1192 break;
1193
1194 col2 += win_chartabsize(wp, s, col2);
1195 if (col2 >= colmax) /* doesn't fit */
1196 {
1197 size = colmax - col;
1198 tab_corr = FALSE;
1199 break;
1200 }
1201 }
1202 }
1203# ifdef FEAT_MBYTE
1204 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1205 && wp->w_p_wrap && in_win_border(wp, col))
1206 {
1207 ++size; /* Count the ">" in the last column. */
1208 mb_added = 1;
1209 }
1210# endif
1211
1212 /*
1213 * May have to add something for 'showbreak' string at start of line
1214 * Set *headp to the size of what we add.
1215 */
1216 added = 0;
1217 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1218 {
1219 numberextra = win_col_off(wp);
1220 col += numberextra + mb_added;
1221 if (col >= (colnr_T)W_WIDTH(wp))
1222 {
1223 col -= W_WIDTH(wp);
1224 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1225 if (numberextra > 0)
1226 col = col % numberextra;
1227 }
1228 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1229 {
1230 added = vim_strsize(p_sbr);
1231 if (tab_corr)
1232 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1233 else
1234 size += added;
1235 if (col != 0)
1236 added = 0;
1237 }
1238 }
1239 if (headp != NULL)
1240 *headp = added + mb_added;
1241 return size;
1242#endif
1243}
1244
1245#if defined(FEAT_MBYTE) || defined(PROTO)
1246/*
1247 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1248 * 'wrap' is on. This means we need to check for a double-byte character that
1249 * doesn't fit at the end of the screen line.
1250 */
1251 static int
1252win_nolbr_chartabsize(wp, s, col, headp)
1253 win_T *wp;
1254 char_u *s;
1255 colnr_T col;
1256 int *headp;
1257{
1258 int n;
1259
1260 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1261 {
1262 n = wp->w_buffer->b_p_ts;
1263 return (int)(n - (col % n));
1264 }
1265 n = ptr2cells(s);
1266 /* Add one cell for a double-width character in the last column of the
1267 * window, displayed with a ">". */
1268 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1269 {
1270 if (headp != NULL)
1271 *headp = 1;
1272 return 3;
1273 }
1274 return n;
1275}
1276
1277/*
1278 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1279 * "wp".
1280 */
1281 int
1282in_win_border(wp, vcol)
1283 win_T *wp;
1284 colnr_T vcol;
1285{
1286 colnr_T width1; /* width of first line (after line number) */
1287 colnr_T width2; /* width of further lines */
1288
1289#ifdef FEAT_VERTSPLIT
1290 if (wp->w_width == 0) /* there is no border */
1291 return FALSE;
1292#endif
1293 width1 = W_WIDTH(wp) - win_col_off(wp);
1294 if (vcol < width1 - 1)
1295 return FALSE;
1296 if (vcol == width1 - 1)
1297 return TRUE;
1298 width2 = width1 + win_col_off2(wp);
1299 return ((vcol - width1) % width2 == width2 - 1);
1300}
1301#endif /* FEAT_MBYTE */
1302
1303/*
1304 * Get virtual column number of pos.
1305 * start: on the first position of this character (TAB, ctrl)
1306 * cursor: where the cursor is on this character (first char, except for TAB)
1307 * end: on the last position of this character (TAB, ctrl)
1308 *
1309 * This is used very often, keep it fast!
1310 */
1311 void
1312getvcol(wp, pos, start, cursor, end)
1313 win_T *wp;
1314 pos_T *pos;
1315 colnr_T *start;
1316 colnr_T *cursor;
1317 colnr_T *end;
1318{
1319 colnr_T vcol;
1320 char_u *ptr; /* points to current char */
1321 char_u *posptr; /* points to char at pos->col */
1322 int incr;
1323 int head;
1324 int ts = wp->w_buffer->b_p_ts;
1325 int c;
1326
1327 vcol = 0;
1328 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1329 posptr = ptr + pos->col;
1330
1331 /*
1332 * This function is used very often, do some speed optimizations.
1333 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1334 * Also use this when 'list' is set but tabs take their normal size.
1335 */
1336 if ((!wp->w_p_list || lcs_tab1 != NUL)
1337#ifdef FEAT_LINEBREAK
1338 && !wp->w_p_lbr && *p_sbr == NUL
1339#endif
1340 )
1341 {
1342#ifndef FEAT_MBYTE
1343 head = 0;
1344#endif
1345 for (;;)
1346 {
1347#ifdef FEAT_MBYTE
1348 head = 0;
1349#endif
1350 c = *ptr;
1351 /* make sure we don't go past the end of the line */
1352 if (c == NUL)
1353 {
1354 incr = 1; /* NUL at end of line only takes one column */
1355 break;
1356 }
1357 /* A tab gets expanded, depending on the current column */
1358 if (c == TAB)
1359 incr = ts - (vcol % ts);
1360 else
1361 {
1362#ifdef FEAT_MBYTE
1363 if (has_mbyte)
1364 {
1365 /* For utf-8, if the byte is >= 0x80, need to look at
1366 * further bytes to find the cell width. */
1367 if (enc_utf8 && c >= 0x80)
1368 incr = utf_ptr2cells(ptr);
1369 else
1370 incr = CHARSIZE(c);
1371
1372 /* If a double-cell char doesn't fit at the end of a line
1373 * it wraps to the next line, it's like this char is three
1374 * cells wide. */
1375 if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
1376 {
1377 ++incr;
1378 head = 1;
1379 }
1380 }
1381 else
1382#endif
1383 incr = CHARSIZE(c);
1384 }
1385
1386 if (ptr >= posptr) /* character at pos->col */
1387 break;
1388
1389 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001390 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 }
1392 }
1393 else
1394 {
1395 for (;;)
1396 {
1397 /* A tab gets expanded, depending on the current column */
1398 head = 0;
1399 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1400 /* make sure we don't go past the end of the line */
1401 if (*ptr == NUL)
1402 {
1403 incr = 1; /* NUL at end of line only takes one column */
1404 break;
1405 }
1406
1407 if (ptr >= posptr) /* character at pos->col */
1408 break;
1409
1410 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001411 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001412 }
1413 }
1414 if (start != NULL)
1415 *start = vcol + head;
1416 if (end != NULL)
1417 *end = vcol + incr - 1;
1418 if (cursor != NULL)
1419 {
1420 if (*ptr == TAB
1421 && (State & NORMAL)
1422 && !wp->w_p_list
1423 && !virtual_active()
1424#ifdef FEAT_VISUAL
1425 && !(VIsual_active
1426 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1427#endif
1428 )
1429 *cursor = vcol + incr - 1; /* cursor at end */
1430 else
1431 *cursor = vcol + head; /* cursor at start */
1432 }
1433}
1434
1435/*
1436 * Get virtual cursor column in the current window, pretending 'list' is off.
1437 */
1438 colnr_T
1439getvcol_nolist(posp)
1440 pos_T *posp;
1441{
1442 int list_save = curwin->w_p_list;
1443 colnr_T vcol;
1444
1445 curwin->w_p_list = FALSE;
1446 getvcol(curwin, posp, NULL, &vcol, NULL);
1447 curwin->w_p_list = list_save;
1448 return vcol;
1449}
1450
1451#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1452/*
1453 * Get virtual column in virtual mode.
1454 */
1455 void
1456getvvcol(wp, pos, start, cursor, end)
1457 win_T *wp;
1458 pos_T *pos;
1459 colnr_T *start;
1460 colnr_T *cursor;
1461 colnr_T *end;
1462{
1463 colnr_T col;
1464 colnr_T coladd;
1465 colnr_T endadd;
1466# ifdef FEAT_MBYTE
1467 char_u *ptr;
1468# endif
1469
1470 if (virtual_active())
1471 {
1472 /* For virtual mode, only want one value */
1473 getvcol(wp, pos, &col, NULL, NULL);
1474
1475 coladd = pos->coladd;
1476 endadd = 0;
1477# ifdef FEAT_MBYTE
1478 /* Cannot put the cursor on part of a wide character. */
1479 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1480 if (pos->col < STRLEN(ptr))
1481 {
1482 int c = (*mb_ptr2char)(ptr + pos->col);
1483
1484 if (c != TAB && vim_isprintc(c))
1485 {
1486 endadd = char2cells(c) - 1;
1487 if (coladd >= endadd)
1488 coladd -= endadd;
1489 else
1490 coladd = 0;
1491 }
1492 }
1493# endif
1494 col += coladd;
1495 if (start != NULL)
1496 *start = col;
1497 if (cursor != NULL)
1498 *cursor = col;
1499 if (end != NULL)
1500 *end = col + endadd;
1501 }
1502 else
1503 getvcol(wp, pos, start, cursor, end);
1504}
1505#endif
1506
1507#if defined(FEAT_VISUAL) || defined(PROTO)
1508/*
1509 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1510 * Used for Visual block mode.
1511 */
1512 void
1513getvcols(wp, pos1, pos2, left, right)
1514 win_T *wp;
1515 pos_T *pos1, *pos2;
1516 colnr_T *left, *right;
1517{
1518 colnr_T from1, from2, to1, to2;
1519
1520 if (ltp(pos1, pos2))
1521 {
1522 getvvcol(wp, pos1, &from1, NULL, &to1);
1523 getvvcol(wp, pos2, &from2, NULL, &to2);
1524 }
1525 else
1526 {
1527 getvvcol(wp, pos2, &from1, NULL, &to1);
1528 getvvcol(wp, pos1, &from2, NULL, &to2);
1529 }
1530 if (from2 < from1)
1531 *left = from2;
1532 else
1533 *left = from1;
1534 if (to2 > to1)
1535 {
1536 if (*p_sel == 'e' && from2 - 1 >= to1)
1537 *right = from2 - 1;
1538 else
1539 *right = to2;
1540 }
1541 else
1542 *right = to1;
1543}
1544#endif
1545
1546/*
1547 * skipwhite: skip over ' ' and '\t'.
1548 */
1549 char_u *
1550skipwhite(p)
1551 char_u *p;
1552{
1553 while (vim_iswhite(*p)) /* skip to next non-white */
1554 ++p;
1555 return p;
1556}
1557
1558/*
1559 * skipdigits: skip over digits;
1560 */
1561 char_u *
1562skipdigits(p)
1563 char_u *p;
1564{
1565 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1566 ++p;
1567 return p;
1568}
1569
/*
 * Return non-zero when "c" is one of '0' - '9'.
 * Replacement for isdigit() that also works for characters > 0x100.
 * isdigit() is not used because on some systems it also accepts
 * superscript 1 as a digit.
 * For simple arguments use the VIM_ISDIGIT() macro instead.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1582
/*
 * Return non-zero when "c" is a hexadecimal digit: '0' - '9', 'a' - 'f' or
 * 'A' - 'F'.
 * Replacement for isxdigit() that also works for characters > 0x100.
 * isxdigit() is not used because on some systems it also accepts
 * superscript 1 as a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    int         is_decimal = (c >= '0' && c <= '9');
    int         is_lower = (c >= 'a' && c <= 'f');
    int         is_upper = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower || is_upper;
}
1596
1597/*
1598 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1599 */
1600 char_u *
1601skiptowhite(p)
1602 char_u *p;
1603{
1604 while (*p != ' ' && *p != '\t' && *p != NUL)
1605 ++p;
1606 return p;
1607}
1608
1609#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1610 || defined(PROTO)
1611/*
1612 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1613 */
1614 char_u *
1615skiptowhite_esc(p)
1616 char_u *p;
1617{
1618 while (*p != ' ' && *p != '\t' && *p != NUL)
1619 {
1620 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1621 ++p;
1622 ++p;
1623 }
1624 return p;
1625}
1626#endif
1627
1628/*
1629 * Getdigits: Get a number from a string and skip over it.
1630 * Note: the argument is a pointer to a char_u pointer!
1631 */
1632 long
1633getdigits(pp)
1634 char_u **pp;
1635{
1636 char_u *p;
1637 long retval;
1638
1639 p = *pp;
1640 retval = atol((char *)p);
1641 if (*p == '-') /* skip negative sign */
1642 ++p;
1643 p = skipdigits(p); /* skip to next non-digit */
1644 *pp = p;
1645 return retval;
1646}
1647
1648/*
1649 * Return TRUE if "lbuf" is empty or only contains blanks.
1650 */
1651 int
1652vim_isblankline(lbuf)
1653 char_u *lbuf;
1654{
1655 char_u *p;
1656
1657 p = skipwhite(lbuf);
1658 return (*p == NUL || *p == '\r' || *p == '\n');
1659}
1660
1661/*
1662 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001663 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001664 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1665 * 0 decimal
1666 * '0' octal
1667 * 'X' hex
1668 * 'x' hex
1669 * If "len" is not NULL, the length of the number in characters is returned.
1670 * If "nptr" is not NULL, the signed result is returned in it.
1671 * If "unptr" is not NULL, the unsigned result is returned in it.
1672 */
1673 void
1674vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1675 char_u *start;
1676 int *hexp; /* return: type of number 0 = decimal, 'x'
1677 or 'X' is hex, '0' = octal */
1678 int *len; /* return: detected length of number */
1679 int dooct; /* recognize octal number */
1680 int dohex; /* recognize hex number */
1681 long *nptr; /* return: signed result */
1682 unsigned long *unptr; /* return: unsigned result */
1683{
1684 char_u *ptr = start;
1685 int hex = 0; /* default is decimal */
1686 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001687 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001688 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001689
1690 if (ptr[0] == '-')
1691 {
1692 negative = TRUE;
1693 ++ptr;
1694 }
1695
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001696 /* Recognize hex and octal. */
1697 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001698 {
1699 hex = ptr[1];
1700 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1701 ptr += 2; /* hexadecimal */
1702 else
1703 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001704 hex = 0; /* default is decimal */
1705 if (dooct)
1706 {
1707 /* Don't interpret "0", "08" or "0129" as octal. */
1708 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1709 {
1710 if (ptr[n] > '7')
1711 {
1712 hex = 0; /* can't be octal */
1713 break;
1714 }
1715 if (ptr[n] > '0')
1716 hex = '0'; /* assume octal */
1717 }
1718 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719 }
1720 }
1721
1722 /*
1723 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1724 */
1725 if (hex)
1726 {
1727 if (hex == '0')
1728 {
1729 /* octal */
1730 while ('0' <= *ptr && *ptr <= '7')
1731 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732 un = 8 * un + (unsigned long)(*ptr - '0');
1733 ++ptr;
1734 }
1735 }
1736 else
1737 {
1738 /* hex */
1739 while (vim_isxdigit(*ptr))
1740 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001741 un = 16 * un + (unsigned long)hex2nr(*ptr);
1742 ++ptr;
1743 }
1744 }
1745 }
1746 else
1747 {
1748 /* decimal */
1749 while (VIM_ISDIGIT(*ptr))
1750 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751 un = 10 * un + (unsigned long)(*ptr - '0');
1752 ++ptr;
1753 }
1754 }
1755
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 if (hexp != NULL)
1757 *hexp = hex;
1758 if (len != NULL)
1759 *len = (int)(ptr - start);
1760 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001761 {
1762 if (negative) /* account for leading '-' for decimal numbers */
1763 *nptr = -(long)un;
1764 else
1765 *nptr = (long)un;
1766 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767 if (unptr != NULL)
1768 *unptr = un;
1769}
1770
/*
 * Return the numeric value of the hex digit "c".
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'; the argument is not checked.
 */
    int
hex2nr(c)
    int         c;
{
    int         first = '0';    /* first character of the range "c" is in */
    int         value = 0;      /* value of that first character */

    if (c >= 'a' && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    return c - first + value;
}
1785
1786#if defined(FEAT_TERMRESPONSE) \
1787 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1788/*
1789 * Convert two hex characters to a byte.
1790 * Return -1 if one of the characters is not hex.
1791 */
1792 int
1793hexhex2nr(p)
1794 char_u *p;
1795{
1796 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1797 return -1;
1798 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1799}
1800#endif
1801
1802/*
1803 * Return TRUE if "str" starts with a backslash that should be removed.
1804 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1805 * backslash is not a normal file name character.
1806 * '$' is a valid file name character, we don't remove the backslash before
1807 * it. This means it is not possible to use an environment variable after a
1808 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1809 * Although "\ name" is valid, the backslash in "Program\ files" must be
1810 * removed. Assume a file name doesn't start with a space.
1811 * For multi-byte names, never remove a backslash before a non-ascii
1812 * character, assume that all multi-byte characters are valid file name
1813 * characters.
1814 */
1815 int
1816rem_backslash(str)
1817 char_u *str;
1818{
1819#ifdef BACKSLASH_IN_FILENAME
1820 return (str[0] == '\\'
1821# ifdef FEAT_MBYTE
1822 && str[1] < 0x80
1823# endif
1824 && (str[1] == ' '
1825 || (str[1] != NUL
1826 && str[1] != '*'
1827 && str[1] != '?'
1828 && !vim_isfilec(str[1]))));
1829#else
1830 return (str[0] == '\\' && str[1] != NUL);
1831#endif
1832}
1833
1834/*
1835 * Halve the number of backslashes in a file name argument.
1836 * For MS-DOS we only do this if the character after the backslash
1837 * is not a normal file character.
1838 */
1839 void
1840backslash_halve(p)
1841 char_u *p;
1842{
1843 for ( ; *p; ++p)
1844 if (rem_backslash(p))
1845 STRCPY(p, p + 1);
1846}
1847
1848/*
1849 * backslash_halve() plus save the result in allocated memory.
1850 */
1851 char_u *
1852backslash_halve_save(p)
1853 char_u *p;
1854{
1855 char_u *res;
1856
1857 res = vim_strsave(p);
1858 if (res == NULL)
1859 return p;
1860 backslash_halve(res);
1861 return res;
1862}
1863
1864#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1865/*
1866 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1867 * The first 64 entries have been added to map control characters defined in
1868 * ascii.h
1869 */
1870static char_u ebcdic2ascii_tab[256] =
1871{
1872 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1873 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1874 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1875 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1876 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1877 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1878 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1879 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1880 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1881 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1882 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1883 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1884 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1885 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1886 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1887 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1888 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1889 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1890 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1891 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1892 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1893 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1894 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1895 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1896 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1897 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1898 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1899 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1900 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1901 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1902 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1903 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1904};
1905
1906/*
1907 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1908 * wanting 7-bit ASCII characters out the other end.
1909 */
1910 void
1911ebcdic2ascii(buffer, len)
1912 char_u *buffer;
1913 int len;
1914{
1915 int i;
1916
1917 for (i = 0; i < len; i++)
1918 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1919}
1920#endif