/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
22static int chartab_initialized = FALSE;
23
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: the byte index is "c >> 3", the bit index is "c & 7".
 * All three macros expand to one fully parenthesized expression, so that
 * they can be combined with other operators without precedence surprises.
 */
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
210 if (!do_isalpha || isalpha(c)
211#ifdef FEAT_FKMAP
212 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
213#endif
214 )
215 {
216 if (i == 0) /* (re)set ID flag */
217 {
218 if (tilde)
219 chartab[c] &= ~CT_ID_CHAR;
220 else
221 chartab[c] |= CT_ID_CHAR;
222 }
223 else if (i == 1) /* (re)set printable */
224 {
225 if ((c < ' '
226#ifndef EBCDIC
227 || c > '~'
228#endif
229#ifdef FEAT_FKMAP
230 || (p_altkeymap
231 && (F_isalpha(c) || F_isdigit(c)))
232#endif
233 )
234#ifdef FEAT_MBYTE
235 /* For double-byte we keep the cell width, so
236 * that we can detect it from the first byte. */
237 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
238#endif
239 )
240 {
241 if (tilde)
242 {
243 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
244 + ((dy_flags & DY_UHEX) ? 4 : 2);
245 chartab[c] &= ~CT_PRINT_CHAR;
246 }
247 else
248 {
249 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
250 chartab[c] |= CT_PRINT_CHAR;
251 }
252 }
253 }
254 else if (i == 2) /* (re)set fname flag */
255 {
256 if (tilde)
257 chartab[c] &= ~CT_FNAME_CHAR;
258 else
259 chartab[c] |= CT_FNAME_CHAR;
260 }
261 else /* i == 3 */ /* (re)set keyword flag */
262 {
263 if (tilde)
264 RESET_CHARTAB(buf, c);
265 else
266 SET_CHARTAB(buf, c);
267 }
268 }
269 ++c;
270 }
271 p = skip_to_option_part(p);
272 }
273 }
274 chartab_initialized = TRUE;
275 return OK;
276}
277
278/*
279 * Translate any special characters in buf[bufsize] in-place.
280 * The result is a string with only printable characters, but if there is not
281 * enough room, not all characters will be translated.
282 */
283 void
284trans_characters(buf, bufsize)
285 char_u *buf;
286 int bufsize;
287{
288 int len; /* length of string needing translation */
289 int room; /* room in buffer after string */
290 char_u *trs; /* translated character */
291 int trs_len; /* length of trs[] */
292
293 len = (int)STRLEN(buf);
294 room = bufsize - len;
295 while (*buf != 0)
296 {
297# ifdef FEAT_MBYTE
298 /* Assume a multi-byte character doesn't need translation. */
299 if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
300 len -= trs_len;
301 else
302# endif
303 {
304 trs = transchar_byte(*buf);
305 trs_len = (int)STRLEN(trs);
306 if (trs_len > 1)
307 {
308 room -= trs_len - 1;
309 if (room <= 0)
310 return;
311 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
312 }
313 mch_memmove(buf, trs, (size_t)trs_len);
314 --len;
315 }
316 buf += trs_len;
317 }
318}
319
320#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(PROTO)
321/*
322 * Translate a string into allocated memory, replacing special chars with
323 * printable chars. Returns NULL when out of memory.
324 */
325 char_u *
326transstr(s)
327 char_u *s;
328{
329 char_u *res;
330 char_u *p;
331#ifdef FEAT_MBYTE
332 int l, len, c;
333 char_u hexbuf[11];
334#endif
335
336#ifdef FEAT_MBYTE
337 if (has_mbyte)
338 {
339 /* Compute the length of the result, taking account of unprintable
340 * multi-byte characters. */
341 len = 0;
342 p = s;
343 while (*p != NUL)
344 {
345 if ((l = (*mb_ptr2len_check)(p)) > 1)
346 {
347 c = (*mb_ptr2char)(p);
348 p += l;
349 if (vim_isprintc(c))
350 len += l;
351 else
352 {
353 transchar_hex(hexbuf, c);
354 len += STRLEN(hexbuf);
355 }
356 }
357 else
358 {
359 l = byte2cells(*p++);
360 if (l > 0)
361 len += l;
362 else
363 len += 4; /* illegal byte sequence */
364 }
365 }
366 res = alloc((unsigned)(len + 1));
367 }
368 else
369#endif
370 res = alloc((unsigned)(vim_strsize(s) + 1));
371 if (res != NULL)
372 {
373 *res = NUL;
374 p = s;
375 while (*p != NUL)
376 {
377#ifdef FEAT_MBYTE
378 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
379 {
380 c = (*mb_ptr2char)(p);
381 if (vim_isprintc(c))
382 STRNCAT(res, p, l); /* append printable multi-byte char */
383 else
384 transchar_hex(res + STRLEN(res), c);
385 p += l;
386 }
387 else
388#endif
389 STRCAT(res, transchar_byte(*p++));
390 }
391 }
392 return res;
393}
394#endif
395
396#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
397/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000398 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
399 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
401 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402 */
403 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000405 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 int orglen;
407 char_u *buf;
408 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000409{
410 garray_T ga;
411 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000412 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000413
414#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
415#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000416#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
417#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000418
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000419 /* Copy "str" into "buf" or allocated memory, unmodified. */
420 if (buf == NULL)
421 {
422 ga_init2(&ga, 1, 10);
423 if (ga_grow(&ga, len + 1) == FAIL)
424 return NULL;
425 mch_memmove(ga.ga_data, str, (size_t)len);
426 ga.ga_len = len;
427 }
428 else
429 {
430 if (len >= buflen) /* Ugly! */
431 len = buflen - 1;
432 mch_memmove(buf, str, (size_t)len);
433 }
434 if (buf == NULL)
435 GA_CHAR(len) = NUL;
436 else
437 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000438
439 /* Make each character lower case. */
440 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000441 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442 {
443#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000444 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
446 if (enc_utf8)
447 {
448 int c, lc;
449
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000450 c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 lc = utf_tolower(c);
452 if (c != lc)
453 {
454 int ol = utf_char2len(c);
455 int nl = utf_char2len(lc);
456
457 /* If the byte length changes need to shift the following
458 * characters forward or backward. */
459 if (ol != nl)
460 {
461 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000462 {
463 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
464 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000465 {
466 /* out of memory, keep old char */
467 lc = c;
468 nl = ol;
469 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000470 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000471 if (ol != nl)
472 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000473 if (buf == NULL)
474 {
475 mch_memmove(GA_PTR(i) + nl, GA_PTR(i) + ol,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 STRLEN(GA_PTR(i) + ol) + 1);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000477 ga.ga_len += nl - ol;
478 }
479 else
480 {
481 mch_memmove(buf + i + nl, buf + i + ol,
482 STRLEN(buf + i + ol) + 1);
483 len += nl - ol;
484 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000485 }
486 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000488 }
489 }
490 /* skip to next multi-byte char */
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000491 i += (*mb_ptr2len_check)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492 }
493 else
494#endif
495 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000496 if (buf == NULL)
497 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
498 else
499 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 ++i;
501 }
502 }
503
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000504 if (buf == NULL)
505 return (char_u *)ga.ga_data;
506 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507}
508#endif
509
510/*
511 * Catch 22: chartab[] can't be initialized before the options are
512 * initialized, and initializing options may cause transchar() to be called!
513 * When chartab_initialized == FALSE don't use chartab[].
514 * Does NOT work for multi-byte characters, c must be <= 255.
515 * Also doesn't work for the first byte of a multi-byte, "c" must be a
516 * character!
517 */
518static char_u transchar_buf[7];
519
520 char_u *
521transchar(c)
522 int c;
523{
524 int i;
525
526 i = 0;
527 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
528 {
529 transchar_buf[0] = '~';
530 transchar_buf[1] = '@';
531 i = 2;
532 c = K_SECOND(c);
533 }
534
535 if ((!chartab_initialized && (
536#ifdef EBCDIC
537 (c >= 64 && c < 255)
538#else
539 (c >= ' ' && c <= '~')
540#endif
541#ifdef FEAT_FKMAP
542 || F_ischar(c)
543#endif
544 )) || (c < 256 && vim_isprintc_strict(c)))
545 {
546 /* printable character */
547 transchar_buf[i] = c;
548 transchar_buf[i + 1] = NUL;
549 }
550 else
551 transchar_nonprint(transchar_buf + i, c);
552 return transchar_buf;
553}
554
555#if defined(FEAT_MBYTE) || defined(PROTO)
556/*
557 * Like transchar(), but called with a byte instead of a character. Checks
558 * for an illegal UTF-8 byte.
559 */
560 char_u *
561transchar_byte(c)
562 int c;
563{
564 if (enc_utf8 && c >= 0x80)
565 {
566 transchar_nonprint(transchar_buf, c);
567 return transchar_buf;
568 }
569 return transchar(c);
570}
571#endif
572
573/*
574 * Convert non-printable character to two or more printable characters in
575 * "buf[]". "buf" needs to be able to hold five bytes.
576 * Does NOT work for multi-byte characters, c must be <= 255.
577 */
578 void
579transchar_nonprint(buf, c)
580 char_u *buf;
581 int c;
582{
583 if (c == NL)
584 c = NUL; /* we use newline in place of a NUL */
585 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
586 c = NL; /* we use CR in place of NL in this case */
587
588 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
589 transchar_hex(buf, c);
590
591#ifdef EBCDIC
592 /* For EBCDIC only the characters 0-63 and 255 are not printable */
593 else if (CtrlChar(c) != 0 || c == DEL)
594#else
595 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
596#endif
597 {
598 buf[0] = '^';
599#ifdef EBCDIC
600 if (c == DEL)
601 buf[1] = '?'; /* DEL displayed as ^? */
602 else
603 buf[1] = CtrlChar(c);
604#else
605 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
606#endif
607
608 buf[2] = NUL;
609 }
610#ifdef FEAT_MBYTE
611 else if (enc_utf8 && c >= 0x80)
612 {
613 transchar_hex(buf, c);
614 }
615#endif
616#ifndef EBCDIC
617 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
618 {
619 buf[0] = '|';
620 buf[1] = c - 0x80;
621 buf[2] = NUL;
622 }
623#else
624 else if (c < 64)
625 {
626 buf[0] = '~';
627 buf[1] = MetaChar(c);
628 buf[2] = NUL;
629 }
630#endif
631 else /* 0x80 - 0x9f and 0xff */
632 {
633 /*
634 * TODO: EBCDIC I don't know what to do with this chars, so I display
635 * them as '~?' for now
636 */
637 buf[0] = '~';
638#ifdef EBCDIC
639 buf[1] = '?'; /* 0xff displayed as ~? */
640#else
641 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
642#endif
643 buf[2] = NUL;
644 }
645}
646
647 void
648transchar_hex(buf, c)
649 char_u *buf;
650 int c;
651{
652 int i = 0;
653
654 buf[0] = '<';
655#ifdef FEAT_MBYTE
656 if (c > 255)
657 {
658 buf[++i] = nr2hex((unsigned)c >> 12);
659 buf[++i] = nr2hex((unsigned)c >> 8);
660 }
661#endif
662 buf[++i] = nr2hex((unsigned)c >> 4);
663 buf[++i] = nr2hex(c);
664 buf[++i] = '>';
665 buf[++i] = NUL;
666}
667
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static int
nr2hex(c)
    int		c;
{
    int		nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
681
682/*
683 * Return number of display cells occupied by byte "b".
684 * Caller must make sure 0 <= b <= 255.
685 * For multi-byte mode "b" must be the first byte of a character.
686 * A TAB is counted as two cells: "^I".
687 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
688 * cells depends on further bytes.
689 */
690 int
691byte2cells(b)
692 int b;
693{
694#ifdef FEAT_MBYTE
695 if (enc_utf8 && b >= 0x80)
696 return 0;
697#endif
698 return (chartab[b] & CT_CELL_MASK);
699}
700
701/*
702 * Return number of display cells occupied by character "c".
703 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
704 * A TAB is counted as two cells: "^I" or four: "<09>".
705 */
706 int
707char2cells(c)
708 int c;
709{
710 if (IS_SPECIAL(c))
711 return char2cells(K_SECOND(c)) + 2;
712#ifdef FEAT_MBYTE
713 if (c >= 0x80)
714 {
715 /* UTF-8: above 0x80 need to check the value */
716 if (enc_utf8)
717 return utf_char2cells(c);
718 /* DBCS: double-byte means double-width, except for euc-jp with first
719 * byte 0x8e */
720 if (enc_dbcs != 0 && c >= 0x100)
721 {
722 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
723 return 1;
724 return 2;
725 }
726 }
727#endif
728 return (chartab[c & 0xff] & CT_CELL_MASK);
729}
730
731/*
732 * Return number of display cells occupied by character at "*p".
733 * A TAB is counted as two cells: "^I" or four: "<09>".
734 */
735 int
736ptr2cells(p)
737 char_u *p;
738{
739#ifdef FEAT_MBYTE
740 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
741 if (enc_utf8 && *p >= 0x80)
742 return utf_ptr2cells(p);
743 /* For DBCS we can tell the cell count from the first byte. */
744#endif
745 return (chartab[*p] & CT_CELL_MASK);
746}
747
748/*
749 * Return the number of characters string "s" will take on the screen,
750 * counting TABs as two characters: "^I".
751 */
752 int
753vim_strsize(s)
754 char_u *s;
755{
756 return vim_strnsize(s, (int)MAXCOL);
757}
758
759/*
760 * Return the number of characters string "s[len]" will take on the screen,
761 * counting TABs as two characters: "^I".
762 */
763 int
764vim_strnsize(s, len)
765 char_u *s;
766 int len;
767{
768 int size = 0;
769
770 while (*s != NUL && --len >= 0)
771 {
772#ifdef FEAT_MBYTE
773 if (has_mbyte)
774 {
775 int l = (*mb_ptr2len_check)(s);
776
777 size += ptr2cells(s);
778 s += l;
779 len -= l - 1;
780 }
781 else
782#endif
783 size += byte2cells(*s++);
784 }
785 return size;
786}
787
/*
 * Return the number of cells the character at "p" will take on the screen
 * in column "col", taking into account the size of a tab.
 * A TAB takes the cells up to the next multiple of the 'tabstop' of "buf",
 * unless 'list' is set for window "wp" and lcs_tab1 is not set.
 * Everything else has the size given by ptr2cells().
 *
 * Use a define to make it fast, this is used very often!!!
 * The define contains "return" statements, it must be the last thing in a
 * function.  "col" is put in parentheses, so that an expression can be
 * passed for it.
 * Also see getvcol() below.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);

#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
	|| defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Size of the character at "p" in column "col", for the current window and
 * the current buffer.
 */
    int
chartabsize(p, col)
    char_u	*p;
    colnr_T	col;
{
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif

#ifdef FEAT_LINEBREAK
/*
 * Size of the character at "p" in column "col", for window "wp" and the
 * buffer in that window.
 */
    static int
win_chartabsize(wp, p, col)
    win_T	*wp;
    char_u	*p;
    colnr_T	col;
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
826
827/*
828 * return the number of characters the string 's' will take on the screen,
829 * taking into account the size of a tab
830 */
831 int
832linetabsize(s)
833 char_u *s;
834{
835 colnr_T col = 0;
836
837 while (*s != NUL)
838 col += lbr_chartabsize_adv(&s, col);
839 return (int)col;
840}
841
842/*
843 * Like linetabsize(), but for a given window instead of the current one.
844 */
845 int
846win_linetabsize(wp, p, len)
847 win_T *wp;
848 char_u *p;
849 colnr_T len;
850{
851 colnr_T col = 0;
852 char_u *s;
853
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000854 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000856 return (int)col;
857}
858
859/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000860 * Return TRUE if 'c' is a normal identifier character:
861 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862 */
863 int
864vim_isIDc(c)
865 int c;
866{
867 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
868}
869
870/*
871 * return TRUE if 'c' is a keyword character: Letters and characters from
872 * 'iskeyword' option for current buffer.
873 * For multi-byte characters mb_get_class() is used (builtin rules).
874 */
875 int
876vim_iswordc(c)
877 int c;
878{
879#ifdef FEAT_MBYTE
880 if (c >= 0x100)
881 {
882 if (enc_dbcs != 0)
883 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
884 if (enc_utf8)
885 return utf_class(c) >= 2;
886 }
887#endif
888 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
889}
890
891/*
892 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
893 */
894 int
895vim_iswordp(p)
896 char_u *p;
897{
898#ifdef FEAT_MBYTE
899 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
900 return mb_get_class(p) >= 2;
901#endif
902 return GET_CHARTAB(curbuf, *p) != 0;
903}
904
905#if defined(FEAT_SYN_HL) || defined(PROTO)
906 int
907vim_iswordc_buf(p, buf)
908 char_u *p;
909 buf_T *buf;
910{
911# ifdef FEAT_MBYTE
912 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
913 return mb_get_class(p) >= 2;
914# endif
915 return (GET_CHARTAB(buf, *p) != 0);
916}
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000917
918static char spell_chartab[256];
919
920/*
921 * Init the chartab used for spelling. Only depends on 'encoding'.
922 * Called once while starting up and when 'encoding' was changed.
923 * Unfortunately, we can't use isalpha() here, since the current locale may
924 * differ from 'encoding'.
925 */
926 void
927init_spell_chartab()
928{
929 int i;
930
931 /* ASCII is always the same, no matter what 'encoding' is used.
932 * EBCDIC is not supported! */
933 for (i = 0; i < '0'; ++i)
934 spell_chartab[i] = FALSE;
935 /* We include numbers. A word shouldn't start with a number, but handling
936 * that is done separately. */
937 for ( ; i <= '9'; ++i)
938 spell_chartab[i] = TRUE;
939 for ( ; i < 'A'; ++i)
940 spell_chartab[i] = FALSE;
941 for ( ; i <= 'Z'; ++i)
942 spell_chartab[i] = TRUE;
943 for ( ; i < 'a'; ++i)
944 spell_chartab[i] = FALSE;
945 for ( ; i <= 'z'; ++i)
946 spell_chartab[i] = TRUE;
947#ifdef FEAT_MBYTE
948 if (enc_dbcs)
949 {
950 /* DBCS: assume double-wide characters are word characters. */
951 for ( ; i <= 255; ++i)
952 if (MB_BYTE2LEN(i) == 2)
953 spell_chartab[i] = TRUE;
954 else
955 spell_chartab[i] = FALSE;
956 }
957 else if (STRCMP(p_enc, "cp850") == 0)
958#endif
959#if defined(MSDOS) || defined(FEAT_MBYTE)
960 {
961 /* cp850, MS-DOS */
962 for ( ; i < 128; ++i)
963 spell_chartab[i] = FALSE;
964 for ( ; i <= 0x9a; ++i)
965 spell_chartab[i] = TRUE;
966 for ( ; i < 0xa0; ++i)
967 spell_chartab[i] = FALSE;
968 for ( ; i <= 0xa5; ++i)
969 spell_chartab[i] = TRUE;
970 for ( ; i <= 255; ++i)
971 spell_chartab[i] = FALSE;
972 }
973#endif
974#ifdef FEAT_MBYTE
975 else
976#endif
977#if defined(FEAT_MBYTE) || !defined(MSDOS)
978 {
979 /* Rough guess: anything we don't recognize assumes word characters
980 * like latin1. */
981 for ( ; i < 0xc0; ++i)
982 spell_chartab[i] = FALSE;
983 for ( ; i <= 255; ++i)
984 spell_chartab[i] = TRUE;
985# ifdef FEAT_MBYTE
986 if (STRCMP(p_enc, "latin1") == 0)
987# endif
988 spell_chartab[0xf7] = FALSE; /* divide-by */
989 }
990#endif
991}
992
993/*
994 * Return TRUE if "p" points to a word character.
995 * This only depends on 'encoding', not on 'iskeyword'.
996 */
997 int
998spell_iswordc(p)
999 char_u *p;
1000{
1001# ifdef FEAT_MBYTE
1002 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
1003 return mb_get_class(p) >= 2;
1004# endif
1005 return spell_chartab[*p];
1006}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001007#endif
1008
1009/*
1010 * return TRUE if 'c' is a valid file-name character
1011 * Assume characters above 0x100 are valid (multi-byte).
1012 */
1013 int
1014vim_isfilec(c)
1015 int c;
1016{
1017 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
1018}
1019
1020/*
1021 * return TRUE if 'c' is a printable character
1022 * Assume characters above 0x100 are printable (multi-byte), except for
1023 * Unicode.
1024 */
1025 int
1026vim_isprintc(c)
1027 int c;
1028{
1029#ifdef FEAT_MBYTE
1030 if (enc_utf8 && c >= 0x100)
1031 return utf_printable(c);
1032#endif
1033 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1034}
1035
1036/*
1037 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
1038 * byte of a double-byte character.
1039 */
1040 int
1041vim_isprintc_strict(c)
1042 int c;
1043{
1044#ifdef FEAT_MBYTE
1045 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
1046 return FALSE;
1047 if (enc_utf8 && c >= 0x100)
1048 return utf_printable(c);
1049#endif
1050 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1051}
1052
1053/*
1054 * like chartabsize(), but also check for line breaks on the screen
1055 */
1056 int
1057lbr_chartabsize(s, col)
1058 unsigned char *s;
1059 colnr_T col;
1060{
1061#ifdef FEAT_LINEBREAK
1062 if (!curwin->w_p_lbr && *p_sbr == NUL)
1063 {
1064#endif
1065#ifdef FEAT_MBYTE
1066 if (curwin->w_p_wrap)
1067 return win_nolbr_chartabsize(curwin, s, col, NULL);
1068#endif
1069 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1070#ifdef FEAT_LINEBREAK
1071 }
1072 return win_lbr_chartabsize(curwin, s, col, NULL);
1073#endif
1074}
1075
1076/*
1077 * Call lbr_chartabsize() and advance the pointer.
1078 */
1079 int
1080lbr_chartabsize_adv(s, col)
1081 char_u **s;
1082 colnr_T col;
1083{
1084 int retval;
1085
1086 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001087 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001088 return retval;
1089}
1090
1091/*
1092 * This function is used very often, keep it fast!!!!
1093 *
1094 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1095 * string at start of line. Warning: *headp is only set if it's a non-zero
1096 * value, init to 0 before calling.
1097 */
1098/*ARGSUSED*/
1099 int
1100win_lbr_chartabsize(wp, s, col, headp)
1101 win_T *wp;
1102 char_u *s;
1103 colnr_T col;
1104 int *headp;
1105{
1106#ifdef FEAT_LINEBREAK
1107 int c;
1108 int size;
1109 colnr_T col2;
1110 colnr_T colmax;
1111 int added;
1112# ifdef FEAT_MBYTE
1113 int mb_added = 0;
1114# else
1115# define mb_added 0
1116# endif
1117 int numberextra;
1118 char_u *ps;
1119 int tab_corr = (*s == TAB);
1120
1121 /*
1122 * No 'linebreak' and 'showbreak': return quickly.
1123 */
1124 if (!wp->w_p_lbr && *p_sbr == NUL)
1125#endif
1126 {
1127#ifdef FEAT_MBYTE
1128 if (wp->w_p_wrap)
1129 return win_nolbr_chartabsize(wp, s, col, headp);
1130#endif
1131 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1132 }
1133
1134#ifdef FEAT_LINEBREAK
1135 /*
1136 * First get normal size, without 'linebreak'
1137 */
1138 size = win_chartabsize(wp, s, col);
1139 c = *s;
1140
1141 /*
1142 * If 'linebreak' set check at a blank before a non-blank if the line
1143 * needs a break here
1144 */
1145 if (wp->w_p_lbr
1146 && vim_isbreak(c)
1147 && !vim_isbreak(s[1])
1148 && !wp->w_p_list
1149 && wp->w_p_wrap
1150# ifdef FEAT_VERTSPLIT
1151 && wp->w_width != 0
1152# endif
1153 )
1154 {
1155 /*
1156 * Count all characters from first non-blank after a blank up to next
1157 * non-blank after a blank.
1158 */
1159 numberextra = win_col_off(wp);
1160 col2 = col;
1161 colmax = W_WIDTH(wp) - numberextra;
1162 if (col >= colmax)
1163 colmax += (((col - colmax)
1164 / (colmax + win_col_off2(wp))) + 1)
1165 * (colmax + win_col_off2(wp));
1166 for (;;)
1167 {
1168 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001169 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 c = *s;
1171 if (!(c != NUL
1172 && (vim_isbreak(c)
1173 || (!vim_isbreak(c)
1174 && (col2 == col || !vim_isbreak(*ps))))))
1175 break;
1176
1177 col2 += win_chartabsize(wp, s, col2);
1178 if (col2 >= colmax) /* doesn't fit */
1179 {
1180 size = colmax - col;
1181 tab_corr = FALSE;
1182 break;
1183 }
1184 }
1185 }
1186# ifdef FEAT_MBYTE
1187 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1188 && wp->w_p_wrap && in_win_border(wp, col))
1189 {
1190 ++size; /* Count the ">" in the last column. */
1191 mb_added = 1;
1192 }
1193# endif
1194
1195 /*
1196 * May have to add something for 'showbreak' string at start of line
1197 * Set *headp to the size of what we add.
1198 */
1199 added = 0;
1200 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1201 {
1202 numberextra = win_col_off(wp);
1203 col += numberextra + mb_added;
1204 if (col >= (colnr_T)W_WIDTH(wp))
1205 {
1206 col -= W_WIDTH(wp);
1207 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1208 if (numberextra > 0)
1209 col = col % numberextra;
1210 }
1211 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1212 {
1213 added = vim_strsize(p_sbr);
1214 if (tab_corr)
1215 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1216 else
1217 size += added;
1218 if (col != 0)
1219 added = 0;
1220 }
1221 }
1222 if (headp != NULL)
1223 *headp = added + mb_added;
1224 return size;
1225#endif
1226}
1227
1228#if defined(FEAT_MBYTE) || defined(PROTO)
1229/*
1230 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1231 * 'wrap' is on. This means we need to check for a double-byte character that
1232 * doesn't fit at the end of the screen line.
1233 */
1234 static int
1235win_nolbr_chartabsize(wp, s, col, headp)
1236 win_T *wp;
1237 char_u *s;
1238 colnr_T col;
1239 int *headp;
1240{
1241 int n;
1242
1243 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1244 {
1245 n = wp->w_buffer->b_p_ts;
1246 return (int)(n - (col % n));
1247 }
1248 n = ptr2cells(s);
1249 /* Add one cell for a double-width character in the last column of the
1250 * window, displayed with a ">". */
1251 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1252 {
1253 if (headp != NULL)
1254 *headp = 1;
1255 return 3;
1256 }
1257 return n;
1258}
1259
1260/*
1261 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1262 * "wp".
1263 */
1264 int
1265in_win_border(wp, vcol)
1266 win_T *wp;
1267 colnr_T vcol;
1268{
1269 colnr_T width1; /* width of first line (after line number) */
1270 colnr_T width2; /* width of further lines */
1271
1272#ifdef FEAT_VERTSPLIT
1273 if (wp->w_width == 0) /* there is no border */
1274 return FALSE;
1275#endif
1276 width1 = W_WIDTH(wp) - win_col_off(wp);
1277 if (vcol < width1 - 1)
1278 return FALSE;
1279 if (vcol == width1 - 1)
1280 return TRUE;
1281 width2 = width1 + win_col_off2(wp);
1282 return ((vcol - width1) % width2 == width2 - 1);
1283}
1284#endif /* FEAT_MBYTE */
1285
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * "start", "cursor" and "end" may each be NULL when the value is not wanted.
 * The line is taken from the buffer of window "wp" at pos->lnum; columns are
 * summed up to the character at byte index pos->col, or up to the NUL when
 * the line is shorter than that.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;           /* virtual column where current char starts */
    char_u      *ptr;           /* points to current char */
    char_u      *posptr;        /* points to char at pos->col */
    int         incr;           /* number of cells of the current char */
    int         head;           /* cells counted in "incr" that come before
                                   the character itself */
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL
#endif
       )
    {
#ifndef FEAT_MBYTE
        /* Without multi-byte support "head" never changes in this loop. */
        head = 0;
#endif
        for (;;)
        {
#ifdef FEAT_MBYTE
            /* Reset for every character, it is set below for a double-cell
             * character that wraps. */
            head = 0;
#endif
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
                incr = ts - (vcol % ts);
            else
            {
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = CHARSIZE(c);

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
#endif
                    incr = CHARSIZE(c);
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    else
    {
        for (;;)
        {
            /* Let win_lbr_chartabsize() compute the size of the character;
             * it stores in "head" what it added before the character. */
            head = 0;
            incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (ptr >= posptr)  /* character at pos->col */
                break;

            vcol += incr;
            mb_ptr_adv(ptr);
        }
    }
    /* Here "vcol" is where the cells of the character start, "incr" the
     * number of cells and "head" the cells that precede the character. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor is put on the last cell when in Normal mode,
         * 'list' is off for the window, virtual_active() is false and, with
         * Visual mode active, 'selection' does not start with 'e' and "pos"
         * is after VIsual.  In all other cases it is on the first cell. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
#ifdef FEAT_VISUAL
                && !(VIsual_active
                                && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
#endif
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}
1417
1418/*
1419 * Get virtual cursor column in the current window, pretending 'list' is off.
1420 */
1421 colnr_T
1422getvcol_nolist(posp)
1423 pos_T *posp;
1424{
1425 int list_save = curwin->w_p_list;
1426 colnr_T vcol;
1427
1428 curwin->w_p_list = FALSE;
1429 getvcol(curwin, posp, NULL, &vcol, NULL);
1430 curwin->w_p_list = list_save;
1431 return vcol;
1432}
1433
#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Like getvcol(), but when virtual_active() is true take pos->coladd into
 * account.  In that case "start" and "cursor" get the same column and "end"
 * is that column plus the extra cells of a wide character.
 * "start", "cursor" and "end" may each be NULL.
 */
    void
getvvcol(wp, pos, start, cursor, end)
    win_T       *wp;
    pos_T       *pos;
    colnr_T     *start;
    colnr_T     *cursor;
    colnr_T     *end;
{
    colnr_T     vcol;
    colnr_T     extra;          /* columns to add for pos->coladd */
    colnr_T     tail;           /* cells of the character after the first */
# ifdef FEAT_MBYTE
    char_u      *line;
# endif

    if (!virtual_active())
    {
        getvcol(wp, pos, start, cursor, end);
        return;
    }

    /* In virtual mode only the start column of the character is needed. */
    getvcol(wp, pos, &vcol, NULL, NULL);

    extra = pos->coladd;
    tail = 0;
# ifdef FEAT_MBYTE
    /* The cursor can't be halfway a wide character: when the position is
     * inside the line, take the width of a printable non-TAB character off
     * the offset. */
    line = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col < STRLEN(line))
    {
        int     ch = (*mb_ptr2char)(line + pos->col);

        if (ch != TAB && vim_isprintc(ch))
        {
            tail = char2cells(ch) - 1;
            extra = (extra >= tail) ? extra - tail : 0;
        }
    }
# endif
    vcol += extra;

    if (start != NULL)
        *start = vcol;
    if (cursor != NULL)
        *cursor = vcol;
    if (end != NULL)
        *end = vcol + tail;
}
#endif
1489
#if defined(FEAT_VISUAL) || defined(PROTO)
/*
 * Compute the leftmost ("*left") and rightmost ("*right") virtual column
 * covered by the characters at "pos1" and "pos2" in window "wp".
 * Used for Visual block mode.
 */
    void
getvcols(wp, pos1, pos2, left, right)
    win_T       *wp;
    pos_T       *pos1, *pos2;
    colnr_T     *left, *right;
{
    pos_T       *first;         /* the position that comes first */
    pos_T       *second;        /* the other position */
    colnr_T     from1, from2, to1, to2;

    if (ltp(pos1, pos2))
    {
        first = pos1;
        second = pos2;
    }
    else
    {
        first = pos2;
        second = pos1;
    }
    getvvcol(wp, first, &from1, NULL, &to1);
    getvvcol(wp, second, &from2, NULL, &to2);

    *left = (from2 < from1) ? from2 : from1;

    if (to2 <= to1)
        *right = to1;
    else if (*p_sel == 'e' && from2 - 1 >= to1)
        /* 'selection' starts with 'e': stop just before the second
         * character, unless that is left of the end of the first one. */
        *right = from2 - 1;
    else
        *right = to2;
}
#endif
1528
1529/*
1530 * skipwhite: skip over ' ' and '\t'.
1531 */
1532 char_u *
1533skipwhite(p)
1534 char_u *p;
1535{
1536 while (vim_iswhite(*p)) /* skip to next non-white */
1537 ++p;
1538 return p;
1539}
1540
1541/*
1542 * skipdigits: skip over digits;
1543 */
1544 char_u *
1545skipdigits(p)
1546 char_u *p;
1547{
1548 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1549 ++p;
1550 return p;
1551}
1552
/*
 * Return non-zero when "c" is one of the characters '0' to '9'.
 * Works for any int value, also for characters above 0x100.
 * isdigit() is not used, because on some systems it also accepts
 * superscript 1 as a digit.
 * For simple arguments use the VIM_ISDIGIT() macro.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1565
/*
 * Return non-zero when "c" is a hexadecimal digit: '0' - '9', 'a' - 'f' or
 * 'A' - 'F'.
 * Works for any int value, also for characters above 0x100.
 * isxdigit() is not used, because on some systems it also accepts
 * superscript 1 as a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    /* Each range is tested on its own, no assumption is made about the order
     * of the ranges in the character set. */
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1579
1580/*
1581 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1582 */
1583 char_u *
1584skiptowhite(p)
1585 char_u *p;
1586{
1587 while (*p != ' ' && *p != '\t' && *p != NUL)
1588 ++p;
1589 return p;
1590}
1591
#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
        || defined(PROTO)
/*
 * skiptowhite_esc: Like skiptowhite(), but a character that follows a
 * backslash or CTRL-V is skipped as well, so that an escaped ' ' or '\t'
 * does not end the text.
 */
    char_u *
skiptowhite_esc(p)
    char_u      *p;
{
    for ( ; *p != ' ' && *p != '\t' && *p != NUL; ++p)
    {
        /* Step over the escape character, unless it is the last character
         * before the NUL. */
        if ((*p == '\\' || *p == Ctrl_V) && p[1] != NUL)
            ++p;
    }
    return p;
}
#endif
1610
1611/*
1612 * Getdigits: Get a number from a string and skip over it.
1613 * Note: the argument is a pointer to a char_u pointer!
1614 */
1615 long
1616getdigits(pp)
1617 char_u **pp;
1618{
1619 char_u *p;
1620 long retval;
1621
1622 p = *pp;
1623 retval = atol((char *)p);
1624 if (*p == '-') /* skip negative sign */
1625 ++p;
1626 p = skipdigits(p); /* skip to next non-digit */
1627 *pp = p;
1628 return retval;
1629}
1630
1631/*
1632 * Return TRUE if "lbuf" is empty or only contains blanks.
1633 */
1634 int
1635vim_isblankline(lbuf)
1636 char_u *lbuf;
1637{
1638 char_u *p;
1639
1640 p = skipwhite(lbuf);
1641 return (*p == NUL || *p == '\r' || *p == '\n');
1642}
1643
1644/*
1645 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001646 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001647 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1648 * 0 decimal
1649 * '0' octal
1650 * 'X' hex
1651 * 'x' hex
1652 * If "len" is not NULL, the length of the number in characters is returned.
1653 * If "nptr" is not NULL, the signed result is returned in it.
1654 * If "unptr" is not NULL, the unsigned result is returned in it.
1655 */
1656 void
1657vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1658 char_u *start;
1659 int *hexp; /* return: type of number 0 = decimal, 'x'
1660 or 'X' is hex, '0' = octal */
1661 int *len; /* return: detected length of number */
1662 int dooct; /* recognize octal number */
1663 int dohex; /* recognize hex number */
1664 long *nptr; /* return: signed result */
1665 unsigned long *unptr; /* return: unsigned result */
1666{
1667 char_u *ptr = start;
1668 int hex = 0; /* default is decimal */
1669 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001670 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001671 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672
1673 if (ptr[0] == '-')
1674 {
1675 negative = TRUE;
1676 ++ptr;
1677 }
1678
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001679 /* Recognize hex and octal. */
1680 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001681 {
1682 hex = ptr[1];
1683 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1684 ptr += 2; /* hexadecimal */
1685 else
1686 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001687 hex = 0; /* default is decimal */
1688 if (dooct)
1689 {
1690 /* Don't interpret "0", "08" or "0129" as octal. */
1691 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1692 {
1693 if (ptr[n] > '7')
1694 {
1695 hex = 0; /* can't be octal */
1696 break;
1697 }
1698 if (ptr[n] > '0')
1699 hex = '0'; /* assume octal */
1700 }
1701 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001702 }
1703 }
1704
1705 /*
1706 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1707 */
1708 if (hex)
1709 {
1710 if (hex == '0')
1711 {
1712 /* octal */
1713 while ('0' <= *ptr && *ptr <= '7')
1714 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001715 un = 8 * un + (unsigned long)(*ptr - '0');
1716 ++ptr;
1717 }
1718 }
1719 else
1720 {
1721 /* hex */
1722 while (vim_isxdigit(*ptr))
1723 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724 un = 16 * un + (unsigned long)hex2nr(*ptr);
1725 ++ptr;
1726 }
1727 }
1728 }
1729 else
1730 {
1731 /* decimal */
1732 while (VIM_ISDIGIT(*ptr))
1733 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734 un = 10 * un + (unsigned long)(*ptr - '0');
1735 ++ptr;
1736 }
1737 }
1738
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739 if (hexp != NULL)
1740 *hexp = hex;
1741 if (len != NULL)
1742 *len = (int)(ptr - start);
1743 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001744 {
1745 if (negative) /* account for leading '-' for decimal numbers */
1746 *nptr = -(long)un;
1747 else
1748 *nptr = (long)un;
1749 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001750 if (unptr != NULL)
1751 *unptr = un;
1752}
1753
/*
 * Return the value (0 - 15) of the single hex character "c".
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; for
 * other characters the result is "c" minus '0'.
 */
    int
hex2nr(c)
    int         c;
{
    int         first = '0';    /* first character of the range "c" is in */
    int         value = 0;      /* value of that first character */

    if ('a' <= c && c <= 'f')
    {
        first = 'a';
        value = 10;
    }
    else if ('A' <= c && c <= 'F')
    {
        first = 'A';
        value = 10;
    }
    return c - first + value;
}
1768
#if defined(FEAT_TERMRESPONSE) \
        || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
/*
 * Convert the two hex characters at "p" to a byte value; p[0] is the high
 * nibble.
 * Return -1 if one of the characters is not hex.
 */
    int
hexhex2nr(p)
    char_u      *p;
{
    if (vim_isxdigit(p[0]) && vim_isxdigit(p[1]))
        return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
    return -1;
}
#endif
1784
1785/*
1786 * Return TRUE if "str" starts with a backslash that should be removed.
1787 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1788 * backslash is not a normal file name character.
1789 * '$' is a valid file name character, we don't remove the backslash before
1790 * it. This means it is not possible to use an environment variable after a
1791 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1792 * Although "\ name" is valid, the backslash in "Program\ files" must be
1793 * removed. Assume a file name doesn't start with a space.
1794 * For multi-byte names, never remove a backslash before a non-ascii
1795 * character, assume that all multi-byte characters are valid file name
1796 * characters.
1797 */
1798 int
1799rem_backslash(str)
1800 char_u *str;
1801{
1802#ifdef BACKSLASH_IN_FILENAME
1803 return (str[0] == '\\'
1804# ifdef FEAT_MBYTE
1805 && str[1] < 0x80
1806# endif
1807 && (str[1] == ' '
1808 || (str[1] != NUL
1809 && str[1] != '*'
1810 && str[1] != '?'
1811 && !vim_isfilec(str[1]))));
1812#else
1813 return (str[0] == '\\' && str[1] != NUL);
1814#endif
1815}
1816
1817/*
1818 * Halve the number of backslashes in a file name argument.
1819 * For MS-DOS we only do this if the character after the backslash
1820 * is not a normal file character.
1821 */
1822 void
1823backslash_halve(p)
1824 char_u *p;
1825{
1826 for ( ; *p; ++p)
1827 if (rem_backslash(p))
1828 STRCPY(p, p + 1);
1829}
1830
1831/*
1832 * backslash_halve() plus save the result in allocated memory.
1833 */
1834 char_u *
1835backslash_halve_save(p)
1836 char_u *p;
1837{
1838 char_u *res;
1839
1840 res = vim_strsave(p);
1841 if (res == NULL)
1842 return p;
1843 backslash_halve(res);
1844 return res;
1845}
1846
1847#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value and holds the resulting
 * byte, written as an octal number.  There are 32 rows of 8 entries; the
 * comment after each fourth row gives the index of the last entry so far.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,     /* up to 0x1f */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,     /* up to 0x3f */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,     /* up to 0x5f */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,     /* up to 0x7f */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,     /* up to 0x9f */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,     /* up to 0xbf */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,     /* up to 0xdf */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377      /* up to 0xff */
};
1888
1889/*
1890 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1891 * wanting 7-bit ASCII characters out the other end.
1892 */
1893 void
1894ebcdic2ascii(buffer, len)
1895 char_u *buffer;
1896 int len;
1897{
1898 int i;
1899
1900 for (i = 0; i < len; i++)
1901 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1902}
1903#endif