blob: 0c9c51136f4ffc53748b56d168a40f5c2080cb42 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
17static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
18#endif
19
20static int nr2hex __ARGS((int c));
21
/* Set to TRUE at the end of buf_init_chartab(); transchar() checks it and
 * falls back to a fixed printable range while it is still FALSE. */
22static int chartab_initialized = FALSE;
23
24/* b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255.
 * Character "c" lives in byte (c >> 3), bit (c & 7).
 * SET_CHARTAB() and RESET_CHARTAB() expand to a bare assignment expression
 * (no enclosing parentheses); GET_CHARTAB() yields a non-zero value when the
 * bit is set. */
26#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
27#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
28#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000210 /* Use the MB_ functions here, because isalpha() doesn't
211 * work properly when 'encoding' is "latin1" and the locale is
212 * "C". */
213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214#ifdef FEAT_FKMAP
215 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
216#endif
217 )
218 {
219 if (i == 0) /* (re)set ID flag */
220 {
221 if (tilde)
222 chartab[c] &= ~CT_ID_CHAR;
223 else
224 chartab[c] |= CT_ID_CHAR;
225 }
226 else if (i == 1) /* (re)set printable */
227 {
228 if ((c < ' '
229#ifndef EBCDIC
230 || c > '~'
231#endif
232#ifdef FEAT_FKMAP
233 || (p_altkeymap
234 && (F_isalpha(c) || F_isdigit(c)))
235#endif
236 )
237#ifdef FEAT_MBYTE
238 /* For double-byte we keep the cell width, so
239 * that we can detect it from the first byte. */
240 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
241#endif
242 )
243 {
244 if (tilde)
245 {
246 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
247 + ((dy_flags & DY_UHEX) ? 4 : 2);
248 chartab[c] &= ~CT_PRINT_CHAR;
249 }
250 else
251 {
252 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
253 chartab[c] |= CT_PRINT_CHAR;
254 }
255 }
256 }
257 else if (i == 2) /* (re)set fname flag */
258 {
259 if (tilde)
260 chartab[c] &= ~CT_FNAME_CHAR;
261 else
262 chartab[c] |= CT_FNAME_CHAR;
263 }
264 else /* i == 3 */ /* (re)set keyword flag */
265 {
266 if (tilde)
267 RESET_CHARTAB(buf, c);
268 else
269 SET_CHARTAB(buf, c);
270 }
271 }
272 ++c;
273 }
274 p = skip_to_option_part(p);
275 }
276 }
277 chartab_initialized = TRUE;
278 return OK;
279}
280
281/*
282 * Translate any special characters in buf[bufsize] in-place.
283 * The result is a string with only printable characters, but if there is not
284 * enough room, not all characters will be translated.
285 */
286 void
287trans_characters(buf, bufsize)
288 char_u *buf;
289 int bufsize;
290{
291 int len; /* length of string needing translation */
292 int room; /* room in buffer after string */
293 char_u *trs; /* translated character */
294 int trs_len; /* length of trs[] */
295
296 len = (int)STRLEN(buf);
297 room = bufsize - len;
298 while (*buf != 0)
299 {
300# ifdef FEAT_MBYTE
301 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000302 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000303 len -= trs_len;
304 else
305# endif
306 {
307 trs = transchar_byte(*buf);
308 trs_len = (int)STRLEN(trs);
309 if (trs_len > 1)
310 {
311 room -= trs_len - 1;
312 if (room <= 0)
313 return;
314 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
315 }
316 mch_memmove(buf, trs, (size_t)trs_len);
317 --len;
318 }
319 buf += trs_len;
320 }
321}
322
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Return a copy of "s" in allocated memory, with special characters replaced
 * by printable characters.
 * Returns NULL when out of memory.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
#ifdef FEAT_MBYTE
    int         l, len, c;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* First pass: compute the length of the result, taking account of
         * unprintable multi-byte characters. */
        len = 0;
        for (p = s; *p != NUL; )
        {
            l = (*mb_ptr2len)(p);
            if (l > 1)
            {
                c = (*mb_ptr2char)(p);
                p += l;
                if (!vim_isprintc(c))
                {
                    /* unprintable: takes the size of its hex form */
                    transchar_hex(hexbuf, c);
                    l = (int)STRLEN(hexbuf);
                }
                len += l;
            }
            else
            {
                l = byte2cells(*p++);
                len += (l > 0) ? l : 4;     /* 4: illegal byte sequence */
            }
        }
        res = alloc((unsigned)(len + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res == NULL)
        return NULL;

    /* Second pass: append the translation of each character. */
    *res = NUL;
    for (p = s; *p != NUL; )
    {
#ifdef FEAT_MBYTE
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(res, p, l);     /* append printable multi-byte char */
            else
                transchar_hex(res + STRLEN(res), c);
            p += l;
            continue;
        }
#endif
        STRCAT(res, transchar_byte(*p++));
    }
    return res;
}
#endif
399
400#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
401/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]" and returns "buf".
 *
 * str:     text to convert; "orglen" bytes are copied, the input itself is
 *          not changed.
 * orglen:  number of bytes of "str" to use.
 * buf:     destination buffer, or NULL to use a growarray.
 * buflen:  size of "buf"; when "orglen" does not fit the copy is silently
 *          truncated to "buflen" - 1 bytes.
 */
407 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000408str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000409 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000410 int orglen;
411 char_u *buf;
412 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000413{
414 garray_T ga;
415 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000416 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000417
/* Accessors that hide whether the result lives in the growarray "ga" (when
 * "buf" is NULL) or in the caller's "buf".
 * NOTE(review): these are not #undef'ed inside this function, so they stay
 * defined for whatever follows in the file. */
418#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
419#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000420#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
421#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000423 /* Copy "str" into "buf" or allocated memory, unmodified. */
424 if (buf == NULL)
425 {
426 ga_init2(&ga, 1, 10);
427 if (ga_grow(&ga, len + 1) == FAIL)
428 return NULL;
429 mch_memmove(ga.ga_data, str, (size_t)len);
430 ga.ga_len = len;
431 }
432 else
433 {
/* Truncate instead of failing when "str" does not fit in "buf".
 * NOTE(review): a "buflen" of zero would make "len" negative here; the
 * callers presumably never pass that - confirm. */
434 if (len >= buflen) /* Ugly! */
435 len = buflen - 1;
436 mch_memmove(buf, str, (size_t)len);
437 }
/* Terminate the copy; the loop below relies on the NUL to stop. */
438 if (buf == NULL)
439 GA_CHAR(len) = NUL;
440 else
441 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442
443 /* Make each character lower case. */
444 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000445 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446 {
447#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000448 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000449 {
/* Only UTF-8 characters are converted here; other multi-byte characters
 * are skipped unchanged. */
450 if (enc_utf8)
451 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000452 int c = utf_ptr2char(STR_PTR(i));
453 int ol = utf_ptr2len(STR_PTR(i));
454 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455
Bram Moolenaarb9839212008-06-28 11:03:50 +0000456 /* Only replace the character when it is not an invalid
457 * sequence (ASCII character or more than one byte) and
458 * utf_tolower() doesn't return the original character. */
Bram Moolenaarb9839212008-06-28 11:03:50 +0000459 if ((c < 0x80 || ol > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461 int nl = utf_char2len(lc);
462
463 /* If the byte length changes need to shift the following
464 * characters forward or backward. */
465 if (ol != nl)
466 {
467 if (nl > ol)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000468 {
/* Growing: the growarray must be enlarged, or the fixed buffer must still
 * have room for the extra bytes. */
469 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
470 : len + nl - ol >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000471 {
472 /* out of memory, keep old char */
473 lc = c;
474 nl = ol;
475 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000476 }
/* Checked again: "nl" was reset to "ol" above when there was no room. */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000477 if (ol != nl)
478 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000479 if (buf == NULL)
480 {
Bram Moolenaar446cb832008-06-24 21:56:24 +0000481 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000482 ga.ga_len += nl - ol;
483 }
484 else
485 {
Bram Moolenaar446cb832008-06-24 21:56:24 +0000486 STRMOVE(buf + i + nl, buf + i + ol);
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 len += nl - ol;
488 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000489 }
490 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000491 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492 }
493 }
494 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000495 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 }
497 else
498#endif
499 {
/* Single-byte character: lower case it with TOLOWER_LOC(). */
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000500 if (buf == NULL)
501 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
502 else
503 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 ++i;
505 }
506 }
507
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000508 if (buf == NULL)
509 return (char_u *)ga.ga_data;
510 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511}
512#endif
513
514/*
515 * Catch 22: chartab[] can't be initialized before the options are
516 * initialized, and initializing options may cause transchar() to be called!
517 * When chartab_initialized == FALSE don't use chartab[].
518 * Does NOT work for multi-byte characters, c must be <= 255.
519 * Also doesn't work for the first byte of a multi-byte, "c" must be a
520 * character!
521 */
522static char_u transchar_buf[7];
523
524 char_u *
525transchar(c)
526 int c;
527{
528 int i;
529
530 i = 0;
531 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
532 {
533 transchar_buf[0] = '~';
534 transchar_buf[1] = '@';
535 i = 2;
536 c = K_SECOND(c);
537 }
538
539 if ((!chartab_initialized && (
540#ifdef EBCDIC
541 (c >= 64 && c < 255)
542#else
543 (c >= ' ' && c <= '~')
544#endif
545#ifdef FEAT_FKMAP
546 || F_ischar(c)
547#endif
548 )) || (c < 256 && vim_isprintc_strict(c)))
549 {
550 /* printable character */
551 transchar_buf[i] = c;
552 transchar_buf[i + 1] = NUL;
553 }
554 else
555 transchar_nonprint(transchar_buf + i, c);
556 return transchar_buf;
557}
558
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Variant of transchar() that takes a byte instead of a character.
 * In UTF-8 a byte of 0x80 or above is always shown as non-printable (it is
 * an illegal byte when it stands alone).
 */
    char_u *
transchar_byte(c)
    int         c;
{
    if (!enc_utf8 || c < 0x80)
        return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
576
577/*
578 * Convert non-printable character to two or more printable characters in
579 * "buf[]". "buf" needs to be able to hold five bytes.
580 * Does NOT work for multi-byte characters, c must be <= 255.
581 */
582 void
583transchar_nonprint(buf, c)
584 char_u *buf;
585 int c;
586{
587 if (c == NL)
588 c = NUL; /* we use newline in place of a NUL */
589 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
590 c = NL; /* we use CR in place of NL in this case */
591
592 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
593 transchar_hex(buf, c);
594
595#ifdef EBCDIC
596 /* For EBCDIC only the characters 0-63 and 255 are not printable */
597 else if (CtrlChar(c) != 0 || c == DEL)
598#else
599 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
600#endif
601 {
602 buf[0] = '^';
603#ifdef EBCDIC
604 if (c == DEL)
605 buf[1] = '?'; /* DEL displayed as ^? */
606 else
607 buf[1] = CtrlChar(c);
608#else
609 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
610#endif
611
612 buf[2] = NUL;
613 }
614#ifdef FEAT_MBYTE
615 else if (enc_utf8 && c >= 0x80)
616 {
617 transchar_hex(buf, c);
618 }
619#endif
620#ifndef EBCDIC
621 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
622 {
623 buf[0] = '|';
624 buf[1] = c - 0x80;
625 buf[2] = NUL;
626 }
627#else
628 else if (c < 64)
629 {
630 buf[0] = '~';
631 buf[1] = MetaChar(c);
632 buf[2] = NUL;
633 }
634#endif
635 else /* 0x80 - 0x9f and 0xff */
636 {
637 /*
638 * TODO: EBCDIC I don't know what to do with this chars, so I display
639 * them as '~?' for now
640 */
641 buf[0] = '~';
642#ifdef EBCDIC
643 buf[1] = '?'; /* 0xff displayed as ~? */
644#else
645 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
646#endif
647 buf[2] = NUL;
648 }
649}
650
651 void
652transchar_hex(buf, c)
653 char_u *buf;
654 int c;
655{
656 int i = 0;
657
658 buf[0] = '<';
659#ifdef FEAT_MBYTE
660 if (c > 255)
661 {
662 buf[++i] = nr2hex((unsigned)c >> 12);
663 buf[++i] = nr2hex((unsigned)c >> 8);
664 }
665#endif
666 buf[++i] = nr2hex((unsigned)c >> 4);
667 buf[++i] = nr2hex(c);
668 buf[++i] = '>';
669 buf[++i] = NUL;
670}
671
/*
 * Return the hex digit character for the lower 4 bits of "c".
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static int
nr2hex(c)
    int         c;
{
    int         nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
685
686/*
687 * Return number of display cells occupied by byte "b".
688 * Caller must make sure 0 <= b <= 255.
689 * For multi-byte mode "b" must be the first byte of a character.
690 * A TAB is counted as two cells: "^I".
691 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
692 * cells depends on further bytes.
693 */
694 int
695byte2cells(b)
696 int b;
697{
698#ifdef FEAT_MBYTE
699 if (enc_utf8 && b >= 0x80)
700 return 0;
701#endif
702 return (chartab[b] & CT_CELL_MASK);
703}
704
705/*
706 * Return number of display cells occupied by character "c".
707 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
708 * A TAB is counted as two cells: "^I" or four: "<09>".
709 */
710 int
711char2cells(c)
712 int c;
713{
714 if (IS_SPECIAL(c))
715 return char2cells(K_SECOND(c)) + 2;
716#ifdef FEAT_MBYTE
717 if (c >= 0x80)
718 {
719 /* UTF-8: above 0x80 need to check the value */
720 if (enc_utf8)
721 return utf_char2cells(c);
722 /* DBCS: double-byte means double-width, except for euc-jp with first
723 * byte 0x8e */
724 if (enc_dbcs != 0 && c >= 0x100)
725 {
726 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
727 return 1;
728 return 2;
729 }
730 }
731#endif
732 return (chartab[c & 0xff] & CT_CELL_MASK);
733}
734
735/*
736 * Return number of display cells occupied by character at "*p".
737 * A TAB is counted as two cells: "^I" or four: "<09>".
738 */
739 int
740ptr2cells(p)
741 char_u *p;
742{
743#ifdef FEAT_MBYTE
744 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
745 if (enc_utf8 && *p >= 0x80)
746 return utf_ptr2cells(p);
747 /* For DBCS we can tell the cell count from the first byte. */
748#endif
749 return (chartab[*p] & CT_CELL_MASK);
750}
751
752/*
753 * Return the number of characters string "s" will take on the screen,
754 * counting TABs as two characters: "^I".
755 */
756 int
757vim_strsize(s)
758 char_u *s;
759{
760 return vim_strnsize(s, (int)MAXCOL);
761}
762
763/*
764 * Return the number of characters string "s[len]" will take on the screen,
765 * counting TABs as two characters: "^I".
766 */
767 int
768vim_strnsize(s, len)
769 char_u *s;
770 int len;
771{
772 int size = 0;
773
774 while (*s != NUL && --len >= 0)
775 {
776#ifdef FEAT_MBYTE
777 if (has_mbyte)
778 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000779 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000780
781 size += ptr2cells(s);
782 s += l;
783 len -= l - 1;
784 }
785 else
786#endif
787 size += byte2cells(*s++);
788 }
789 return size;
790}
791
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * This expands to a complete if/else in which both branches "return", so it
 * can only be used as the last statement of a function returning int.  Use
 * it without a trailing semicolon.
 * "wp" is the window ('list' setting), "buf" the buffer ('tabstop'), "p"
 * points to the character and "col" is the column it starts in.
 * Every argument is parenthesized where operator precedence matters, so that
 * an expression can be passed for "col".
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
808
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of screen cells the character at "p" takes when it
 * starts in column "col", for the current window and buffer.  A TAB is
 * expanded to the next tabstop, unless 'list' is set without a "tab" entry
 * in 'listchars' (lcs_tab1).
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    int         ts;

    if (*p != TAB || (curwin->w_p_list && !lcs_tab1))
        return ptr2cells(p);
    ts = curbuf->b_p_ts;
    return (int)(ts - (col % ts));
}
#endif
819
#ifdef FEAT_LINEBREAK
/*
 * Return the number of screen cells the character at "p" takes when it
 * starts in column "col", for window "wp" and its buffer.  A TAB is
 * expanded to the next tabstop, unless 'list' is set without a "tab" entry
 * in 'listchars' (lcs_tab1).
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    int         ts;

    if (*p != TAB || (wp->w_p_list && !lcs_tab1))
        return ptr2cells(p);
    ts = wp->w_buffer->b_p_ts;
    return (int)(ts - (col % ts));
}
#endif
830
831/*
832 * return the number of characters the string 's' will take on the screen,
833 * taking into account the size of a tab
834 */
835 int
836linetabsize(s)
837 char_u *s;
838{
839 colnr_T col = 0;
840
841 while (*s != NUL)
842 col += lbr_chartabsize_adv(&s, col);
843 return (int)col;
844}
845
846/*
847 * Like linetabsize(), but for a given window instead of the current one.
848 */
849 int
850win_linetabsize(wp, p, len)
851 win_T *wp;
852 char_u *p;
853 colnr_T len;
854{
855 colnr_T col = 0;
856 char_u *s;
857
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000858 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860 return (int)col;
861}
862
863/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000864 * Return TRUE if 'c' is a normal identifier character:
865 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 */
867 int
868vim_isIDc(c)
869 int c;
870{
871 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
872}
873
874/*
875 * return TRUE if 'c' is a keyword character: Letters and characters from
876 * 'iskeyword' option for current buffer.
877 * For multi-byte characters mb_get_class() is used (builtin rules).
878 */
879 int
880vim_iswordc(c)
881 int c;
882{
883#ifdef FEAT_MBYTE
884 if (c >= 0x100)
885 {
886 if (enc_dbcs != 0)
887 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
888 if (enc_utf8)
889 return utf_class(c) >= 2;
890 }
891#endif
892 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
893}
894
895/*
896 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
897 */
898 int
899vim_iswordp(p)
900 char_u *p;
901{
902#ifdef FEAT_MBYTE
903 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
904 return mb_get_class(p) >= 2;
905#endif
906 return GET_CHARTAB(curbuf, *p) != 0;
907}
908
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return TRUE if the character at "p" is a keyword character, using the
 * keyword flags of buffer "buf" instead of the current buffer.
 */
    int
vim_iswordc_buf(p, buf)
    char_u      *p;
    buf_T       *buf;
{
    int         first = *p;

# ifdef FEAT_MBYTE
    if (has_mbyte && MB_BYTE2LEN(first) > 1)
        return mb_get_class(p) >= 2;
# endif
    return (GET_CHARTAB(buf, first) != 0);
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922
923/*
924 * return TRUE if 'c' is a valid file-name character
925 * Assume characters above 0x100 are valid (multi-byte).
926 */
927 int
928vim_isfilec(c)
929 int c;
930{
931 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
932}
933
934/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000935 * return TRUE if 'c' is a valid file-name character or a wildcard character
936 * Assume characters above 0x100 are valid (multi-byte).
937 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
938 * returns false.
939 */
940 int
941vim_isfilec_or_wc(c)
942 int c;
943{
944 char_u buf[2];
945
946 buf[0] = (char_u)c;
947 buf[1] = NUL;
948 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
949}
950
951/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000952 * return TRUE if 'c' is a printable character
953 * Assume characters above 0x100 are printable (multi-byte), except for
954 * Unicode.
955 */
956 int
957vim_isprintc(c)
958 int c;
959{
960#ifdef FEAT_MBYTE
961 if (enc_utf8 && c >= 0x100)
962 return utf_printable(c);
963#endif
964 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
965}
966
967/*
968 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
969 * byte of a double-byte character.
970 */
971 int
972vim_isprintc_strict(c)
973 int c;
974{
975#ifdef FEAT_MBYTE
976 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
977 return FALSE;
978 if (enc_utf8 && c >= 0x100)
979 return utf_printable(c);
980#endif
981 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
982}
983
984/*
985 * like chartabsize(), but also check for line breaks on the screen
986 */
987 int
988lbr_chartabsize(s, col)
989 unsigned char *s;
990 colnr_T col;
991{
992#ifdef FEAT_LINEBREAK
993 if (!curwin->w_p_lbr && *p_sbr == NUL)
994 {
995#endif
996#ifdef FEAT_MBYTE
997 if (curwin->w_p_wrap)
998 return win_nolbr_chartabsize(curwin, s, col, NULL);
999#endif
1000 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1001#ifdef FEAT_LINEBREAK
1002 }
1003 return win_lbr_chartabsize(curwin, s, col, NULL);
1004#endif
1005}
1006
1007/*
1008 * Call lbr_chartabsize() and advance the pointer.
1009 */
1010 int
1011lbr_chartabsize_adv(s, col)
1012 char_u **s;
1013 colnr_T col;
1014{
1015 int retval;
1016
1017 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001018 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001019 return retval;
1020}
1021
1022/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the number of screen cells the character at "s" takes in window
 * "wp" when it starts in virtual column "col", taking 'linebreak' and
 * 'showbreak' into account.
 *
 * If "headp" is not NULL, *headp is set to the size of what we add for the
 * 'showbreak' string at the start of the line (plus one when a ">" is added
 * for a double-wide character that doesn't fit).
 * Warning: on the quick path (no 'linebreak' and no 'showbreak') *headp is
 * only set if it's a non-zero value, init to 0 before calling.  On the
 * FEAT_LINEBREAK path it is always set.
 */
1029/*ARGSUSED*/
1030 int
1031win_lbr_chartabsize(wp, s, col, headp)
1032 win_T *wp;
1033 char_u *s;
1034 colnr_T col;
1035 int *headp;
1036{
1037#ifdef FEAT_LINEBREAK
1038 int c;
1039 int size;
1040 colnr_T col2;
1041 colnr_T colmax;
1042 int added;
/* Without FEAT_MBYTE "mb_added" is a constant zero, so the expressions
 * below that use it need no #ifdef. */
1043# ifdef FEAT_MBYTE
1044 int mb_added = 0;
1045# else
1046# define mb_added 0
1047# endif
1048 int numberextra;
1049 char_u *ps;
1050 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001051 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052
1053 /*
1054 * No 'linebreak' and 'showbreak': return quickly.
1055 */
1056 if (!wp->w_p_lbr && *p_sbr == NUL)
1057#endif
1058 {
/* Without FEAT_LINEBREAK this block is the whole function body;
 * RET_WIN_BUF_CHARTABSIZE() returns in both of its branches. */
1059#ifdef FEAT_MBYTE
1060 if (wp->w_p_wrap)
1061 return win_nolbr_chartabsize(wp, s, col, headp);
1062#endif
1063 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1064 }
1065
1066#ifdef FEAT_LINEBREAK
1067 /*
1068 * First get normal size, without 'linebreak'
1069 */
1070 size = win_chartabsize(wp, s, col);
1071 c = *s;
1072
1073 /*
1074 * If 'linebreak' set check at a blank before a non-blank if the line
1075 * needs a break here
1076 */
1077 if (wp->w_p_lbr
1078 && vim_isbreak(c)
1079 && !vim_isbreak(s[1])
1080 && !wp->w_p_list
1081 && wp->w_p_wrap
1082# ifdef FEAT_VERTSPLIT
1083 && wp->w_width != 0
1084# endif
1085 )
1086 {
1087 /*
1088 * Count all characters from first non-blank after a blank up to next
1089 * non-blank after a blank.
1090 */
1091 numberextra = win_col_off(wp);
1092 col2 = col;
1093 colmax = W_WIDTH(wp) - numberextra;
/* "col" is past the first screen line: move "colmax" up by whole further
 * lines ("n" columns each) until it is beyond "col". */
1094 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001095 {
1096 n = colmax + win_col_off2(wp);
1097 if (n > 0)
1098 colmax += (((col - colmax) / n) + 1) * n;
1099 }
1100
/* Walk over the following characters, adding up their width in "col2",
 * until the text that must stay together ends or no longer fits. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001101 for (;;)
1102 {
1103 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001104 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001105 c = *s;
1106 if (!(c != NUL
1107 && (vim_isbreak(c)
1108 || (!vim_isbreak(c)
1109 && (col2 == col || !vim_isbreak(*ps))))))
1110 break;
1111
1112 col2 += win_chartabsize(wp, s, col2);
1113 if (col2 >= colmax) /* doesn't fit */
1114 {
/* The break character takes up the rest of the screen line. */
1115 size = colmax - col;
1116 tab_corr = FALSE;
1117 break;
1118 }
1119 }
1120 }
1121# ifdef FEAT_MBYTE
1122 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1123 && wp->w_p_wrap && in_win_border(wp, col))
1124 {
1125 ++size; /* Count the ">" in the last column. */
1126 mb_added = 1;
1127 }
1128# endif
1129
1130 /*
1131 * May have to add something for 'showbreak' string at start of line
1132 * Set *headp to the size of what we add.
1133 */
1134 added = 0;
1135 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1136 {
/* From here on "col" is turned into the column within the screen line. */
1137 numberextra = win_col_off(wp);
1138 col += numberextra + mb_added;
1139 if (col >= (colnr_T)W_WIDTH(wp))
1140 {
1141 col -= W_WIDTH(wp);
1142 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1143 if (numberextra > 0)
1144 col = col % numberextra;
1145 }
1146 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1147 {
1148 added = vim_strsize(p_sbr);
/* For a TAB only whole multiples of 'tabstop' are added. */
1149 if (tab_corr)
1150 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1151 else
1152 size += added;
/* Only report the 'showbreak' size in *headp when the character is at the
 * start of the screen line. */
1153 if (col != 0)
1154 added = 0;
1155 }
1156 }
1157 if (headp != NULL)
1158 *headp = added + mb_added;
1159 return size;
1160#endif
1161}
1162
1163#if defined(FEAT_MBYTE) || defined(PROTO)
1164/*
1165 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1166 * 'wrap' is on. This means we need to check for a double-byte character that
1167 * doesn't fit at the end of the screen line.
1168 */
1169 static int
1170win_nolbr_chartabsize(wp, s, col, headp)
1171 win_T *wp;
1172 char_u *s;
1173 colnr_T col;
1174 int *headp;
1175{
1176 int n;
1177
1178 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1179 {
1180 n = wp->w_buffer->b_p_ts;
1181 return (int)(n - (col % n));
1182 }
1183 n = ptr2cells(s);
1184 /* Add one cell for a double-width character in the last column of the
1185 * window, displayed with a ">". */
1186 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1187 {
1188 if (headp != NULL)
1189 *headp = 1;
1190 return 3;
1191 }
1192 return n;
1193}
1194
1195/*
1196 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1197 * "wp".
1198 */
1199 int
1200in_win_border(wp, vcol)
1201 win_T *wp;
1202 colnr_T vcol;
1203{
1204 colnr_T width1; /* width of first line (after line number) */
1205 colnr_T width2; /* width of further lines */
1206
1207#ifdef FEAT_VERTSPLIT
1208 if (wp->w_width == 0) /* there is no border */
1209 return FALSE;
1210#endif
1211 width1 = W_WIDTH(wp) - win_col_off(wp);
1212 if (vcol < width1 - 1)
1213 return FALSE;
1214 if (vcol == width1 - 1)
1215 return TRUE;
1216 width2 = width1 + win_col_off2(wp);
1217 return ((vcol - width1) % width2 == width2 - 1);
1218}
1219#endif /* FEAT_MBYTE */
1220
1221/*
1222 * Get virtual column number of pos.
1223 * start: on the first position of this character (TAB, ctrl)
1224 * cursor: where the cursor is on this character (first char, except for TAB)
1225 * end: on the last position of this character (TAB, ctrl)
1226 *
1227 * This is used very often, keep it fast!
1228 */
1229 void
1230getvcol(wp, pos, start, cursor, end)
1231 win_T *wp;
1232 pos_T *pos;
1233 colnr_T *start;
1234 colnr_T *cursor;
1235 colnr_T *end;
1236{
1237 colnr_T vcol;
1238 char_u *ptr; /* points to current char */
1239 char_u *posptr; /* points to char at pos->col */
1240 int incr;
1241 int head;
1242 int ts = wp->w_buffer->b_p_ts;
1243 int c;
1244
1245 vcol = 0;
1246 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1247 posptr = ptr + pos->col;
1248
1249 /*
1250 * This function is used very often, do some speed optimizations.
1251 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1252 * Also use this when 'list' is set but tabs take their normal size.
1253 */
1254 if ((!wp->w_p_list || lcs_tab1 != NUL)
1255#ifdef FEAT_LINEBREAK
1256 && !wp->w_p_lbr && *p_sbr == NUL
1257#endif
1258 )
1259 {
1260#ifndef FEAT_MBYTE
1261 head = 0;
1262#endif
1263 for (;;)
1264 {
1265#ifdef FEAT_MBYTE
1266 head = 0;
1267#endif
1268 c = *ptr;
1269 /* make sure we don't go past the end of the line */
1270 if (c == NUL)
1271 {
1272 incr = 1; /* NUL at end of line only takes one column */
1273 break;
1274 }
1275 /* A tab gets expanded, depending on the current column */
1276 if (c == TAB)
1277 incr = ts - (vcol % ts);
1278 else
1279 {
1280#ifdef FEAT_MBYTE
1281 if (has_mbyte)
1282 {
1283 /* For utf-8, if the byte is >= 0x80, need to look at
1284 * further bytes to find the cell width. */
1285 if (enc_utf8 && c >= 0x80)
1286 incr = utf_ptr2cells(ptr);
1287 else
1288 incr = CHARSIZE(c);
1289
1290 /* If a double-cell char doesn't fit at the end of a line
1291 * it wraps to the next line, it's like this char is three
1292 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001293 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1294 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001295 {
1296 ++incr;
1297 head = 1;
1298 }
1299 }
1300 else
1301#endif
1302 incr = CHARSIZE(c);
1303 }
1304
1305 if (ptr >= posptr) /* character at pos->col */
1306 break;
1307
1308 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001309 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 }
1311 }
1312 else
1313 {
1314 for (;;)
1315 {
1316 /* A tab gets expanded, depending on the current column */
1317 head = 0;
1318 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1319 /* make sure we don't go past the end of the line */
1320 if (*ptr == NUL)
1321 {
1322 incr = 1; /* NUL at end of line only takes one column */
1323 break;
1324 }
1325
1326 if (ptr >= posptr) /* character at pos->col */
1327 break;
1328
1329 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001330 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001331 }
1332 }
1333 if (start != NULL)
1334 *start = vcol + head;
1335 if (end != NULL)
1336 *end = vcol + incr - 1;
1337 if (cursor != NULL)
1338 {
1339 if (*ptr == TAB
1340 && (State & NORMAL)
1341 && !wp->w_p_list
1342 && !virtual_active()
1343#ifdef FEAT_VISUAL
1344 && !(VIsual_active
1345 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1346#endif
1347 )
1348 *cursor = vcol + incr - 1; /* cursor at end */
1349 else
1350 *cursor = vcol + head; /* cursor at start */
1351 }
1352}
1353
1354/*
1355 * Get virtual cursor column in the current window, pretending 'list' is off.
1356 */
1357 colnr_T
1358getvcol_nolist(posp)
1359 pos_T *posp;
1360{
1361 int list_save = curwin->w_p_list;
1362 colnr_T vcol;
1363
1364 curwin->w_p_list = FALSE;
1365 getvcol(curwin, posp, NULL, &vcol, NULL);
1366 curwin->w_p_list = list_save;
1367 return vcol;
1368}
1369
1370#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1371/*
1372 * Get virtual column in virtual mode.
1373 */
1374 void
1375getvvcol(wp, pos, start, cursor, end)
1376 win_T *wp;
1377 pos_T *pos;
1378 colnr_T *start;
1379 colnr_T *cursor;
1380 colnr_T *end;
1381{
1382 colnr_T col;
1383 colnr_T coladd;
1384 colnr_T endadd;
1385# ifdef FEAT_MBYTE
1386 char_u *ptr;
1387# endif
1388
1389 if (virtual_active())
1390 {
1391 /* For virtual mode, only want one value */
1392 getvcol(wp, pos, &col, NULL, NULL);
1393
1394 coladd = pos->coladd;
1395 endadd = 0;
1396# ifdef FEAT_MBYTE
1397 /* Cannot put the cursor on part of a wide character. */
1398 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1399 if (pos->col < STRLEN(ptr))
1400 {
1401 int c = (*mb_ptr2char)(ptr + pos->col);
1402
1403 if (c != TAB && vim_isprintc(c))
1404 {
1405 endadd = char2cells(c) - 1;
Bram Moolenaara5792f52005-11-23 21:25:05 +00001406 if (coladd > endadd) /* past end of line */
1407 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408 else
1409 coladd = 0;
1410 }
1411 }
1412# endif
1413 col += coladd;
1414 if (start != NULL)
1415 *start = col;
1416 if (cursor != NULL)
1417 *cursor = col;
1418 if (end != NULL)
1419 *end = col + endadd;
1420 }
1421 else
1422 getvcol(wp, pos, start, cursor, end);
1423}
1424#endif
1425
1426#if defined(FEAT_VISUAL) || defined(PROTO)
1427/*
1428 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429 * Used for Visual block mode.
1430 */
1431 void
1432getvcols(wp, pos1, pos2, left, right)
1433 win_T *wp;
1434 pos_T *pos1, *pos2;
1435 colnr_T *left, *right;
1436{
1437 colnr_T from1, from2, to1, to2;
1438
1439 if (ltp(pos1, pos2))
1440 {
1441 getvvcol(wp, pos1, &from1, NULL, &to1);
1442 getvvcol(wp, pos2, &from2, NULL, &to2);
1443 }
1444 else
1445 {
1446 getvvcol(wp, pos2, &from1, NULL, &to1);
1447 getvvcol(wp, pos1, &from2, NULL, &to2);
1448 }
1449 if (from2 < from1)
1450 *left = from2;
1451 else
1452 *left = from1;
1453 if (to2 > to1)
1454 {
1455 if (*p_sel == 'e' && from2 - 1 >= to1)
1456 *right = from2 - 1;
1457 else
1458 *right = to2;
1459 }
1460 else
1461 *right = to1;
1462}
1463#endif
1464
1465/*
1466 * skipwhite: skip over ' ' and '\t'.
1467 */
1468 char_u *
1469skipwhite(p)
1470 char_u *p;
1471{
1472 while (vim_iswhite(*p)) /* skip to next non-white */
1473 ++p;
1474 return p;
1475}
1476
1477/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001478 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479 */
1480 char_u *
1481skipdigits(p)
1482 char_u *p;
1483{
1484 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1485 ++p;
1486 return p;
1487}
1488
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001489#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001490/*
1491 * skip over digits and hex characters
1492 */
1493 char_u *
1494skiphex(p)
1495 char_u *p;
1496{
1497 while (vim_isxdigit(*p)) /* skip to next non-digit */
1498 ++p;
1499 return p;
1500}
1501#endif
1502
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001503#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1504/*
1505 * skip to digit (or NUL after the string)
1506 */
1507 char_u *
1508skiptodigit(p)
1509 char_u *p;
1510{
1511 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1512 ++p;
1513 return p;
1514}
1515
1516/*
1517 * skip to hex character (or NUL after the string)
1518 */
1519 char_u *
1520skiptohex(p)
1521 char_u *p;
1522{
1523 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1524 ++p;
1525 return p;
1526}
1527#endif
1528
/*
 * Return non-zero when "c" is one of the characters '0' to '9'.
 * Replacement for isdigit() that accepts any int value, also above 0x100.
 * The library isdigit() is avoided because on some systems it also accepts
 * superscript 1 as a digit.
 * For simple arguments the VIM_ISDIGIT() macro can be used instead.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1541
/*
 * Return non-zero when "c" is a hexadecimal digit: '0'-'9', 'a'-'f' or
 * 'A'-'F'.
 * Replacement for isxdigit() that accepts any int value, also above 0x100.
 * The library isxdigit() is avoided because on some systems it also accepts
 * superscript 1 as a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1555
Bram Moolenaar78622822005-08-23 21:00:13 +00001556#if defined(FEAT_MBYTE) || defined(PROTO)
1557/*
1558 * Vim's own character class functions. These exist because many library
1559 * islower()/toupper() etc. do not work properly: they crash when used with
1560 * invalid values or can't handle latin1 when the locale is C.
1561 * Speed is most important here.
1562 */
1563#define LATIN1LOWER 'l'
1564#define LATIN1UPPER 'U'
1565
1566/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001567static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1568static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1569static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
Bram Moolenaar78622822005-08-23 21:00:13 +00001570
1571 int
1572vim_islower(c)
1573 int c;
1574{
1575 if (c <= '@')
1576 return FALSE;
1577 if (c >= 0x80)
1578 {
1579 if (enc_utf8)
1580 return utf_islower(c);
1581 if (c >= 0x100)
1582 {
1583#ifdef HAVE_ISWLOWER
1584 if (has_mbyte)
1585 return iswlower(c);
1586#endif
1587 /* islower() can't handle these chars and may crash */
1588 return FALSE;
1589 }
1590 if (enc_latin1like)
1591 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1592 }
1593 return islower(c);
1594}
1595
1596 int
1597vim_isupper(c)
1598 int c;
1599{
1600 if (c <= '@')
1601 return FALSE;
1602 if (c >= 0x80)
1603 {
1604 if (enc_utf8)
1605 return utf_isupper(c);
1606 if (c >= 0x100)
1607 {
1608#ifdef HAVE_ISWUPPER
1609 if (has_mbyte)
1610 return iswupper(c);
1611#endif
1612 /* islower() can't handle these chars and may crash */
1613 return FALSE;
1614 }
1615 if (enc_latin1like)
1616 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1617 }
1618 return isupper(c);
1619}
1620
1621 int
1622vim_toupper(c)
1623 int c;
1624{
1625 if (c <= '@')
1626 return c;
1627 if (c >= 0x80)
1628 {
1629 if (enc_utf8)
1630 return utf_toupper(c);
1631 if (c >= 0x100)
1632 {
1633#ifdef HAVE_TOWUPPER
1634 if (has_mbyte)
1635 return towupper(c);
1636#endif
1637 /* toupper() can't handle these chars and may crash */
1638 return c;
1639 }
1640 if (enc_latin1like)
1641 return latin1upper[c];
1642 }
1643 return TOUPPER_LOC(c);
1644}
1645
1646 int
1647vim_tolower(c)
1648 int c;
1649{
1650 if (c <= '@')
1651 return c;
1652 if (c >= 0x80)
1653 {
1654 if (enc_utf8)
1655 return utf_tolower(c);
1656 if (c >= 0x100)
1657 {
1658#ifdef HAVE_TOWLOWER
1659 if (has_mbyte)
1660 return towlower(c);
1661#endif
1662 /* tolower() can't handle these chars and may crash */
1663 return c;
1664 }
1665 if (enc_latin1like)
1666 return latin1lower[c];
1667 }
1668 return TOLOWER_LOC(c);
1669}
1670#endif
1671
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672/*
1673 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1674 */
1675 char_u *
1676skiptowhite(p)
1677 char_u *p;
1678{
1679 while (*p != ' ' && *p != '\t' && *p != NUL)
1680 ++p;
1681 return p;
1682}
1683
1684#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1685 || defined(PROTO)
1686/*
1687 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1688 */
1689 char_u *
1690skiptowhite_esc(p)
1691 char_u *p;
1692{
1693 while (*p != ' ' && *p != '\t' && *p != NUL)
1694 {
1695 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1696 ++p;
1697 ++p;
1698 }
1699 return p;
1700}
1701#endif
1702
1703/*
1704 * Getdigits: Get a number from a string and skip over it.
1705 * Note: the argument is a pointer to a char_u pointer!
1706 */
1707 long
1708getdigits(pp)
1709 char_u **pp;
1710{
1711 char_u *p;
1712 long retval;
1713
1714 p = *pp;
1715 retval = atol((char *)p);
1716 if (*p == '-') /* skip negative sign */
1717 ++p;
1718 p = skipdigits(p); /* skip to next non-digit */
1719 *pp = p;
1720 return retval;
1721}
1722
1723/*
1724 * Return TRUE if "lbuf" is empty or only contains blanks.
1725 */
1726 int
1727vim_isblankline(lbuf)
1728 char_u *lbuf;
1729{
1730 char_u *p;
1731
1732 p = skipwhite(lbuf);
1733 return (*p == NUL || *p == '\r' || *p == '\n');
1734}
1735
1736/*
1737 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001738 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1740 * 0 decimal
1741 * '0' octal
1742 * 'X' hex
1743 * 'x' hex
1744 * If "len" is not NULL, the length of the number in characters is returned.
1745 * If "nptr" is not NULL, the signed result is returned in it.
1746 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001747 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1748 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001749 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001750 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751 */
1752 void
1753vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1754 char_u *start;
1755 int *hexp; /* return: type of number 0 = decimal, 'x'
1756 or 'X' is hex, '0' = octal */
1757 int *len; /* return: detected length of number */
1758 int dooct; /* recognize octal number */
1759 int dohex; /* recognize hex number */
1760 long *nptr; /* return: signed result */
1761 unsigned long *unptr; /* return: unsigned result */
1762{
1763 char_u *ptr = start;
1764 int hex = 0; /* default is decimal */
1765 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001767 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768
1769 if (ptr[0] == '-')
1770 {
1771 negative = TRUE;
1772 ++ptr;
1773 }
1774
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001775 /* Recognize hex and octal. */
1776 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777 {
1778 hex = ptr[1];
1779 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1780 ptr += 2; /* hexadecimal */
1781 else
1782 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001783 hex = 0; /* default is decimal */
1784 if (dooct)
1785 {
1786 /* Don't interpret "0", "08" or "0129" as octal. */
1787 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1788 {
1789 if (ptr[n] > '7')
1790 {
1791 hex = 0; /* can't be octal */
1792 break;
1793 }
1794 if (ptr[n] > '0')
1795 hex = '0'; /* assume octal */
1796 }
1797 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001798 }
1799 }
1800
1801 /*
1802 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1803 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001804 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001806 /* octal */
1807 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001808 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001809 un = 8 * un + (unsigned long)(*ptr - '0');
1810 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001812 }
1813 else if (hex != 0 || dohex > 1)
1814 {
1815 /* hex */
1816 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001817 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001818 un = 16 * un + (unsigned long)hex2nr(*ptr);
1819 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001820 }
1821 }
1822 else
1823 {
1824 /* decimal */
1825 while (VIM_ISDIGIT(*ptr))
1826 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 un = 10 * un + (unsigned long)(*ptr - '0');
1828 ++ptr;
1829 }
1830 }
1831
Bram Moolenaar071d4272004-06-13 20:20:40 +00001832 if (hexp != NULL)
1833 *hexp = hex;
1834 if (len != NULL)
1835 *len = (int)(ptr - start);
1836 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001837 {
1838 if (negative) /* account for leading '-' for decimal numbers */
1839 *nptr = -(long)un;
1840 else
1841 *nptr = (long)un;
1842 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001843 if (unptr != NULL)
1844 *unptr = un;
1845}
1846
/*
 * Return the numeric value (0 - 15) of the hex character "c".
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'; any other value is handled like a decimal digit, thus
 * "c" - '0' is returned.
 */
    int
hex2nr(c)
    int         c;
{
    int         base = '0';     /* character that maps to "offset" */
    int         offset = 0;

    if (c >= 'a' && c <= 'f')
    {
        base = 'a';
        offset = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        base = 'A';
        offset = 10;
    }
    return c - base + offset;
}
1861
1862#if defined(FEAT_TERMRESPONSE) \
1863 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1864/*
1865 * Convert two hex characters to a byte.
1866 * Return -1 if one of the characters is not hex.
1867 */
1868 int
1869hexhex2nr(p)
1870 char_u *p;
1871{
1872 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1873 return -1;
1874 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1875}
1876#endif
1877
1878/*
1879 * Return TRUE if "str" starts with a backslash that should be removed.
1880 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1881 * backslash is not a normal file name character.
1882 * '$' is a valid file name character, we don't remove the backslash before
1883 * it. This means it is not possible to use an environment variable after a
1884 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1885 * Although "\ name" is valid, the backslash in "Program\ files" must be
1886 * removed. Assume a file name doesn't start with a space.
1887 * For multi-byte names, never remove a backslash before a non-ascii
1888 * character, assume that all multi-byte characters are valid file name
1889 * characters.
1890 */
1891 int
1892rem_backslash(str)
1893 char_u *str;
1894{
1895#ifdef BACKSLASH_IN_FILENAME
1896 return (str[0] == '\\'
1897# ifdef FEAT_MBYTE
1898 && str[1] < 0x80
1899# endif
1900 && (str[1] == ' '
1901 || (str[1] != NUL
1902 && str[1] != '*'
1903 && str[1] != '?'
1904 && !vim_isfilec(str[1]))));
1905#else
1906 return (str[0] == '\\' && str[1] != NUL);
1907#endif
1908}
1909
1910/*
1911 * Halve the number of backslashes in a file name argument.
1912 * For MS-DOS we only do this if the character after the backslash
1913 * is not a normal file character.
1914 */
1915 void
1916backslash_halve(p)
1917 char_u *p;
1918{
1919 for ( ; *p; ++p)
1920 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00001921 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922}
1923
1924/*
1925 * backslash_halve() plus save the result in allocated memory.
1926 */
1927 char_u *
1928backslash_halve_save(p)
1929 char_u *p;
1930{
1931 char_u *res;
1932
1933 res = vim_strsave(p);
1934 if (res == NULL)
1935 return p;
1936 backslash_halve(res);
1937 return res;
1938}
1939
1940#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1941/*
1942 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1943 * The first 64 entries have been added to map control characters defined in
1944 * ascii.h
1945 */
1946static char_u ebcdic2ascii_tab[256] =
1947{
1948 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1949 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1950 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1951 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1952 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1953 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1954 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1955 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1956 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1957 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1958 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1959 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1960 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1961 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1962 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1963 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1964 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1965 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1966 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1967 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1968 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1969 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1970 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1971 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1972 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1973 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1974 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1975 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1976 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1977 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1978 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1979 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1980};
1981
1982/*
1983 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1984 * wanting 7-bit ASCII characters out the other end.
1985 */
1986 void
1987ebcdic2ascii(buffer, len)
1988 char_u *buffer;
1989 int len;
1990{
1991 int i;
1992
1993 for (i = 0; i < len; i++)
1994 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1995}
1996#endif