/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
#ifdef FEAT_LINEBREAK
/* Defined further down; used by win_lbr_chartabsize(). */
static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
#endif

#ifdef FEAT_MBYTE
/* Defined further down; used by lbr_chartabsize() and win_lbr_chartabsize(). */
static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
#endif

/* Hex digit for the low four bits of "c"; used by transchar_hex(). */
static int nr2hex __ARGS((int c));

/* Set to TRUE at the end of buf_init_chartab().  transchar() falls back to a
 * fixed printable range while this is still FALSE. */
static int chartab_initialized = FALSE;

/* b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: byte index is "c >> 3", bit index is "c & 7". */
#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
29
30/*
31 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
32 * characters for current buffer.
33 *
34 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
35 * 'isprint' and 'encoding'.
36 *
37 * The index in chartab[] depends on 'encoding':
38 * - For non-multi-byte index with the byte (same as the character).
39 * - For DBCS index with the first byte.
40 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
41 * the same as the character, if the first byte is 0x80 and above it depends
42 * on further bytes).
43 *
44 * The contents of chartab[]:
45 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
46 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
47 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
48 * translate the character before displaying it). Note that only DBCS
49 * characters can have 2 display cells and still be printable.
50 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
51 * - CT_ID_CHAR bit is set when the character can be in an identifier.
52 *
53 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
54 * error, OK otherwise.
55 */
56 int
57init_chartab()
58{
59 return buf_init_chartab(curbuf, TRUE);
60}
61
62 int
63buf_init_chartab(buf, global)
64 buf_T *buf;
65 int global; /* FALSE: only set buf->b_chartab[] */
66{
67 int c;
68 int c2;
69 char_u *p;
70 int i;
71 int tilde;
72 int do_isalpha;
73
74 if (global)
75 {
76 /*
77 * Set the default size for printable characters:
78 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
79 * This also inits all 'isident' and 'isfname' flags to FALSE.
80 *
81 * EBCDIC: all chars below ' ' are not printable, all others are
82 * printable.
83 */
84 c = 0;
85 while (c < ' ')
86 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
87#ifdef EBCDIC
88 while (c < 255)
89#else
90 while (c <= '~')
91#endif
92 chartab[c++] = 1 + CT_PRINT_CHAR;
93#ifdef FEAT_FKMAP
94 if (p_altkeymap)
95 {
96 while (c < YE)
97 chartab[c++] = 1 + CT_PRINT_CHAR;
98 }
99#endif
100 while (c < 256)
101 {
102#ifdef FEAT_MBYTE
103 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
104 if (enc_utf8 && c >= 0xa0)
105 chartab[c++] = CT_PRINT_CHAR + 1;
106 /* euc-jp characters starting with 0x8e are single width */
107 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* other double-byte chars can be printable AND double-width */
110 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
111 chartab[c++] = CT_PRINT_CHAR + 2;
112 else
113#endif
114 /* the rest is unprintable by default */
115 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
116 }
117
118#ifdef FEAT_MBYTE
119 /* Assume that every multi-byte char is a filename character. */
120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
124 chartab[c] |= CT_FNAME_CHAR;
125#endif
126 }
127
128 /*
129 * Init word char flags all to FALSE
130 */
131 vim_memset(buf->b_chartab, 0, (size_t)32);
132#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000133 if (enc_dbcs != 0)
134 for (c = 0; c < 256; ++c)
135 {
136 /* double-byte characters are probably word characters */
137 if (MB_BYTE2LEN(c) == 2)
138 SET_CHARTAB(buf, c);
139 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140#endif
141
142#ifdef FEAT_LISP
143 /*
144 * In lisp mode the '-' character is included in keywords.
145 */
146 if (buf->b_p_lisp)
147 SET_CHARTAB(buf, '-');
148#endif
149
150 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
151 * options Each option is a list of characters, character numbers or
152 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
153 */
154 for (i = global ? 0 : 3; i <= 3; ++i)
155 {
156 if (i == 0)
157 p = p_isi; /* first round: 'isident' */
158 else if (i == 1)
159 p = p_isp; /* second round: 'isprint' */
160 else if (i == 2)
161 p = p_isf; /* third round: 'isfname' */
162 else /* i == 3 */
163 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
164
165 while (*p)
166 {
167 tilde = FALSE;
168 do_isalpha = FALSE;
169 if (*p == '^' && p[1] != NUL)
170 {
171 tilde = TRUE;
172 ++p;
173 }
174 if (VIM_ISDIGIT(*p))
175 c = getdigits(&p);
176 else
177 c = *p++;
178 c2 = -1;
179 if (*p == '-' && p[1] != NUL)
180 {
181 ++p;
182 if (VIM_ISDIGIT(*p))
183 c2 = getdigits(&p);
184 else
185 c2 = *p++;
186 }
187 if (c <= 0 || (c2 < c && c2 != -1) || c2 >= 256
188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
191 if (c2 == -1) /* not a range */
192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000210 /* Use the MB_ functions here, because isalpha() doesn't
211 * work properly when 'encoding' is "latin1" and the locale is
212 * "C". */
213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214#ifdef FEAT_FKMAP
215 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
216#endif
217 )
218 {
219 if (i == 0) /* (re)set ID flag */
220 {
221 if (tilde)
222 chartab[c] &= ~CT_ID_CHAR;
223 else
224 chartab[c] |= CT_ID_CHAR;
225 }
226 else if (i == 1) /* (re)set printable */
227 {
228 if ((c < ' '
229#ifndef EBCDIC
230 || c > '~'
231#endif
232#ifdef FEAT_FKMAP
233 || (p_altkeymap
234 && (F_isalpha(c) || F_isdigit(c)))
235#endif
236 )
237#ifdef FEAT_MBYTE
238 /* For double-byte we keep the cell width, so
239 * that we can detect it from the first byte. */
240 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
241#endif
242 )
243 {
244 if (tilde)
245 {
246 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
247 + ((dy_flags & DY_UHEX) ? 4 : 2);
248 chartab[c] &= ~CT_PRINT_CHAR;
249 }
250 else
251 {
252 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
253 chartab[c] |= CT_PRINT_CHAR;
254 }
255 }
256 }
257 else if (i == 2) /* (re)set fname flag */
258 {
259 if (tilde)
260 chartab[c] &= ~CT_FNAME_CHAR;
261 else
262 chartab[c] |= CT_FNAME_CHAR;
263 }
264 else /* i == 3 */ /* (re)set keyword flag */
265 {
266 if (tilde)
267 RESET_CHARTAB(buf, c);
268 else
269 SET_CHARTAB(buf, c);
270 }
271 }
272 ++c;
273 }
274 p = skip_to_option_part(p);
275 }
276 }
277 chartab_initialized = TRUE;
278 return OK;
279}
280
281/*
282 * Translate any special characters in buf[bufsize] in-place.
283 * The result is a string with only printable characters, but if there is not
284 * enough room, not all characters will be translated.
285 */
286 void
287trans_characters(buf, bufsize)
288 char_u *buf;
289 int bufsize;
290{
291 int len; /* length of string needing translation */
292 int room; /* room in buffer after string */
293 char_u *trs; /* translated character */
294 int trs_len; /* length of trs[] */
295
296 len = (int)STRLEN(buf);
297 room = bufsize - len;
298 while (*buf != 0)
299 {
300# ifdef FEAT_MBYTE
301 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000302 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000303 len -= trs_len;
304 else
305# endif
306 {
307 trs = transchar_byte(*buf);
308 trs_len = (int)STRLEN(trs);
309 if (trs_len > 1)
310 {
311 room -= trs_len - 1;
312 if (room <= 0)
313 return;
314 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
315 }
316 mch_memmove(buf, trs, (size_t)trs_len);
317 --len;
318 }
319 buf += trs_len;
320 }
321}
322
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
        || defined(PROTO)
/*
 * Return a copy of "s" in allocated memory with every special character
 * replaced by its printable form.
 * Returns NULL when out of memory.
 */
    char_u *
transstr(s)
    char_u      *s;
{
    char_u      *res;
    char_u      *p;
#ifdef FEAT_MBYTE
    int         clen;           /* byte length of the char at "p" */
    int         need;           /* bytes needed for the result */
    int         c;
    char_u      hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* First pass: work out how long the result gets, unprintable
         * multi-byte characters are written as hex. */
        need = 0;
        for (p = s; *p != NUL; )
        {
            clen = (*mb_ptr2len)(p);
            if (clen > 1)
            {
                c = (*mb_ptr2char)(p);
                p += clen;
                if (vim_isprintc(c))
                    need += clen;
                else
                {
                    transchar_hex(hexbuf, c);
                    need += (int)STRLEN(hexbuf);
                }
            }
            else
            {
                clen = byte2cells(*p++);
                /* zero cells: illegal byte sequence, reserve 4 bytes */
                need += (clen > 0) ? clen : 4;
            }
        }
        res = alloc((unsigned)(need + 1));
    }
    else
#endif
        res = alloc((unsigned)(vim_strsize(s) + 1));

    if (res == NULL)
        return NULL;

    /* Second pass: build the result by appending piece by piece. */
    *res = NUL;
    for (p = s; *p != NUL; )
    {
#ifdef FEAT_MBYTE
        if (has_mbyte && (clen = (*mb_ptr2len)(p)) > 1)
        {
            c = (*mb_ptr2char)(p);
            if (vim_isprintc(c))
                STRNCAT(res, p, clen);  /* append printable multi-byte char */
            else
                transchar_hex(res + STRLEN(res), c);
            p += clen;
            continue;
        }
#endif
        STRCAT(res, transchar_byte(*p++));
    }
    return res;
}
#endif
399
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]" and returns "buf"; the string
 * is truncated to "buflen - 1" bytes when it does not fit.
 * An illegal UTF-8 byte sequence is copied unmodified.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u      *str;
    int         orglen;
    char_u      *buf;
    int         buflen;
{
    garray_T    ga;
    int         i;
    int         len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
        ga_init2(&ga, 1, 10);
        if (ga_grow(&ga, len + 1) == FAIL)
            return NULL;
        mch_memmove(ga.ga_data, str, (size_t)len);
        ga.ga_len = len;
    }
    else
    {
        if (len >= buflen)          /* Ugly! */
            len = buflen - 1;
        mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
        GA_CHAR(len) = NUL;
    else
        buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
        if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
        {
            if (enc_utf8)
            {
                int     c = utf_ptr2char(STR_PTR(i));
                /* Length of the bytes actually present in the string, not
                 * the length "c" would have when encoded properly. */
                int     ol = utf_ptr2len(STR_PTR(i));
                int     lc = utf_tolower(c);

                /* Only replace the character when it is not an illegal
                 * sequence (ASCII character or more than one byte) and
                 * utf_tolower() doesn't return the original character.
                 * For an illegal byte the encoded form of "lc" would be
                 * longer than the single byte and overwrite what follows. */
                if ((c < 0x80 || ol > 1) && c != lc)
                {
                    int     nl = utf_char2len(lc);

                    /* If the byte length changes need to shift the following
                     * characters forward or backward. */
                    if (ol != nl)
                    {
                        if (nl > ol)
                        {
                            if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL
                                                    : len + nl - ol >= buflen)
                            {
                                /* out of memory, keep old char */
                                lc = c;
                                nl = ol;
                            }
                        }
                        if (ol != nl)
                        {
                            if (buf == NULL)
                            {
                                STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol);
                                ga.ga_len += nl - ol;
                            }
                            else
                            {
                                STRMOVE(buf + i + nl, buf + i + ol);
                                len += nl - ol;
                            }
                        }
                    }
                    (void)utf_char2bytes(lc, STR_PTR(i));
                }
            }
            /* skip to next multi-byte char */
            i += (*mb_ptr2len)(STR_PTR(i));
        }
        else
#endif
        {
            if (buf == NULL)
                GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
            else
                buf[i] = TOLOWER_LOC(buf[i]);
            ++i;
        }
    }

    if (buf == NULL)
        return (char_u *)ga.ga_data;
    return buf;
}
#endif
511
512/*
513 * Catch 22: chartab[] can't be initialized before the options are
514 * initialized, and initializing options may cause transchar() to be called!
515 * When chartab_initialized == FALSE don't use chartab[].
516 * Does NOT work for multi-byte characters, c must be <= 255.
517 * Also doesn't work for the first byte of a multi-byte, "c" must be a
518 * character!
519 */
520static char_u transchar_buf[7];
521
522 char_u *
523transchar(c)
524 int c;
525{
526 int i;
527
528 i = 0;
529 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
530 {
531 transchar_buf[0] = '~';
532 transchar_buf[1] = '@';
533 i = 2;
534 c = K_SECOND(c);
535 }
536
537 if ((!chartab_initialized && (
538#ifdef EBCDIC
539 (c >= 64 && c < 255)
540#else
541 (c >= ' ' && c <= '~')
542#endif
543#ifdef FEAT_FKMAP
544 || F_ischar(c)
545#endif
546 )) || (c < 256 && vim_isprintc_strict(c)))
547 {
548 /* printable character */
549 transchar_buf[i] = c;
550 transchar_buf[i + 1] = NUL;
551 }
552 else
553 transchar_nonprint(transchar_buf + i, c);
554 return transchar_buf;
555}
556
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Variant of transchar() that takes a byte instead of a character.
 * In UTF-8 a lone byte of 0x80 or above is illegal and is always shown in
 * its non-printable form.
 */
    char_u *
transchar_byte(c)
    int         c;
{
    if (!enc_utf8 || c < 0x80)
        return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
574
575/*
576 * Convert non-printable character to two or more printable characters in
577 * "buf[]". "buf" needs to be able to hold five bytes.
578 * Does NOT work for multi-byte characters, c must be <= 255.
579 */
580 void
581transchar_nonprint(buf, c)
582 char_u *buf;
583 int c;
584{
585 if (c == NL)
586 c = NUL; /* we use newline in place of a NUL */
587 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
588 c = NL; /* we use CR in place of NL in this case */
589
590 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
591 transchar_hex(buf, c);
592
593#ifdef EBCDIC
594 /* For EBCDIC only the characters 0-63 and 255 are not printable */
595 else if (CtrlChar(c) != 0 || c == DEL)
596#else
597 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
598#endif
599 {
600 buf[0] = '^';
601#ifdef EBCDIC
602 if (c == DEL)
603 buf[1] = '?'; /* DEL displayed as ^? */
604 else
605 buf[1] = CtrlChar(c);
606#else
607 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
608#endif
609
610 buf[2] = NUL;
611 }
612#ifdef FEAT_MBYTE
613 else if (enc_utf8 && c >= 0x80)
614 {
615 transchar_hex(buf, c);
616 }
617#endif
618#ifndef EBCDIC
619 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
620 {
621 buf[0] = '|';
622 buf[1] = c - 0x80;
623 buf[2] = NUL;
624 }
625#else
626 else if (c < 64)
627 {
628 buf[0] = '~';
629 buf[1] = MetaChar(c);
630 buf[2] = NUL;
631 }
632#endif
633 else /* 0x80 - 0x9f and 0xff */
634 {
635 /*
636 * TODO: EBCDIC I don't know what to do with this chars, so I display
637 * them as '~?' for now
638 */
639 buf[0] = '~';
640#ifdef EBCDIC
641 buf[1] = '?'; /* 0xff displayed as ~? */
642#else
643 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
644#endif
645 buf[2] = NUL;
646 }
647}
648
649 void
650transchar_hex(buf, c)
651 char_u *buf;
652 int c;
653{
654 int i = 0;
655
656 buf[0] = '<';
657#ifdef FEAT_MBYTE
658 if (c > 255)
659 {
660 buf[++i] = nr2hex((unsigned)c >> 12);
661 buf[++i] = nr2hex((unsigned)c >> 8);
662 }
663#endif
664 buf[++i] = nr2hex((unsigned)c >> 4);
665 buf[++i] = nr2hex(c);
666 buf[++i] = '>';
667 buf[++i] = NUL;
668}
669
/*
 * Return the hex digit for the lower 4 bits of "c".
 * Lower case letters are used, so that <f1> for 0xf1 is not confused with
 * <F1> for function key 1.
 */
    static int
nr2hex(c)
    int         c;
{
    int         nibble = c & 0xf;

    return (nibble < 10) ? nibble + '0' : nibble - 10 + 'a';
}
683
684/*
685 * Return number of display cells occupied by byte "b".
686 * Caller must make sure 0 <= b <= 255.
687 * For multi-byte mode "b" must be the first byte of a character.
688 * A TAB is counted as two cells: "^I".
689 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
690 * cells depends on further bytes.
691 */
692 int
693byte2cells(b)
694 int b;
695{
696#ifdef FEAT_MBYTE
697 if (enc_utf8 && b >= 0x80)
698 return 0;
699#endif
700 return (chartab[b] & CT_CELL_MASK);
701}
702
703/*
704 * Return number of display cells occupied by character "c".
705 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
706 * A TAB is counted as two cells: "^I" or four: "<09>".
707 */
708 int
709char2cells(c)
710 int c;
711{
712 if (IS_SPECIAL(c))
713 return char2cells(K_SECOND(c)) + 2;
714#ifdef FEAT_MBYTE
715 if (c >= 0x80)
716 {
717 /* UTF-8: above 0x80 need to check the value */
718 if (enc_utf8)
719 return utf_char2cells(c);
720 /* DBCS: double-byte means double-width, except for euc-jp with first
721 * byte 0x8e */
722 if (enc_dbcs != 0 && c >= 0x100)
723 {
724 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
725 return 1;
726 return 2;
727 }
728 }
729#endif
730 return (chartab[c & 0xff] & CT_CELL_MASK);
731}
732
733/*
734 * Return number of display cells occupied by character at "*p".
735 * A TAB is counted as two cells: "^I" or four: "<09>".
736 */
737 int
738ptr2cells(p)
739 char_u *p;
740{
741#ifdef FEAT_MBYTE
742 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
743 if (enc_utf8 && *p >= 0x80)
744 return utf_ptr2cells(p);
745 /* For DBCS we can tell the cell count from the first byte. */
746#endif
747 return (chartab[*p] & CT_CELL_MASK);
748}
749
750/*
751 * Return the number of characters string "s" will take on the screen,
752 * counting TABs as two characters: "^I".
753 */
754 int
755vim_strsize(s)
756 char_u *s;
757{
758 return vim_strnsize(s, (int)MAXCOL);
759}
760
761/*
762 * Return the number of characters string "s[len]" will take on the screen,
763 * counting TABs as two characters: "^I".
764 */
765 int
766vim_strnsize(s, len)
767 char_u *s;
768 int len;
769{
770 int size = 0;
771
772 while (*s != NUL && --len >= 0)
773 {
774#ifdef FEAT_MBYTE
775 if (has_mbyte)
776 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000777 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000778
779 size += ptr2cells(s);
780 s += l;
781 len -= l - 1;
782 }
783 else
784#endif
785 size += byte2cells(*s++);
786 }
787 return size;
788}
789
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else with a "return" in both branches,
 * thus it must be the last statement of a function returning int:
 * - "p" at a TAB that is displayed as white space: the distance from "col"
 *   to the next multiple of (buf)->b_p_ts.
 * - otherwise: ptr2cells(p).
 * Every argument is parenthesized where operator precedence could matter,
 * so that an expression can safely be passed for "col".
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
806
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
        || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of display cells for the character at "p" when it starts
 * in column "col", using the settings of the current window and buffer.
 * The function has no statement of its own: both "return"s are inside
 * RET_WIN_BUF_CHARTABSIZE().
 */
    int
chartabsize(p, col)
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
816#endif
817
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current ones.  Both "return"s are inside RET_WIN_BUF_CHARTABSIZE().
 */
    static int
win_chartabsize(wp, p, col)
    win_T       *wp;
    char_u      *p;
    colnr_T     col;
{
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
828
829/*
830 * return the number of characters the string 's' will take on the screen,
831 * taking into account the size of a tab
832 */
833 int
834linetabsize(s)
835 char_u *s;
836{
837 colnr_T col = 0;
838
839 while (*s != NUL)
840 col += lbr_chartabsize_adv(&s, col);
841 return (int)col;
842}
843
844/*
845 * Like linetabsize(), but for a given window instead of the current one.
846 */
847 int
848win_linetabsize(wp, p, len)
849 win_T *wp;
850 char_u *p;
851 colnr_T len;
852{
853 colnr_T col = 0;
854 char_u *s;
855
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000856 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000857 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858 return (int)col;
859}
860
861/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000862 * Return TRUE if 'c' is a normal identifier character:
863 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000864 */
865 int
866vim_isIDc(c)
867 int c;
868{
869 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
870}
871
872/*
873 * return TRUE if 'c' is a keyword character: Letters and characters from
874 * 'iskeyword' option for current buffer.
875 * For multi-byte characters mb_get_class() is used (builtin rules).
876 */
877 int
878vim_iswordc(c)
879 int c;
880{
881#ifdef FEAT_MBYTE
882 if (c >= 0x100)
883 {
884 if (enc_dbcs != 0)
885 return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
886 if (enc_utf8)
887 return utf_class(c) >= 2;
888 }
889#endif
890 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0);
891}
892
893/*
894 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
895 */
896 int
897vim_iswordp(p)
898 char_u *p;
899{
900#ifdef FEAT_MBYTE
901 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
902 return mb_get_class(p) >= 2;
903#endif
904 return GET_CHARTAB(curbuf, *p) != 0;
905}
906
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Like vim_iswordp(), but use the keyword table of buffer "buf" instead of
 * the one of the current buffer.
 */
    int
vim_iswordc_buf(p, buf)
    char_u      *p;
    buf_T       *buf;
{
    int         first = *p;     /* first byte of the character */

# ifdef FEAT_MBYTE
    if (has_mbyte && MB_BYTE2LEN(first) > 1)
        return mb_get_class(p) >= 2;
# endif
    return (GET_CHARTAB(buf, first) != 0);
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000920
921/*
922 * return TRUE if 'c' is a valid file-name character
923 * Assume characters above 0x100 are valid (multi-byte).
924 */
925 int
926vim_isfilec(c)
927 int c;
928{
929 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
930}
931
932/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000933 * return TRUE if 'c' is a valid file-name character or a wildcard character
934 * Assume characters above 0x100 are valid (multi-byte).
935 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
936 * returns false.
937 */
938 int
939vim_isfilec_or_wc(c)
940 int c;
941{
942 char_u buf[2];
943
944 buf[0] = (char_u)c;
945 buf[1] = NUL;
946 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
947}
948
949/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950 * return TRUE if 'c' is a printable character
951 * Assume characters above 0x100 are printable (multi-byte), except for
952 * Unicode.
953 */
954 int
955vim_isprintc(c)
956 int c;
957{
958#ifdef FEAT_MBYTE
959 if (enc_utf8 && c >= 0x100)
960 return utf_printable(c);
961#endif
962 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
963}
964
965/*
966 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
967 * byte of a double-byte character.
968 */
969 int
970vim_isprintc_strict(c)
971 int c;
972{
973#ifdef FEAT_MBYTE
974 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
975 return FALSE;
976 if (enc_utf8 && c >= 0x100)
977 return utf_printable(c);
978#endif
979 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
980}
981
982/*
983 * like chartabsize(), but also check for line breaks on the screen
984 */
985 int
986lbr_chartabsize(s, col)
987 unsigned char *s;
988 colnr_T col;
989{
990#ifdef FEAT_LINEBREAK
991 if (!curwin->w_p_lbr && *p_sbr == NUL)
992 {
993#endif
994#ifdef FEAT_MBYTE
995 if (curwin->w_p_wrap)
996 return win_nolbr_chartabsize(curwin, s, col, NULL);
997#endif
998 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
999#ifdef FEAT_LINEBREAK
1000 }
1001 return win_lbr_chartabsize(curwin, s, col, NULL);
1002#endif
1003}
1004
1005/*
1006 * Call lbr_chartabsize() and advance the pointer.
1007 */
1008 int
1009lbr_chartabsize_adv(s, col)
1010 char_u **s;
1011 colnr_T col;
1012{
1013 int retval;
1014
1015 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001016 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001017 return retval;
1018}
1019
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the number of display cells taken by the character at "s" when it
 * starts in column "col" of window "wp", including what 'linebreak' and
 * 'showbreak' add.
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 */
/*ARGSUSED*/
    int
win_lbr_chartabsize(wp, s, col, headp)
    win_T       *wp;
    char_u      *s;
    colnr_T     col;
    int         *headp;
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     colmax;
    int         added;
# ifdef FEAT_MBYTE
    int         mb_added = 0;
# else
#  define mb_added 0
# endif
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);
    int         n;

    /*
     * No 'linebreak' and 'showbreak': return quickly.
     */
    if (!wp->w_p_lbr && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
#endif
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && vim_isbreak(c)
            && !vim_isbreak(s[1])
            && !wp->w_p_list
            && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
            && wp->w_width != 0
# endif
       )
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = W_WIDTH(wp) - numberextra;
        if (col >= colmax)
        {
            /* "col" is beyond the first screen line: move "colmax" to the
             * end of the screen line that "col" is in. */
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n;
        }

        for (;;)
        {
            ps = s;
            mb_ptr_adv(s);
            c = *s;
            if (!(c != NUL
                    && (vim_isbreak(c)
                        || (!vim_isbreak(c)
                            && (col2 == col || !vim_isbreak(*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax)         /* doesn't fit */
            {
                /* let this character fill the rest of the screen line */
                size = colmax - col;
                tab_corr = FALSE;
                break;
            }
        }
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size;         /* Count the ">" in the last column. */
        mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'showbreak' string at start of line
     * Set *headp to the size of what we add.
     */
    added = 0;
    if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
    {
        numberextra = win_col_off(wp);
        col += numberextra + mb_added;
        if (col >= (colnr_T)W_WIDTH(wp))
        {
            col -= W_WIDTH(wp);
            /* "numberextra" is reused here as the divisor for the modulo
             * below; it is only applied when positive. */
            numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
            if (numberextra > 0)
                col = col % numberextra;
        }
        if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
        {
            added = vim_strsize(p_sbr);
            if (tab_corr)
                size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
            else
                size += added;
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1160
1161#if defined(FEAT_MBYTE) || defined(PROTO)
1162/*
1163 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1164 * 'wrap' is on. This means we need to check for a double-byte character that
1165 * doesn't fit at the end of the screen line.
1166 */
1167 static int
1168win_nolbr_chartabsize(wp, s, col, headp)
1169 win_T *wp;
1170 char_u *s;
1171 colnr_T col;
1172 int *headp;
1173{
1174 int n;
1175
1176 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1177 {
1178 n = wp->w_buffer->b_p_ts;
1179 return (int)(n - (col % n));
1180 }
1181 n = ptr2cells(s);
1182 /* Add one cell for a double-width character in the last column of the
1183 * window, displayed with a ">". */
1184 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1185 {
1186 if (headp != NULL)
1187 *headp = 1;
1188 return 3;
1189 }
1190 return n;
1191}
1192
1193/*
1194 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1195 * "wp".
1196 */
1197 int
1198in_win_border(wp, vcol)
1199 win_T *wp;
1200 colnr_T vcol;
1201{
1202 colnr_T width1; /* width of first line (after line number) */
1203 colnr_T width2; /* width of further lines */
1204
1205#ifdef FEAT_VERTSPLIT
1206 if (wp->w_width == 0) /* there is no border */
1207 return FALSE;
1208#endif
1209 width1 = W_WIDTH(wp) - win_col_off(wp);
1210 if (vcol < width1 - 1)
1211 return FALSE;
1212 if (vcol == width1 - 1)
1213 return TRUE;
1214 width2 = width1 + win_col_off2(wp);
1215 return ((vcol - width1) % width2 == width2 - 1);
1216}
1217#endif /* FEAT_MBYTE */
1218
1219/*
1220 * Get virtual column number of pos.
1221 * start: on the first position of this character (TAB, ctrl)
1222 * cursor: where the cursor is on this character (first char, except for TAB)
1223 * end: on the last position of this character (TAB, ctrl)
1224 *
1225 * This is used very often, keep it fast!
1226 */
1227 void
1228getvcol(wp, pos, start, cursor, end)
1229 win_T *wp;
1230 pos_T *pos;
1231 colnr_T *start;
1232 colnr_T *cursor;
1233 colnr_T *end;
1234{
1235 colnr_T vcol;
1236 char_u *ptr; /* points to current char */
1237 char_u *posptr; /* points to char at pos->col */
1238 int incr;
1239 int head;
1240 int ts = wp->w_buffer->b_p_ts;
1241 int c;
1242
1243 vcol = 0;
1244 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1245 posptr = ptr + pos->col;
1246
1247 /*
1248 * This function is used very often, do some speed optimizations.
1249 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1250 * Also use this when 'list' is set but tabs take their normal size.
1251 */
1252 if ((!wp->w_p_list || lcs_tab1 != NUL)
1253#ifdef FEAT_LINEBREAK
1254 && !wp->w_p_lbr && *p_sbr == NUL
1255#endif
1256 )
1257 {
1258#ifndef FEAT_MBYTE
1259 head = 0;
1260#endif
1261 for (;;)
1262 {
1263#ifdef FEAT_MBYTE
1264 head = 0;
1265#endif
1266 c = *ptr;
1267 /* make sure we don't go past the end of the line */
1268 if (c == NUL)
1269 {
1270 incr = 1; /* NUL at end of line only takes one column */
1271 break;
1272 }
1273 /* A tab gets expanded, depending on the current column */
1274 if (c == TAB)
1275 incr = ts - (vcol % ts);
1276 else
1277 {
1278#ifdef FEAT_MBYTE
1279 if (has_mbyte)
1280 {
1281 /* For utf-8, if the byte is >= 0x80, need to look at
1282 * further bytes to find the cell width. */
1283 if (enc_utf8 && c >= 0x80)
1284 incr = utf_ptr2cells(ptr);
1285 else
1286 incr = CHARSIZE(c);
1287
1288 /* If a double-cell char doesn't fit at the end of a line
1289 * it wraps to the next line, it's like this char is three
1290 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001291 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1292 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293 {
1294 ++incr;
1295 head = 1;
1296 }
1297 }
1298 else
1299#endif
1300 incr = CHARSIZE(c);
1301 }
1302
1303 if (ptr >= posptr) /* character at pos->col */
1304 break;
1305
1306 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001307 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 }
1309 }
1310 else
1311 {
1312 for (;;)
1313 {
1314 /* A tab gets expanded, depending on the current column */
1315 head = 0;
1316 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1317 /* make sure we don't go past the end of the line */
1318 if (*ptr == NUL)
1319 {
1320 incr = 1; /* NUL at end of line only takes one column */
1321 break;
1322 }
1323
1324 if (ptr >= posptr) /* character at pos->col */
1325 break;
1326
1327 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001328 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329 }
1330 }
1331 if (start != NULL)
1332 *start = vcol + head;
1333 if (end != NULL)
1334 *end = vcol + incr - 1;
1335 if (cursor != NULL)
1336 {
1337 if (*ptr == TAB
1338 && (State & NORMAL)
1339 && !wp->w_p_list
1340 && !virtual_active()
1341#ifdef FEAT_VISUAL
1342 && !(VIsual_active
1343 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1344#endif
1345 )
1346 *cursor = vcol + incr - 1; /* cursor at end */
1347 else
1348 *cursor = vcol + head; /* cursor at start */
1349 }
1350}
1351
1352/*
1353 * Get virtual cursor column in the current window, pretending 'list' is off.
1354 */
1355 colnr_T
1356getvcol_nolist(posp)
1357 pos_T *posp;
1358{
1359 int list_save = curwin->w_p_list;
1360 colnr_T vcol;
1361
1362 curwin->w_p_list = FALSE;
1363 getvcol(curwin, posp, NULL, &vcol, NULL);
1364 curwin->w_p_list = list_save;
1365 return vcol;
1366}
1367
1368#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1369/*
1370 * Get virtual column in virtual mode.
1371 */
1372 void
1373getvvcol(wp, pos, start, cursor, end)
1374 win_T *wp;
1375 pos_T *pos;
1376 colnr_T *start;
1377 colnr_T *cursor;
1378 colnr_T *end;
1379{
1380 colnr_T col;
1381 colnr_T coladd;
1382 colnr_T endadd;
1383# ifdef FEAT_MBYTE
1384 char_u *ptr;
1385# endif
1386
1387 if (virtual_active())
1388 {
1389 /* For virtual mode, only want one value */
1390 getvcol(wp, pos, &col, NULL, NULL);
1391
1392 coladd = pos->coladd;
1393 endadd = 0;
1394# ifdef FEAT_MBYTE
1395 /* Cannot put the cursor on part of a wide character. */
1396 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1397 if (pos->col < STRLEN(ptr))
1398 {
1399 int c = (*mb_ptr2char)(ptr + pos->col);
1400
1401 if (c != TAB && vim_isprintc(c))
1402 {
1403 endadd = char2cells(c) - 1;
Bram Moolenaara5792f52005-11-23 21:25:05 +00001404 if (coladd > endadd) /* past end of line */
1405 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001406 else
1407 coladd = 0;
1408 }
1409 }
1410# endif
1411 col += coladd;
1412 if (start != NULL)
1413 *start = col;
1414 if (cursor != NULL)
1415 *cursor = col;
1416 if (end != NULL)
1417 *end = col + endadd;
1418 }
1419 else
1420 getvcol(wp, pos, start, cursor, end);
1421}
1422#endif
1423
1424#if defined(FEAT_VISUAL) || defined(PROTO)
1425/*
1426 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1427 * Used for Visual block mode.
1428 */
1429 void
1430getvcols(wp, pos1, pos2, left, right)
1431 win_T *wp;
1432 pos_T *pos1, *pos2;
1433 colnr_T *left, *right;
1434{
1435 colnr_T from1, from2, to1, to2;
1436
1437 if (ltp(pos1, pos2))
1438 {
1439 getvvcol(wp, pos1, &from1, NULL, &to1);
1440 getvvcol(wp, pos2, &from2, NULL, &to2);
1441 }
1442 else
1443 {
1444 getvvcol(wp, pos2, &from1, NULL, &to1);
1445 getvvcol(wp, pos1, &from2, NULL, &to2);
1446 }
1447 if (from2 < from1)
1448 *left = from2;
1449 else
1450 *left = from1;
1451 if (to2 > to1)
1452 {
1453 if (*p_sel == 'e' && from2 - 1 >= to1)
1454 *right = from2 - 1;
1455 else
1456 *right = to2;
1457 }
1458 else
1459 *right = to1;
1460}
1461#endif
1462
1463/*
1464 * skipwhite: skip over ' ' and '\t'.
1465 */
1466 char_u *
1467skipwhite(p)
1468 char_u *p;
1469{
1470 while (vim_iswhite(*p)) /* skip to next non-white */
1471 ++p;
1472 return p;
1473}
1474
1475/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001476 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001477 */
1478 char_u *
1479skipdigits(p)
1480 char_u *p;
1481{
1482 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1483 ++p;
1484 return p;
1485}
1486
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001487#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001488/*
1489 * skip over digits and hex characters
1490 */
1491 char_u *
1492skiphex(p)
1493 char_u *p;
1494{
1495 while (vim_isxdigit(*p)) /* skip to next non-digit */
1496 ++p;
1497 return p;
1498}
1499#endif
1500
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001501#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1502/*
1503 * skip to digit (or NUL after the string)
1504 */
1505 char_u *
1506skiptodigit(p)
1507 char_u *p;
1508{
1509 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1510 ++p;
1511 return p;
1512}
1513
1514/*
1515 * skip to hex character (or NUL after the string)
1516 */
1517 char_u *
1518skiptohex(p)
1519 char_u *p;
1520{
1521 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1522 ++p;
1523 return p;
1524}
1525#endif
1526
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * For simple arguments use the VIM_ISDIGIT() macro instead.
 * Returns non-zero only for '0' to '9'.
 */
    int
vim_isdigit(c)
    int		c;
{
    if (c < '0')
	return 0;
    return (c <= '9');
}
1539
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0' to '9', 'a' to 'f' and 'A' to 'F'.
 */
    int
vim_isxdigit(c)
    int		c;
{
    int		is_decimal = (c >= '0' && c <= '9');
    int		is_lower_hex = (c >= 'a' && c <= 'f');
    int		is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1553
Bram Moolenaar78622822005-08-23 21:00:13 +00001554#if defined(FEAT_MBYTE) || defined(PROTO)
1555/*
1556 * Vim's own character class functions. These exist because many library
1557 * islower()/toupper() etc. do not work properly: they crash when used with
1558 * invalid values or can't handle latin1 when the locale is C.
1559 * Speed is most important here.
1560 */
1561#define LATIN1LOWER 'l'
1562#define LATIN1UPPER 'U'
1563
1564/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001565static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1566static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
1567static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
Bram Moolenaar78622822005-08-23 21:00:13 +00001568
1569 int
1570vim_islower(c)
1571 int c;
1572{
1573 if (c <= '@')
1574 return FALSE;
1575 if (c >= 0x80)
1576 {
1577 if (enc_utf8)
1578 return utf_islower(c);
1579 if (c >= 0x100)
1580 {
1581#ifdef HAVE_ISWLOWER
1582 if (has_mbyte)
1583 return iswlower(c);
1584#endif
1585 /* islower() can't handle these chars and may crash */
1586 return FALSE;
1587 }
1588 if (enc_latin1like)
1589 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1590 }
1591 return islower(c);
1592}
1593
1594 int
1595vim_isupper(c)
1596 int c;
1597{
1598 if (c <= '@')
1599 return FALSE;
1600 if (c >= 0x80)
1601 {
1602 if (enc_utf8)
1603 return utf_isupper(c);
1604 if (c >= 0x100)
1605 {
1606#ifdef HAVE_ISWUPPER
1607 if (has_mbyte)
1608 return iswupper(c);
1609#endif
1610 /* islower() can't handle these chars and may crash */
1611 return FALSE;
1612 }
1613 if (enc_latin1like)
1614 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1615 }
1616 return isupper(c);
1617}
1618
1619 int
1620vim_toupper(c)
1621 int c;
1622{
1623 if (c <= '@')
1624 return c;
1625 if (c >= 0x80)
1626 {
1627 if (enc_utf8)
1628 return utf_toupper(c);
1629 if (c >= 0x100)
1630 {
1631#ifdef HAVE_TOWUPPER
1632 if (has_mbyte)
1633 return towupper(c);
1634#endif
1635 /* toupper() can't handle these chars and may crash */
1636 return c;
1637 }
1638 if (enc_latin1like)
1639 return latin1upper[c];
1640 }
1641 return TOUPPER_LOC(c);
1642}
1643
1644 int
1645vim_tolower(c)
1646 int c;
1647{
1648 if (c <= '@')
1649 return c;
1650 if (c >= 0x80)
1651 {
1652 if (enc_utf8)
1653 return utf_tolower(c);
1654 if (c >= 0x100)
1655 {
1656#ifdef HAVE_TOWLOWER
1657 if (has_mbyte)
1658 return towlower(c);
1659#endif
1660 /* tolower() can't handle these chars and may crash */
1661 return c;
1662 }
1663 if (enc_latin1like)
1664 return latin1lower[c];
1665 }
1666 return TOLOWER_LOC(c);
1667}
1668#endif
1669
Bram Moolenaar071d4272004-06-13 20:20:40 +00001670/*
1671 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1672 */
1673 char_u *
1674skiptowhite(p)
1675 char_u *p;
1676{
1677 while (*p != ' ' && *p != '\t' && *p != NUL)
1678 ++p;
1679 return p;
1680}
1681
1682#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1683 || defined(PROTO)
1684/*
1685 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1686 */
1687 char_u *
1688skiptowhite_esc(p)
1689 char_u *p;
1690{
1691 while (*p != ' ' && *p != '\t' && *p != NUL)
1692 {
1693 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1694 ++p;
1695 ++p;
1696 }
1697 return p;
1698}
1699#endif
1700
1701/*
1702 * Getdigits: Get a number from a string and skip over it.
1703 * Note: the argument is a pointer to a char_u pointer!
1704 */
1705 long
1706getdigits(pp)
1707 char_u **pp;
1708{
1709 char_u *p;
1710 long retval;
1711
1712 p = *pp;
1713 retval = atol((char *)p);
1714 if (*p == '-') /* skip negative sign */
1715 ++p;
1716 p = skipdigits(p); /* skip to next non-digit */
1717 *pp = p;
1718 return retval;
1719}
1720
1721/*
1722 * Return TRUE if "lbuf" is empty or only contains blanks.
1723 */
1724 int
1725vim_isblankline(lbuf)
1726 char_u *lbuf;
1727{
1728 char_u *p;
1729
1730 p = skipwhite(lbuf);
1731 return (*p == NUL || *p == '\r' || *p == '\n');
1732}
1733
1734/*
1735 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001736 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1738 * 0 decimal
1739 * '0' octal
1740 * 'X' hex
1741 * 'x' hex
1742 * If "len" is not NULL, the length of the number in characters is returned.
1743 * If "nptr" is not NULL, the signed result is returned in it.
1744 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001745 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1746 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001747 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001748 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749 */
1750 void
1751vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1752 char_u *start;
1753 int *hexp; /* return: type of number 0 = decimal, 'x'
1754 or 'X' is hex, '0' = octal */
1755 int *len; /* return: detected length of number */
1756 int dooct; /* recognize octal number */
1757 int dohex; /* recognize hex number */
1758 long *nptr; /* return: signed result */
1759 unsigned long *unptr; /* return: unsigned result */
1760{
1761 char_u *ptr = start;
1762 int hex = 0; /* default is decimal */
1763 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001764 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001765 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766
1767 if (ptr[0] == '-')
1768 {
1769 negative = TRUE;
1770 ++ptr;
1771 }
1772
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001773 /* Recognize hex and octal. */
1774 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001775 {
1776 hex = ptr[1];
1777 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1778 ptr += 2; /* hexadecimal */
1779 else
1780 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001781 hex = 0; /* default is decimal */
1782 if (dooct)
1783 {
1784 /* Don't interpret "0", "08" or "0129" as octal. */
1785 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1786 {
1787 if (ptr[n] > '7')
1788 {
1789 hex = 0; /* can't be octal */
1790 break;
1791 }
1792 if (ptr[n] > '0')
1793 hex = '0'; /* assume octal */
1794 }
1795 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001796 }
1797 }
1798
1799 /*
1800 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1801 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001802 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001804 /* octal */
1805 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001807 un = 8 * un + (unsigned long)(*ptr - '0');
1808 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001809 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001810 }
1811 else if (hex != 0 || dohex > 1)
1812 {
1813 /* hex */
1814 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001816 un = 16 * un + (unsigned long)hex2nr(*ptr);
1817 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818 }
1819 }
1820 else
1821 {
1822 /* decimal */
1823 while (VIM_ISDIGIT(*ptr))
1824 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001825 un = 10 * un + (unsigned long)(*ptr - '0');
1826 ++ptr;
1827 }
1828 }
1829
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830 if (hexp != NULL)
1831 *hexp = hex;
1832 if (len != NULL)
1833 *len = (int)(ptr - start);
1834 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001835 {
1836 if (negative) /* account for leading '-' for decimal numbers */
1837 *nptr = -(long)un;
1838 else
1839 *nptr = (long)un;
1840 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841 if (unptr != NULL)
1842 *unptr = un;
1843}
1844
/*
 * Return the value of a single hex character.
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'; for any other character "c - '0'" is returned.
 */
    int
hex2nr(c)
    int		c;
{
    if (c >= 'A' && c <= 'F')
	return 10 + (c - 'A');
    if (c >= 'a' && c <= 'f')
	return 10 + (c - 'a');
    return c - '0';
}
1859
1860#if defined(FEAT_TERMRESPONSE) \
1861 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1862/*
1863 * Convert two hex characters to a byte.
1864 * Return -1 if one of the characters is not hex.
1865 */
1866 int
1867hexhex2nr(p)
1868 char_u *p;
1869{
1870 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1871 return -1;
1872 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1873}
1874#endif
1875
1876/*
1877 * Return TRUE if "str" starts with a backslash that should be removed.
1878 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1879 * backslash is not a normal file name character.
1880 * '$' is a valid file name character, we don't remove the backslash before
1881 * it. This means it is not possible to use an environment variable after a
1882 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1883 * Although "\ name" is valid, the backslash in "Program\ files" must be
1884 * removed. Assume a file name doesn't start with a space.
1885 * For multi-byte names, never remove a backslash before a non-ascii
1886 * character, assume that all multi-byte characters are valid file name
1887 * characters.
1888 */
1889 int
1890rem_backslash(str)
1891 char_u *str;
1892{
1893#ifdef BACKSLASH_IN_FILENAME
1894 return (str[0] == '\\'
1895# ifdef FEAT_MBYTE
1896 && str[1] < 0x80
1897# endif
1898 && (str[1] == ' '
1899 || (str[1] != NUL
1900 && str[1] != '*'
1901 && str[1] != '?'
1902 && !vim_isfilec(str[1]))));
1903#else
1904 return (str[0] == '\\' && str[1] != NUL);
1905#endif
1906}
1907
1908/*
1909 * Halve the number of backslashes in a file name argument.
1910 * For MS-DOS we only do this if the character after the backslash
1911 * is not a normal file character.
1912 */
1913 void
1914backslash_halve(p)
1915 char_u *p;
1916{
1917 for ( ; *p; ++p)
1918 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00001919 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001920}
1921
1922/*
1923 * backslash_halve() plus save the result in allocated memory.
1924 */
1925 char_u *
1926backslash_halve_save(p)
1927 char_u *p;
1928{
1929 char_u *res;
1930
1931 res = vim_strsave(p);
1932 if (res == NULL)
1933 return p;
1934 backslash_halve(res);
1935 return res;
1936}
1937
1938#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1939/*
1940 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1941 * The first 64 entries have been added to map control characters defined in
1942 * ascii.h
1943 */
1944static char_u ebcdic2ascii_tab[256] =
1945{
1946 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1947 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1948 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
1949 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
1950 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
1951 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
1952 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1953 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
1954 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
1955 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
1956 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1957 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
1958 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
1959 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
1960 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
1961 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
1962 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
1963 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
1964 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
1965 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
1966 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
1967 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
1968 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1969 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
1970 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1971 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
1972 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
1973 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
1974 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
1975 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
1976 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
1977 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
1978};
1979
1980/*
1981 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
1982 * wanting 7-bit ASCII characters out the other end.
1983 */
1984 void
1985ebcdic2ascii(buffer, len)
1986 char_u *buffer;
1987 int len;
1988{
1989 int i;
1990
1991 for (i = 0; i < len; i++)
1992 buffer[i] = ebcdic2ascii_tab[buffer[i]];
1993}
1994#endif