blob: fd4b500be58072785533e785e5bde7171841bcbb [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
12#ifdef FEAT_LINEBREAK
13static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
14#endif
15
16#ifdef FEAT_MBYTE
Bram Moolenaard7b734a2010-08-12 20:17:02 +020017# if defined(HAVE_WCHAR_H)
18# include <wchar.h> /* for towupper() and towlower() */
19# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000020static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
21#endif
22
Bram Moolenaar0ab2a882009-05-13 10:51:08 +000023static unsigned nr2hex __ARGS((unsigned c));
Bram Moolenaar071d4272004-06-13 20:20:40 +000024
25static int chartab_initialized = FALSE;
26
/*
 * b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: byte (c >> 3) holds the flag for "c" in bit (c & 7).
 * Every expansion is fully parenthesized so that the macros can be used
 * inside a larger expression.  Note that "c" is evaluated twice.
 */
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
32
33/*
34 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
35 * characters for current buffer.
36 *
37 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
38 * 'isprint' and 'encoding'.
39 *
40 * The index in chartab[] depends on 'encoding':
41 * - For non-multi-byte index with the byte (same as the character).
42 * - For DBCS index with the first byte.
43 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
44 * the same as the character, if the first byte is 0x80 and above it depends
45 * on further bytes).
46 *
47 * The contents of chartab[]:
48 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
49 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
50 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
51 * translate the character before displaying it). Note that only DBCS
52 * characters can have 2 display cells and still be printable.
53 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
54 * - CT_ID_CHAR bit is set when the character can be in an identifier.
55 *
56 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
57 * error, OK otherwise.
58 */
59 int
60init_chartab()
61{
62 return buf_init_chartab(curbuf, TRUE);
63}
64
65 int
66buf_init_chartab(buf, global)
67 buf_T *buf;
68 int global; /* FALSE: only set buf->b_chartab[] */
69{
70 int c;
71 int c2;
72 char_u *p;
73 int i;
74 int tilde;
75 int do_isalpha;
76
77 if (global)
78 {
79 /*
80 * Set the default size for printable characters:
81 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
82 * This also inits all 'isident' and 'isfname' flags to FALSE.
83 *
84 * EBCDIC: all chars below ' ' are not printable, all others are
85 * printable.
86 */
87 c = 0;
88 while (c < ' ')
89 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
90#ifdef EBCDIC
91 while (c < 255)
92#else
93 while (c <= '~')
94#endif
95 chartab[c++] = 1 + CT_PRINT_CHAR;
96#ifdef FEAT_FKMAP
97 if (p_altkeymap)
98 {
99 while (c < YE)
100 chartab[c++] = 1 + CT_PRINT_CHAR;
101 }
102#endif
103 while (c < 256)
104 {
105#ifdef FEAT_MBYTE
106 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
107 if (enc_utf8 && c >= 0xa0)
108 chartab[c++] = CT_PRINT_CHAR + 1;
109 /* euc-jp characters starting with 0x8e are single width */
110 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
111 chartab[c++] = CT_PRINT_CHAR + 1;
112 /* other double-byte chars can be printable AND double-width */
113 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
114 chartab[c++] = CT_PRINT_CHAR + 2;
115 else
116#endif
117 /* the rest is unprintable by default */
118 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
119 }
120
121#ifdef FEAT_MBYTE
122 /* Assume that every multi-byte char is a filename character. */
123 for (c = 1; c < 256; ++c)
124 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
125 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
126 || (enc_utf8 && c >= 0xa0))
127 chartab[c] |= CT_FNAME_CHAR;
128#endif
129 }
130
131 /*
132 * Init word char flags all to FALSE
133 */
134 vim_memset(buf->b_chartab, 0, (size_t)32);
135#ifdef FEAT_MBYTE
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000136 if (enc_dbcs != 0)
137 for (c = 0; c < 256; ++c)
138 {
139 /* double-byte characters are probably word characters */
140 if (MB_BYTE2LEN(c) == 2)
141 SET_CHARTAB(buf, c);
142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143#endif
144
145#ifdef FEAT_LISP
146 /*
147 * In lisp mode the '-' character is included in keywords.
148 */
149 if (buf->b_p_lisp)
150 SET_CHARTAB(buf, '-');
151#endif
152
153 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
154 * options Each option is a list of characters, character numbers or
155 * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
156 */
157 for (i = global ? 0 : 3; i <= 3; ++i)
158 {
159 if (i == 0)
160 p = p_isi; /* first round: 'isident' */
161 else if (i == 1)
162 p = p_isp; /* second round: 'isprint' */
163 else if (i == 2)
164 p = p_isf; /* third round: 'isfname' */
165 else /* i == 3 */
166 p = buf->b_p_isk; /* fourth round: 'iskeyword' */
167
168 while (*p)
169 {
170 tilde = FALSE;
171 do_isalpha = FALSE;
172 if (*p == '^' && p[1] != NUL)
173 {
174 tilde = TRUE;
175 ++p;
176 }
177 if (VIM_ISDIGIT(*p))
178 c = getdigits(&p);
179 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000180#ifdef FEAT_MBYTE
181 if (has_mbyte)
182 c = mb_ptr2char_adv(&p);
183 else
184#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c = *p++;
186 c2 = -1;
187 if (*p == '-' && p[1] != NUL)
188 {
189 ++p;
190 if (VIM_ISDIGIT(*p))
191 c2 = getdigits(&p);
192 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000193#ifdef FEAT_MBYTE
194 if (has_mbyte)
195 c2 = mb_ptr2char_adv(&p);
196 else
197#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000198 c2 = *p++;
199 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000200 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000201 || !(*p == NUL || *p == ','))
202 return FAIL;
203
204 if (c2 == -1) /* not a range */
205 {
206 /*
207 * A single '@' (not "@-@"):
208 * Decide on letters being ID/printable/keyword chars with
209 * standard function isalpha(). This takes care of locale for
210 * single-byte characters).
211 */
212 if (c == '@')
213 {
214 do_isalpha = TRUE;
215 c = 1;
216 c2 = 255;
217 }
218 else
219 c2 = c;
220 }
221 while (c <= c2)
222 {
Bram Moolenaardeefb632007-08-15 18:41:34 +0000223 /* Use the MB_ functions here, because isalpha() doesn't
224 * work properly when 'encoding' is "latin1" and the locale is
225 * "C". */
226 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227#ifdef FEAT_FKMAP
228 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
229#endif
230 )
231 {
232 if (i == 0) /* (re)set ID flag */
233 {
234 if (tilde)
235 chartab[c] &= ~CT_ID_CHAR;
236 else
237 chartab[c] |= CT_ID_CHAR;
238 }
239 else if (i == 1) /* (re)set printable */
240 {
241 if ((c < ' '
242#ifndef EBCDIC
243 || c > '~'
244#endif
245#ifdef FEAT_FKMAP
246 || (p_altkeymap
247 && (F_isalpha(c) || F_isdigit(c)))
248#endif
249 )
250#ifdef FEAT_MBYTE
251 /* For double-byte we keep the cell width, so
252 * that we can detect it from the first byte. */
253 && !(enc_dbcs && MB_BYTE2LEN(c) == 2)
254#endif
255 )
256 {
257 if (tilde)
258 {
259 chartab[c] = (chartab[c] & ~CT_CELL_MASK)
260 + ((dy_flags & DY_UHEX) ? 4 : 2);
261 chartab[c] &= ~CT_PRINT_CHAR;
262 }
263 else
264 {
265 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
266 chartab[c] |= CT_PRINT_CHAR;
267 }
268 }
269 }
270 else if (i == 2) /* (re)set fname flag */
271 {
272 if (tilde)
273 chartab[c] &= ~CT_FNAME_CHAR;
274 else
275 chartab[c] |= CT_FNAME_CHAR;
276 }
277 else /* i == 3 */ /* (re)set keyword flag */
278 {
279 if (tilde)
280 RESET_CHARTAB(buf, c);
281 else
282 SET_CHARTAB(buf, c);
283 }
284 }
285 ++c;
286 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100287
288 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000289 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100290 if (c == ',' && *p == NUL)
291 /* Trailing comma is not allowed. */
292 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000293 }
294 }
295 chartab_initialized = TRUE;
296 return OK;
297}
298
299/*
300 * Translate any special characters in buf[bufsize] in-place.
301 * The result is a string with only printable characters, but if there is not
302 * enough room, not all characters will be translated.
303 */
304 void
305trans_characters(buf, bufsize)
306 char_u *buf;
307 int bufsize;
308{
309 int len; /* length of string needing translation */
310 int room; /* room in buffer after string */
311 char_u *trs; /* translated character */
312 int trs_len; /* length of trs[] */
313
314 len = (int)STRLEN(buf);
315 room = bufsize - len;
316 while (*buf != 0)
317 {
318# ifdef FEAT_MBYTE
319 /* Assume a multi-byte character doesn't need translation. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000320 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 len -= trs_len;
322 else
323# endif
324 {
325 trs = transchar_byte(*buf);
326 trs_len = (int)STRLEN(trs);
327 if (trs_len > 1)
328 {
329 room -= trs_len - 1;
330 if (room <= 0)
331 return;
332 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
333 }
334 mch_memmove(buf, trs, (size_t)trs_len);
335 --len;
336 }
337 buf += trs_len;
338 }
339}
340
Bram Moolenaar7cc36e92007-03-27 10:42:05 +0000341#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
342 || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343/*
344 * Translate a string into allocated memory, replacing special chars with
345 * printable chars. Returns NULL when out of memory.
346 */
347 char_u *
348transstr(s)
349 char_u *s;
350{
351 char_u *res;
352 char_u *p;
353#ifdef FEAT_MBYTE
354 int l, len, c;
355 char_u hexbuf[11];
356#endif
357
358#ifdef FEAT_MBYTE
359 if (has_mbyte)
360 {
361 /* Compute the length of the result, taking account of unprintable
362 * multi-byte characters. */
363 len = 0;
364 p = s;
365 while (*p != NUL)
366 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000367 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368 {
369 c = (*mb_ptr2char)(p);
370 p += l;
371 if (vim_isprintc(c))
372 len += l;
373 else
374 {
375 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000376 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000377 }
378 }
379 else
380 {
381 l = byte2cells(*p++);
382 if (l > 0)
383 len += l;
384 else
385 len += 4; /* illegal byte sequence */
386 }
387 }
388 res = alloc((unsigned)(len + 1));
389 }
390 else
391#endif
392 res = alloc((unsigned)(vim_strsize(s) + 1));
393 if (res != NULL)
394 {
395 *res = NUL;
396 p = s;
397 while (*p != NUL)
398 {
399#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000400 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 {
402 c = (*mb_ptr2char)(p);
403 if (vim_isprintc(c))
404 STRNCAT(res, p, l); /* append printable multi-byte char */
405 else
406 transchar_hex(res + STRLEN(res), c);
407 p += l;
408 }
409 else
410#endif
411 STRCAT(res, transchar_byte(*p++));
412 }
413 }
414 return res;
415}
416#endif
417
418#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
419/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000420 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
421 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000422 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
423 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 */
425 char_u *
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000426str_foldcase(str, orglen, buf, buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000427 char_u *str;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000428 int orglen;
429 char_u *buf;
430 int buflen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431{
432 garray_T ga;
433 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000434 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000435
436#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
437#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000438#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
439#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000440
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000441 /* Copy "str" into "buf" or allocated memory, unmodified. */
442 if (buf == NULL)
443 {
444 ga_init2(&ga, 1, 10);
445 if (ga_grow(&ga, len + 1) == FAIL)
446 return NULL;
447 mch_memmove(ga.ga_data, str, (size_t)len);
448 ga.ga_len = len;
449 }
450 else
451 {
452 if (len >= buflen) /* Ugly! */
453 len = buflen - 1;
454 mch_memmove(buf, str, (size_t)len);
455 }
456 if (buf == NULL)
457 GA_CHAR(len) = NUL;
458 else
459 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460
461 /* Make each character lower case. */
462 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000464 {
465#ifdef FEAT_MBYTE
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000466 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000467 {
468 if (enc_utf8)
469 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000470 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100471 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000472 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000473
Bram Moolenaarb9839212008-06-28 11:03:50 +0000474 /* Only replace the character when it is not an invalid
475 * sequence (ASCII character or more than one byte) and
476 * utf_tolower() doesn't return the original character. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100477 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100479 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000480
481 /* If the byte length changes need to shift the following
482 * characters forward or backward. */
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100483 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000484 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100485 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000486 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100487 if (buf == NULL
488 ? ga_grow(&ga, nlen - olen + 1) == FAIL
489 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490 {
491 /* out of memory, keep old char */
492 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100493 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000495 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100496 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000497 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000498 if (buf == NULL)
499 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100500 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
501 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000502 }
503 else
504 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100505 STRMOVE(buf + i + nlen, buf + i + olen);
506 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000507 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 }
509 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000510 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 }
512 }
513 /* skip to next multi-byte char */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000514 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 }
516 else
517#endif
518 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000519 if (buf == NULL)
520 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
521 else
522 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000523 ++i;
524 }
525 }
526
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000527 if (buf == NULL)
528 return (char_u *)ga.ga_data;
529 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000530}
531#endif
532
533/*
534 * Catch 22: chartab[] can't be initialized before the options are
535 * initialized, and initializing options may cause transchar() to be called!
536 * When chartab_initialized == FALSE don't use chartab[].
537 * Does NOT work for multi-byte characters, c must be <= 255.
538 * Also doesn't work for the first byte of a multi-byte, "c" must be a
539 * character!
540 */
541static char_u transchar_buf[7];
542
543 char_u *
544transchar(c)
545 int c;
546{
547 int i;
548
549 i = 0;
550 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */
551 {
552 transchar_buf[0] = '~';
553 transchar_buf[1] = '@';
554 i = 2;
555 c = K_SECOND(c);
556 }
557
558 if ((!chartab_initialized && (
559#ifdef EBCDIC
560 (c >= 64 && c < 255)
561#else
562 (c >= ' ' && c <= '~')
563#endif
564#ifdef FEAT_FKMAP
565 || F_ischar(c)
566#endif
567 )) || (c < 256 && vim_isprintc_strict(c)))
568 {
569 /* printable character */
570 transchar_buf[i] = c;
571 transchar_buf[i + 1] = NUL;
572 }
573 else
574 transchar_nonprint(transchar_buf + i, c);
575 return transchar_buf;
576}
577
578#if defined(FEAT_MBYTE) || defined(PROTO)
579/*
580 * Like transchar(), but called with a byte instead of a character. Checks
581 * for an illegal UTF-8 byte.
582 */
583 char_u *
584transchar_byte(c)
585 int c;
586{
587 if (enc_utf8 && c >= 0x80)
588 {
589 transchar_nonprint(transchar_buf, c);
590 return transchar_buf;
591 }
592 return transchar(c);
593}
594#endif
595
596/*
597 * Convert non-printable character to two or more printable characters in
598 * "buf[]". "buf" needs to be able to hold five bytes.
599 * Does NOT work for multi-byte characters, c must be <= 255.
600 */
601 void
602transchar_nonprint(buf, c)
603 char_u *buf;
604 int c;
605{
606 if (c == NL)
607 c = NUL; /* we use newline in place of a NUL */
608 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
609 c = NL; /* we use CR in place of NL in this case */
610
611 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */
612 transchar_hex(buf, c);
613
614#ifdef EBCDIC
615 /* For EBCDIC only the characters 0-63 and 255 are not printable */
616 else if (CtrlChar(c) != 0 || c == DEL)
617#else
618 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */
619#endif
620 {
621 buf[0] = '^';
622#ifdef EBCDIC
623 if (c == DEL)
624 buf[1] = '?'; /* DEL displayed as ^? */
625 else
626 buf[1] = CtrlChar(c);
627#else
628 buf[1] = c ^ 0x40; /* DEL displayed as ^? */
629#endif
630
631 buf[2] = NUL;
632 }
633#ifdef FEAT_MBYTE
634 else if (enc_utf8 && c >= 0x80)
635 {
636 transchar_hex(buf, c);
637 }
638#endif
639#ifndef EBCDIC
640 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */
641 {
642 buf[0] = '|';
643 buf[1] = c - 0x80;
644 buf[2] = NUL;
645 }
646#else
647 else if (c < 64)
648 {
649 buf[0] = '~';
650 buf[1] = MetaChar(c);
651 buf[2] = NUL;
652 }
653#endif
654 else /* 0x80 - 0x9f and 0xff */
655 {
656 /*
657 * TODO: EBCDIC I don't know what to do with this chars, so I display
658 * them as '~?' for now
659 */
660 buf[0] = '~';
661#ifdef EBCDIC
662 buf[1] = '?'; /* 0xff displayed as ~? */
663#else
664 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */
665#endif
666 buf[2] = NUL;
667 }
668}
669
670 void
671transchar_hex(buf, c)
672 char_u *buf;
673 int c;
674{
675 int i = 0;
676
677 buf[0] = '<';
678#ifdef FEAT_MBYTE
679 if (c > 255)
680 {
681 buf[++i] = nr2hex((unsigned)c >> 12);
682 buf[++i] = nr2hex((unsigned)c >> 8);
683 }
684#endif
685 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000686 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 buf[++i] = '>';
688 buf[++i] = NUL;
689}
690
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    unsigned    nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
704
705/*
706 * Return number of display cells occupied by byte "b".
707 * Caller must make sure 0 <= b <= 255.
708 * For multi-byte mode "b" must be the first byte of a character.
709 * A TAB is counted as two cells: "^I".
710 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
711 * cells depends on further bytes.
712 */
713 int
714byte2cells(b)
715 int b;
716{
717#ifdef FEAT_MBYTE
718 if (enc_utf8 && b >= 0x80)
719 return 0;
720#endif
721 return (chartab[b] & CT_CELL_MASK);
722}
723
724/*
725 * Return number of display cells occupied by character "c".
726 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
727 * A TAB is counted as two cells: "^I" or four: "<09>".
728 */
729 int
730char2cells(c)
731 int c;
732{
733 if (IS_SPECIAL(c))
734 return char2cells(K_SECOND(c)) + 2;
735#ifdef FEAT_MBYTE
736 if (c >= 0x80)
737 {
738 /* UTF-8: above 0x80 need to check the value */
739 if (enc_utf8)
740 return utf_char2cells(c);
741 /* DBCS: double-byte means double-width, except for euc-jp with first
742 * byte 0x8e */
743 if (enc_dbcs != 0 && c >= 0x100)
744 {
745 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
746 return 1;
747 return 2;
748 }
749 }
750#endif
751 return (chartab[c & 0xff] & CT_CELL_MASK);
752}
753
754/*
755 * Return number of display cells occupied by character at "*p".
756 * A TAB is counted as two cells: "^I" or four: "<09>".
757 */
758 int
759ptr2cells(p)
760 char_u *p;
761{
762#ifdef FEAT_MBYTE
763 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
764 if (enc_utf8 && *p >= 0x80)
765 return utf_ptr2cells(p);
766 /* For DBCS we can tell the cell count from the first byte. */
767#endif
768 return (chartab[*p] & CT_CELL_MASK);
769}
770
771/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100772 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773 * counting TABs as two characters: "^I".
774 */
775 int
776vim_strsize(s)
777 char_u *s;
778{
779 return vim_strnsize(s, (int)MAXCOL);
780}
781
782/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100783 * Return the number of character cells string "s[len]" will take on the
784 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 */
786 int
787vim_strnsize(s, len)
788 char_u *s;
789 int len;
790{
791 int size = 0;
792
793 while (*s != NUL && --len >= 0)
794 {
795#ifdef FEAT_MBYTE
796 if (has_mbyte)
797 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000798 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000799
800 size += ptr2cells(s);
801 s += l;
802 len -= l - 1;
803 }
804 else
805#endif
806 size += byte2cells(*s++);
807 }
808 return size;
809}
810
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to an if/else that returns from the calling function in
 * both branches.  "col" is parenthesized, so that an expression can be
 * passed for it.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
827
828#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
829 || defined(FEAT_VIRTUALEDIT) || defined(PROTO)
830 int
831chartabsize(p, col)
832 char_u *p;
833 colnr_T col;
834{
835 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
836}
837#endif
838
839#ifdef FEAT_LINEBREAK
840 static int
841win_chartabsize(wp, p, col)
842 win_T *wp;
843 char_u *p;
844 colnr_T col;
845{
846 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
847}
848#endif
849
850/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200851 * Return the number of characters the string 's' will take on the screen,
852 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000853 */
854 int
855linetabsize(s)
856 char_u *s;
857{
Bram Moolenaardc536092010-07-18 15:45:49 +0200858 return linetabsize_col(0, s);
859}
860
861/*
862 * Like linetabsize(), but starting at column "startcol".
863 */
864 int
865linetabsize_col(startcol, s)
866 int startcol;
867 char_u *s;
868{
869 colnr_T col = startcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000870
871 while (*s != NUL)
872 col += lbr_chartabsize_adv(&s, col);
873 return (int)col;
874}
875
876/*
877 * Like linetabsize(), but for a given window instead of the current one.
878 */
879 int
880win_linetabsize(wp, p, len)
881 win_T *wp;
882 char_u *p;
883 colnr_T len;
884{
885 colnr_T col = 0;
886 char_u *s;
887
Bram Moolenaarb5bf5b82004-12-24 14:35:23 +0000888 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889 col += win_lbr_chartabsize(wp, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000890 return (int)col;
891}
892
893/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000894 * Return TRUE if 'c' is a normal identifier character:
895 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896 */
897 int
898vim_isIDc(c)
899 int c;
900{
901 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
902}
903
904/*
905 * return TRUE if 'c' is a keyword character: Letters and characters from
906 * 'iskeyword' option for current buffer.
907 * For multi-byte characters mb_get_class() is used (builtin rules).
908 */
909 int
910vim_iswordc(c)
911 int c;
912{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100913 return vim_iswordc_buf(c, curbuf);
914}
915
916 int
917vim_iswordc_buf(c, buf)
918 int c;
919 buf_T *buf;
920{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000921#ifdef FEAT_MBYTE
922 if (c >= 0x100)
923 {
924 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000925 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926 if (enc_utf8)
927 return utf_class(c) >= 2;
928 }
929#endif
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100930 return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000931}
932
933/*
934 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
935 */
936 int
937vim_iswordp(p)
938 char_u *p;
939{
940#ifdef FEAT_MBYTE
941 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
942 return mb_get_class(p) >= 2;
943#endif
944 return GET_CHARTAB(curbuf, *p) != 0;
945}
946
Bram Moolenaar071d4272004-06-13 20:20:40 +0000947 int
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100948vim_iswordp_buf(p, buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000949 char_u *p;
950 buf_T *buf;
951{
Bram Moolenaara50e5862013-01-30 17:30:17 +0100952#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +0000953 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
954 return mb_get_class(p) >= 2;
Bram Moolenaara50e5862013-01-30 17:30:17 +0100955#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 return (GET_CHARTAB(buf, *p) != 0);
957}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958
959/*
960 * return TRUE if 'c' is a valid file-name character
961 * Assume characters above 0x100 are valid (multi-byte).
962 */
963 int
964vim_isfilec(c)
965 int c;
966{
967 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
968}
969
970/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000971 * return TRUE if 'c' is a valid file-name character or a wildcard character
972 * Assume characters above 0x100 are valid (multi-byte).
973 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
974 * returns false.
975 */
976 int
977vim_isfilec_or_wc(c)
978 int c;
979{
980 char_u buf[2];
981
982 buf[0] = (char_u)c;
983 buf[1] = NUL;
984 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
985}
986
987/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 * return TRUE if 'c' is a printable character
989 * Assume characters above 0x100 are printable (multi-byte), except for
990 * Unicode.
991 */
992 int
993vim_isprintc(c)
994 int c;
995{
996#ifdef FEAT_MBYTE
997 if (enc_utf8 && c >= 0x100)
998 return utf_printable(c);
999#endif
1000 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1001}
1002
/*
 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
 * byte of a double-byte character.
 */
    int
vim_isprintc_strict(c)
    int         c;
{
#ifdef FEAT_MBYTE
    if (c < 0x100 && enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
        return FALSE;
#endif
    /* For everything else the result is the same as for vim_isprintc(). */
    return vim_isprintc(c);
}
1019
1020/*
1021 * like chartabsize(), but also check for line breaks on the screen
1022 */
1023 int
1024lbr_chartabsize(s, col)
1025 unsigned char *s;
1026 colnr_T col;
1027{
1028#ifdef FEAT_LINEBREAK
1029 if (!curwin->w_p_lbr && *p_sbr == NUL)
1030 {
1031#endif
1032#ifdef FEAT_MBYTE
1033 if (curwin->w_p_wrap)
1034 return win_nolbr_chartabsize(curwin, s, col, NULL);
1035#endif
1036 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
1037#ifdef FEAT_LINEBREAK
1038 }
1039 return win_lbr_chartabsize(curwin, s, col, NULL);
1040#endif
1041}
1042
1043/*
1044 * Call lbr_chartabsize() and advance the pointer.
1045 */
1046 int
1047lbr_chartabsize_adv(s, col)
1048 char_u **s;
1049 colnr_T col;
1050{
1051 int retval;
1052
1053 retval = lbr_chartabsize(*s, col);
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001054 mb_ptr_adv(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 return retval;
1056}
1057
1058/*
1059 * This function is used very often, keep it fast!!!!
1060 *
1061 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
1062 * string at start of line. Warning: *headp is only set if it's a non-zero
1063 * value, init to 0 before calling.
1064 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001065 int
1066win_lbr_chartabsize(wp, s, col, headp)
1067 win_T *wp;
1068 char_u *s;
1069 colnr_T col;
Bram Moolenaar0c094b92009-05-14 20:20:33 +00001070 int *headp UNUSED;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001071{
1072#ifdef FEAT_LINEBREAK
1073 int c;
1074 int size;
1075 colnr_T col2;
1076 colnr_T colmax;
1077 int added;
1078# ifdef FEAT_MBYTE
1079 int mb_added = 0;
1080# else
1081# define mb_added 0
1082# endif
1083 int numberextra;
1084 char_u *ps;
1085 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001086 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001087
1088 /*
1089 * No 'linebreak' and 'showbreak': return quickly.
1090 */
1091 if (!wp->w_p_lbr && *p_sbr == NUL)
1092#endif
1093 {
1094#ifdef FEAT_MBYTE
1095 if (wp->w_p_wrap)
1096 return win_nolbr_chartabsize(wp, s, col, headp);
1097#endif
1098 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1099 }
1100
1101#ifdef FEAT_LINEBREAK
1102 /*
1103 * First get normal size, without 'linebreak'
1104 */
1105 size = win_chartabsize(wp, s, col);
1106 c = *s;
1107
1108 /*
1109 * If 'linebreak' set check at a blank before a non-blank if the line
1110 * needs a break here
1111 */
1112 if (wp->w_p_lbr
1113 && vim_isbreak(c)
1114 && !vim_isbreak(s[1])
1115 && !wp->w_p_list
1116 && wp->w_p_wrap
1117# ifdef FEAT_VERTSPLIT
1118 && wp->w_width != 0
1119# endif
1120 )
1121 {
1122 /*
1123 * Count all characters from first non-blank after a blank up to next
1124 * non-blank after a blank.
1125 */
1126 numberextra = win_col_off(wp);
1127 col2 = col;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001128 colmax = (colnr_T)(W_WIDTH(wp) - numberextra);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001129 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001130 {
1131 n = colmax + win_col_off2(wp);
1132 if (n > 0)
1133 colmax += (((col - colmax) / n) + 1) * n;
1134 }
1135
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136 for (;;)
1137 {
1138 ps = s;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001139 mb_ptr_adv(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001140 c = *s;
1141 if (!(c != NUL
1142 && (vim_isbreak(c)
1143 || (!vim_isbreak(c)
1144 && (col2 == col || !vim_isbreak(*ps))))))
1145 break;
1146
1147 col2 += win_chartabsize(wp, s, col2);
1148 if (col2 >= colmax) /* doesn't fit */
1149 {
1150 size = colmax - col;
1151 tab_corr = FALSE;
1152 break;
1153 }
1154 }
1155 }
1156# ifdef FEAT_MBYTE
1157 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1158 && wp->w_p_wrap && in_win_border(wp, col))
1159 {
1160 ++size; /* Count the ">" in the last column. */
1161 mb_added = 1;
1162 }
1163# endif
1164
1165 /*
1166 * May have to add something for 'showbreak' string at start of line
1167 * Set *headp to the size of what we add.
1168 */
1169 added = 0;
1170 if (*p_sbr != NUL && wp->w_p_wrap && col != 0)
1171 {
1172 numberextra = win_col_off(wp);
1173 col += numberextra + mb_added;
1174 if (col >= (colnr_T)W_WIDTH(wp))
1175 {
1176 col -= W_WIDTH(wp);
1177 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
1178 if (numberextra > 0)
1179 col = col % numberextra;
1180 }
1181 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp))
1182 {
1183 added = vim_strsize(p_sbr);
1184 if (tab_corr)
1185 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts;
1186 else
1187 size += added;
1188 if (col != 0)
1189 added = 0;
1190 }
1191 }
1192 if (headp != NULL)
1193 *headp = added + mb_added;
1194 return size;
1195#endif
1196}
1197
1198#if defined(FEAT_MBYTE) || defined(PROTO)
1199/*
1200 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1201 * 'wrap' is on. This means we need to check for a double-byte character that
1202 * doesn't fit at the end of the screen line.
1203 */
1204 static int
1205win_nolbr_chartabsize(wp, s, col, headp)
1206 win_T *wp;
1207 char_u *s;
1208 colnr_T col;
1209 int *headp;
1210{
1211 int n;
1212
1213 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1214 {
1215 n = wp->w_buffer->b_p_ts;
1216 return (int)(n - (col % n));
1217 }
1218 n = ptr2cells(s);
1219 /* Add one cell for a double-width character in the last column of the
1220 * window, displayed with a ">". */
1221 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1222 {
1223 if (headp != NULL)
1224 *headp = 1;
1225 return 3;
1226 }
1227 return n;
1228}
1229
1230/*
1231 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1232 * "wp".
1233 */
1234 int
1235in_win_border(wp, vcol)
1236 win_T *wp;
1237 colnr_T vcol;
1238{
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001239 int width1; /* width of first line (after line number) */
1240 int width2; /* width of further lines */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001241
1242#ifdef FEAT_VERTSPLIT
1243 if (wp->w_width == 0) /* there is no border */
1244 return FALSE;
1245#endif
1246 width1 = W_WIDTH(wp) - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001247 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001248 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001249 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001250 return TRUE;
1251 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001252 if (width2 <= 0)
1253 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254 return ((vcol - width1) % width2 == width2 - 1);
1255}
1256#endif /* FEAT_MBYTE */
1257
1258/*
1259 * Get virtual column number of pos.
1260 * start: on the first position of this character (TAB, ctrl)
1261 * cursor: where the cursor is on this character (first char, except for TAB)
1262 * end: on the last position of this character (TAB, ctrl)
1263 *
1264 * This is used very often, keep it fast!
1265 */
1266 void
1267getvcol(wp, pos, start, cursor, end)
1268 win_T *wp;
1269 pos_T *pos;
1270 colnr_T *start;
1271 colnr_T *cursor;
1272 colnr_T *end;
1273{
1274 colnr_T vcol;
1275 char_u *ptr; /* points to current char */
1276 char_u *posptr; /* points to char at pos->col */
1277 int incr;
1278 int head;
1279 int ts = wp->w_buffer->b_p_ts;
1280 int c;
1281
1282 vcol = 0;
1283 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001284 if (pos->col == MAXCOL)
1285 posptr = NULL; /* continue until the NUL */
1286 else
1287 posptr = ptr + pos->col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001288
1289 /*
1290 * This function is used very often, do some speed optimizations.
1291 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop.
1292 * Also use this when 'list' is set but tabs take their normal size.
1293 */
1294 if ((!wp->w_p_list || lcs_tab1 != NUL)
1295#ifdef FEAT_LINEBREAK
1296 && !wp->w_p_lbr && *p_sbr == NUL
1297#endif
1298 )
1299 {
1300#ifndef FEAT_MBYTE
1301 head = 0;
1302#endif
1303 for (;;)
1304 {
1305#ifdef FEAT_MBYTE
1306 head = 0;
1307#endif
1308 c = *ptr;
1309 /* make sure we don't go past the end of the line */
1310 if (c == NUL)
1311 {
1312 incr = 1; /* NUL at end of line only takes one column */
1313 break;
1314 }
1315 /* A tab gets expanded, depending on the current column */
1316 if (c == TAB)
1317 incr = ts - (vcol % ts);
1318 else
1319 {
1320#ifdef FEAT_MBYTE
1321 if (has_mbyte)
1322 {
1323 /* For utf-8, if the byte is >= 0x80, need to look at
1324 * further bytes to find the cell width. */
1325 if (enc_utf8 && c >= 0x80)
1326 incr = utf_ptr2cells(ptr);
1327 else
1328 incr = CHARSIZE(c);
1329
1330 /* If a double-cell char doesn't fit at the end of a line
1331 * it wraps to the next line, it's like this char is three
1332 * cells wide. */
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001333 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1334 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001335 {
1336 ++incr;
1337 head = 1;
1338 }
1339 }
1340 else
1341#endif
1342 incr = CHARSIZE(c);
1343 }
1344
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001345 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001346 break;
1347
1348 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001349 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001350 }
1351 }
1352 else
1353 {
1354 for (;;)
1355 {
1356 /* A tab gets expanded, depending on the current column */
1357 head = 0;
1358 incr = win_lbr_chartabsize(wp, ptr, vcol, &head);
1359 /* make sure we don't go past the end of the line */
1360 if (*ptr == NUL)
1361 {
1362 incr = 1; /* NUL at end of line only takes one column */
1363 break;
1364 }
1365
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001366 if (posptr != NULL && ptr >= posptr) /* character at pos->col */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001367 break;
1368
1369 vcol += incr;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001370 mb_ptr_adv(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001371 }
1372 }
1373 if (start != NULL)
1374 *start = vcol + head;
1375 if (end != NULL)
1376 *end = vcol + incr - 1;
1377 if (cursor != NULL)
1378 {
1379 if (*ptr == TAB
1380 && (State & NORMAL)
1381 && !wp->w_p_list
1382 && !virtual_active()
1383#ifdef FEAT_VISUAL
1384 && !(VIsual_active
1385 && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
1386#endif
1387 )
1388 *cursor = vcol + incr - 1; /* cursor at end */
1389 else
1390 *cursor = vcol + head; /* cursor at start */
1391 }
1392}
1393
1394/*
1395 * Get virtual cursor column in the current window, pretending 'list' is off.
1396 */
1397 colnr_T
1398getvcol_nolist(posp)
1399 pos_T *posp;
1400{
1401 int list_save = curwin->w_p_list;
1402 colnr_T vcol;
1403
1404 curwin->w_p_list = FALSE;
1405 getvcol(curwin, posp, NULL, &vcol, NULL);
1406 curwin->w_p_list = list_save;
1407 return vcol;
1408}
1409
1410#if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1411/*
1412 * Get virtual column in virtual mode.
1413 */
1414 void
1415getvvcol(wp, pos, start, cursor, end)
1416 win_T *wp;
1417 pos_T *pos;
1418 colnr_T *start;
1419 colnr_T *cursor;
1420 colnr_T *end;
1421{
1422 colnr_T col;
1423 colnr_T coladd;
1424 colnr_T endadd;
1425# ifdef FEAT_MBYTE
1426 char_u *ptr;
1427# endif
1428
1429 if (virtual_active())
1430 {
1431 /* For virtual mode, only want one value */
1432 getvcol(wp, pos, &col, NULL, NULL);
1433
1434 coladd = pos->coladd;
1435 endadd = 0;
1436# ifdef FEAT_MBYTE
1437 /* Cannot put the cursor on part of a wide character. */
1438 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001439 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001440 {
1441 int c = (*mb_ptr2char)(ptr + pos->col);
1442
1443 if (c != TAB && vim_isprintc(c))
1444 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001445 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaara5792f52005-11-23 21:25:05 +00001446 if (coladd > endadd) /* past end of line */
1447 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001448 else
1449 coladd = 0;
1450 }
1451 }
1452# endif
1453 col += coladd;
1454 if (start != NULL)
1455 *start = col;
1456 if (cursor != NULL)
1457 *cursor = col;
1458 if (end != NULL)
1459 *end = col + endadd;
1460 }
1461 else
1462 getvcol(wp, pos, start, cursor, end);
1463}
1464#endif
1465
1466#if defined(FEAT_VISUAL) || defined(PROTO)
1467/*
1468 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1469 * Used for Visual block mode.
1470 */
1471 void
1472getvcols(wp, pos1, pos2, left, right)
1473 win_T *wp;
1474 pos_T *pos1, *pos2;
1475 colnr_T *left, *right;
1476{
1477 colnr_T from1, from2, to1, to2;
1478
1479 if (ltp(pos1, pos2))
1480 {
1481 getvvcol(wp, pos1, &from1, NULL, &to1);
1482 getvvcol(wp, pos2, &from2, NULL, &to2);
1483 }
1484 else
1485 {
1486 getvvcol(wp, pos2, &from1, NULL, &to1);
1487 getvvcol(wp, pos1, &from2, NULL, &to2);
1488 }
1489 if (from2 < from1)
1490 *left = from2;
1491 else
1492 *left = from1;
1493 if (to2 > to1)
1494 {
1495 if (*p_sel == 'e' && from2 - 1 >= to1)
1496 *right = from2 - 1;
1497 else
1498 *right = to2;
1499 }
1500 else
1501 *right = to1;
1502}
1503#endif
1504
1505/*
1506 * skipwhite: skip over ' ' and '\t'.
1507 */
1508 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001509skipwhite(q)
1510 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001511{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001512 char_u *p = q;
1513
Bram Moolenaar071d4272004-06-13 20:20:40 +00001514 while (vim_iswhite(*p)) /* skip to next non-white */
1515 ++p;
1516 return p;
1517}
1518
1519/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001520 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001521 */
1522 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001523skipdigits(q)
1524 char_u *q;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001525{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001526 char_u *p = q;
1527
Bram Moolenaar071d4272004-06-13 20:20:40 +00001528 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */
1529 ++p;
1530 return p;
1531}
1532
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001533#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001534/*
1535 * skip over digits and hex characters
1536 */
1537 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001538skiphex(q)
1539 char_u *q;
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001540{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001541 char_u *p = q;
1542
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001543 while (vim_isxdigit(*p)) /* skip to next non-digit */
1544 ++p;
1545 return p;
1546}
1547#endif
1548
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549#if defined(FEAT_EX_EXTRA) || defined(PROTO)
1550/*
1551 * skip to digit (or NUL after the string)
1552 */
1553 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001554skiptodigit(q)
1555 char_u *q;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001556{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001557 char_u *p = q;
1558
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001559 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */
1560 ++p;
1561 return p;
1562}
1563
1564/*
1565 * skip to hex character (or NUL after the string)
1566 */
1567 char_u *
Bram Moolenaar1387a602008-07-24 19:31:11 +00001568skiptohex(q)
1569 char_u *q;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001570{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001571 char_u *p = q;
1572
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001573 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */
1574 ++p;
1575 return p;
1576}
1577#endif
1578
/*
 * Return non-zero when "c" is one of '0' to '9'.
 * Replacement for isdigit() that also accepts values of 0x100 and above.
 * isdigit() itself is not used, since on some systems it reports superscript
 * 1 as a digit.
 * For simple arguments the VIM_ISDIGIT() macro can be used instead.
 */
    int
vim_isdigit(c)
    int         c;
{
    return !(c < '0' || c > '9');
}
1592/*
1593 * Variant of isxdigit() that can handle characters > 0x100.
1594 * We don't use isxdigit() here, because on some systems it also considers
1595 * superscript 1 to be a digit.
1596 */
1597 int
1598vim_isxdigit(c)
1599 int c;
1600{
1601 return (c >= '0' && c <= '9')
1602 || (c >= 'a' && c <= 'f')
1603 || (c >= 'A' && c <= 'F');
1604}
1605
Bram Moolenaar78622822005-08-23 21:00:13 +00001606#if defined(FEAT_MBYTE) || defined(PROTO)
1607/*
1608 * Vim's own character class functions. These exist because many library
1609 * islower()/toupper() etc. do not work properly: they crash when used with
1610 * invalid values or can't handle latin1 when the locale is C.
1611 * Speed is most important here.
1612 */
1613#define LATIN1LOWER 'l'
1614#define LATIN1UPPER 'U'
1615
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001616static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001617static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1618static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001619
1620 int
1621vim_islower(c)
1622 int c;
1623{
1624 if (c <= '@')
1625 return FALSE;
1626 if (c >= 0x80)
1627 {
1628 if (enc_utf8)
1629 return utf_islower(c);
1630 if (c >= 0x100)
1631 {
1632#ifdef HAVE_ISWLOWER
1633 if (has_mbyte)
1634 return iswlower(c);
1635#endif
1636 /* islower() can't handle these chars and may crash */
1637 return FALSE;
1638 }
1639 if (enc_latin1like)
1640 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1641 }
1642 return islower(c);
1643}
1644
1645 int
1646vim_isupper(c)
1647 int c;
1648{
1649 if (c <= '@')
1650 return FALSE;
1651 if (c >= 0x80)
1652 {
1653 if (enc_utf8)
1654 return utf_isupper(c);
1655 if (c >= 0x100)
1656 {
1657#ifdef HAVE_ISWUPPER
1658 if (has_mbyte)
1659 return iswupper(c);
1660#endif
1661 /* islower() can't handle these chars and may crash */
1662 return FALSE;
1663 }
1664 if (enc_latin1like)
1665 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1666 }
1667 return isupper(c);
1668}
1669
1670 int
1671vim_toupper(c)
1672 int c;
1673{
1674 if (c <= '@')
1675 return c;
1676 if (c >= 0x80)
1677 {
1678 if (enc_utf8)
1679 return utf_toupper(c);
1680 if (c >= 0x100)
1681 {
1682#ifdef HAVE_TOWUPPER
1683 if (has_mbyte)
1684 return towupper(c);
1685#endif
1686 /* toupper() can't handle these chars and may crash */
1687 return c;
1688 }
1689 if (enc_latin1like)
1690 return latin1upper[c];
1691 }
1692 return TOUPPER_LOC(c);
1693}
1694
1695 int
1696vim_tolower(c)
1697 int c;
1698{
1699 if (c <= '@')
1700 return c;
1701 if (c >= 0x80)
1702 {
1703 if (enc_utf8)
1704 return utf_tolower(c);
1705 if (c >= 0x100)
1706 {
1707#ifdef HAVE_TOWLOWER
1708 if (has_mbyte)
1709 return towlower(c);
1710#endif
1711 /* tolower() can't handle these chars and may crash */
1712 return c;
1713 }
1714 if (enc_latin1like)
1715 return latin1lower[c];
1716 }
1717 return TOLOWER_LOC(c);
1718}
1719#endif
1720
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721/*
1722 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1723 */
1724 char_u *
1725skiptowhite(p)
1726 char_u *p;
1727{
1728 while (*p != ' ' && *p != '\t' && *p != NUL)
1729 ++p;
1730 return p;
1731}
1732
1733#if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1734 || defined(PROTO)
1735/*
1736 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1737 */
1738 char_u *
1739skiptowhite_esc(p)
1740 char_u *p;
1741{
1742 while (*p != ' ' && *p != '\t' && *p != NUL)
1743 {
1744 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1745 ++p;
1746 ++p;
1747 }
1748 return p;
1749}
1750#endif
1751
1752/*
1753 * Getdigits: Get a number from a string and skip over it.
1754 * Note: the argument is a pointer to a char_u pointer!
1755 */
1756 long
1757getdigits(pp)
1758 char_u **pp;
1759{
1760 char_u *p;
1761 long retval;
1762
1763 p = *pp;
1764 retval = atol((char *)p);
1765 if (*p == '-') /* skip negative sign */
1766 ++p;
1767 p = skipdigits(p); /* skip to next non-digit */
1768 *pp = p;
1769 return retval;
1770}
1771
1772/*
1773 * Return TRUE if "lbuf" is empty or only contains blanks.
1774 */
1775 int
1776vim_isblankline(lbuf)
1777 char_u *lbuf;
1778{
1779 char_u *p;
1780
1781 p = skipwhite(lbuf);
1782 return (*p == NUL || *p == '\r' || *p == '\n');
1783}
1784
1785/*
1786 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001787 * hexadecimal and octal numbers. Accepts a '-' sign.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1789 * 0 decimal
1790 * '0' octal
1791 * 'X' hex
1792 * 'x' hex
1793 * If "len" is not NULL, the length of the number in characters is returned.
1794 * If "nptr" is not NULL, the signed result is returned in it.
1795 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001796 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1797 * octal number.
Bram Moolenaar97b2ad32006-03-18 21:40:56 +00001798 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001799 * hex number.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 */
1801 void
1802vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr)
1803 char_u *start;
1804 int *hexp; /* return: type of number 0 = decimal, 'x'
1805 or 'X' is hex, '0' = octal */
1806 int *len; /* return: detected length of number */
1807 int dooct; /* recognize octal number */
1808 int dohex; /* recognize hex number */
1809 long *nptr; /* return: signed result */
1810 unsigned long *unptr; /* return: unsigned result */
1811{
1812 char_u *ptr = start;
1813 int hex = 0; /* default is decimal */
1814 int negative = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 unsigned long un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001816 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001817
1818 if (ptr[0] == '-')
1819 {
1820 negative = TRUE;
1821 ++ptr;
1822 }
1823
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001824 /* Recognize hex and octal. */
1825 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001826 {
1827 hex = ptr[1];
1828 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]))
1829 ptr += 2; /* hexadecimal */
1830 else
1831 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001832 hex = 0; /* default is decimal */
1833 if (dooct)
1834 {
1835 /* Don't interpret "0", "08" or "0129" as octal. */
1836 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1837 {
1838 if (ptr[n] > '7')
1839 {
1840 hex = 0; /* can't be octal */
1841 break;
1842 }
Bram Moolenaar06af6022012-01-26 13:40:08 +01001843 if (ptr[n] >= '0')
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001844 hex = '0'; /* assume octal */
1845 }
1846 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001847 }
1848 }
1849
1850 /*
1851 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1852 */
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001853 if (hex == '0' || dooct > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001854 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001855 /* octal */
1856 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001857 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001858 un = 8 * un + (unsigned long)(*ptr - '0');
1859 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001860 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001861 }
1862 else if (hex != 0 || dohex > 1)
1863 {
1864 /* hex */
1865 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001866 {
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001867 un = 16 * un + (unsigned long)hex2nr(*ptr);
1868 ++ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001869 }
1870 }
1871 else
1872 {
1873 /* decimal */
1874 while (VIM_ISDIGIT(*ptr))
1875 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001876 un = 10 * un + (unsigned long)(*ptr - '0');
1877 ++ptr;
1878 }
1879 }
1880
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881 if (hexp != NULL)
1882 *hexp = hex;
1883 if (len != NULL)
1884 *len = (int)(ptr - start);
1885 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001886 {
1887 if (negative) /* account for leading '-' for decimal numbers */
1888 *nptr = -(long)un;
1889 else
1890 *nptr = (long)un;
1891 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001892 if (unptr != NULL)
1893 *unptr = un;
1894}
1895
/*
 * Return the value of the single hex character "c".
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'.
 */
    int
hex2nr(c)
    int         c;
{
    int         zero = '0';     /* character that stands for value zero */

    if (c >= 'a' && c <= 'f')
        zero = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
        zero = 'A' - 10;
    return c - zero;
}
1910
1911#if defined(FEAT_TERMRESPONSE) \
1912 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1913/*
1914 * Convert two hex characters to a byte.
1915 * Return -1 if one of the characters is not hex.
1916 */
1917 int
1918hexhex2nr(p)
1919 char_u *p;
1920{
1921 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1922 return -1;
1923 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1924}
1925#endif
1926
1927/*
1928 * Return TRUE if "str" starts with a backslash that should be removed.
1929 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1930 * backslash is not a normal file name character.
1931 * '$' is a valid file name character, we don't remove the backslash before
1932 * it. This means it is not possible to use an environment variable after a
1933 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1934 * Although "\ name" is valid, the backslash in "Program\ files" must be
1935 * removed. Assume a file name doesn't start with a space.
1936 * For multi-byte names, never remove a backslash before a non-ascii
1937 * character, assume that all multi-byte characters are valid file name
1938 * characters.
1939 */
1940 int
1941rem_backslash(str)
1942 char_u *str;
1943{
1944#ifdef BACKSLASH_IN_FILENAME
1945 return (str[0] == '\\'
1946# ifdef FEAT_MBYTE
1947 && str[1] < 0x80
1948# endif
1949 && (str[1] == ' '
1950 || (str[1] != NUL
1951 && str[1] != '*'
1952 && str[1] != '?'
1953 && !vim_isfilec(str[1]))));
1954#else
1955 return (str[0] == '\\' && str[1] != NUL);
1956#endif
1957}
1958
1959/*
1960 * Halve the number of backslashes in a file name argument.
1961 * For MS-DOS we only do this if the character after the backslash
1962 * is not a normal file character.
1963 */
1964 void
1965backslash_halve(p)
1966 char_u *p;
1967{
1968 for ( ; *p; ++p)
1969 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00001970 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001971}
1972
1973/*
1974 * backslash_halve() plus save the result in allocated memory.
1975 */
1976 char_u *
1977backslash_halve_save(p)
1978 char_u *p;
1979{
1980 char_u *res;
1981
1982 res = vim_strsave(p);
1983 if (res == NULL)
1984 return p;
1985 backslash_halve(res);
1986 return res;
1987}
1988
1989#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
1990/*
1991 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
1992 * The first 64 entries have been added to map control characters defined in
1993 * ascii.h
1994 */
1995static char_u ebcdic2ascii_tab[256] =
1996{
1997 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
1998 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
1999 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2000 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2001 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2002 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2003 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2004 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2005 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2006 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2007 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2008 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2009 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2010 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2011 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2012 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2013 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2014 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2015 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2016 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2017 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2018 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2019 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2020 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2021 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2022 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2023 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2024 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2025 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2026 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2027 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2028 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2029};
2030
2031/*
2032 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2033 * wanting 7-bit ASCII characters out the other end.
2034 */
2035 void
2036ebcdic2ascii(buffer, len)
2037 char_u *buffer;
2038 int len;
2039{
2040 int i;
2041
2042 for (i = 0; i < len; i++)
2043 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2044}
2045#endif