blob: 12d63597088f45d76053cbdbb1f7286cd60ea8f3 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
// b_chartab[] is an array of 32 bytes, each bit representing one of the
// characters 0-255: byte index is "c >> 3", bit index is "c & 7".
// "buf" must point to something with a b_chartab[] member and "c" must be in
// the range 0-255.  "c" is evaluated twice, don't pass an expression with
// side effects.
// Every expansion is fully parenthesized so that the macros can be used
// inside a larger expression without the operators around it binding to the
// right-hand side of the assignment.
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
Bram Moolenaarc667da52019-11-30 20:52:27 +010033#define CT_CELL_MASK 0x07 // mask: nr of display cells (1, 2 or 4)
34#define CT_PRINT_CHAR 0x10 // flag: set for printable chars
35#define CT_ID_CHAR 0x20 // flag: set for ID chars
36#define CT_FNAME_CHAR 0x40 // flag: set for file name chars
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200172 else if (has_mbyte)
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200182 else if (has_mbyte)
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 c2 = mb_ptr2char_adv(&p);
184 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c2 = *p++;
186 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000187 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
Bram Moolenaarc667da52019-11-30 20:52:27 +0100191 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100210 // Use the MB_ functions here, because isalpha() doesn't
211 // work properly when 'encoding' is "latin1" and the locale is
212 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100215 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
217 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100218 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000219 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100222 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 {
224 if ((c < ' '
225#ifndef EBCDIC
226 || c > '~'
227#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100228 // For double-byte we keep the cell width, so
229 // that we can detect it from the first byte.
230 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 {
232 if (tilde)
233 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100234 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 }
238 else
239 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100240 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
241 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000242 }
243 }
244 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100245 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246 {
247 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100252 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 {
254 if (tilde)
255 RESET_CHARTAB(buf, c);
256 else
257 SET_CHARTAB(buf, c);
258 }
259 }
260 ++c;
261 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100262
263 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100266 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 }
269 }
270 chartab_initialized = TRUE;
271 return OK;
272}
273
274/*
275 * Translate any special characters in buf[bufsize] in-place.
276 * The result is a string with only printable characters, but if there is not
277 * enough room, not all characters will be translated.
278 */
279 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100280trans_characters(
281 char_u *buf,
282 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100284 int len; // length of string needing translation
285 int room; // room in buffer after string
286 char_u *trs; // translated character
287 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288
289 len = (int)STRLEN(buf);
290 room = bufsize - len;
291 while (*buf != 0)
292 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100293 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000294 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000295 len -= trs_len;
296 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 {
298 trs = transchar_byte(*buf);
299 trs_len = (int)STRLEN(trs);
300 if (trs_len > 1)
301 {
302 room -= trs_len - 1;
303 if (room <= 0)
304 return;
305 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
306 }
307 mch_memmove(buf, trs, (size_t)trs_len);
308 --len;
309 }
310 buf += trs_len;
311 }
312}
313
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314/*
315 * Translate a string into allocated memory, replacing special chars with
316 * printable chars. Returns NULL when out of memory.
317 */
318 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100319transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320{
321 char_u *res;
322 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323 int l, len, c;
324 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 if (has_mbyte)
327 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100328 // Compute the length of the result, taking account of unprintable
329 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330 len = 0;
331 p = s;
332 while (*p != NUL)
333 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000334 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000335 {
336 c = (*mb_ptr2char)(p);
337 p += l;
338 if (vim_isprintc(c))
339 len += l;
340 else
341 {
342 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000343 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344 }
345 }
346 else
347 {
348 l = byte2cells(*p++);
349 if (l > 0)
350 len += l;
351 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100352 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353 }
354 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200355 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356 }
357 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 if (res != NULL)
360 {
361 *res = NUL;
362 p = s;
363 while (*p != NUL)
364 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000365 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000366 {
367 c = (*mb_ptr2char)(p);
368 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100369 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370 else
371 transchar_hex(res + STRLEN(res), c);
372 p += l;
373 }
374 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375 STRCAT(res, transchar_byte(*p++));
376 }
377 }
378 return res;
379}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000382 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
383 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000384 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
385 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 */
387 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100388str_foldcase(
389 char_u *str,
390 int orglen,
391 char_u *buf,
392 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 garray_T ga;
395 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000396 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000397
398#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
399#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
401#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402
Bram Moolenaarc667da52019-11-30 20:52:27 +0100403 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404 if (buf == NULL)
405 {
406 ga_init2(&ga, 1, 10);
407 if (ga_grow(&ga, len + 1) == FAIL)
408 return NULL;
409 mch_memmove(ga.ga_data, str, (size_t)len);
410 ga.ga_len = len;
411 }
412 else
413 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100414 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000415 len = buflen - 1;
416 mch_memmove(buf, str, (size_t)len);
417 }
418 if (buf == NULL)
419 GA_CHAR(len) = NUL;
420 else
421 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaarc667da52019-11-30 20:52:27 +0100423 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000425 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
429 if (enc_utf8)
430 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000431 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100432 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434
Bram Moolenaarc667da52019-11-30 20:52:27 +0100435 // Only replace the character when it is not an invalid
436 // sequence (ASCII character or more than one byte) and
437 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100438 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441
Bram Moolenaarc667da52019-11-30 20:52:27 +0100442 // If the byte length changes need to shift the following
443 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100444 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (buf == NULL
449 ? ga_grow(&ga, nlen - olen + 1) == FAIL
450 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100452 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100454 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000456 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 if (buf == NULL)
460 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100461 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
462 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 }
464 else
465 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100466 STRMOVE(buf + i + nlen, buf + i + olen);
467 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000468 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000469 }
470 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100474 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000475 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 }
477 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000479 if (buf == NULL)
480 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
481 else
482 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000483 ++i;
484 }
485 }
486
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 if (buf == NULL)
488 return (char_u *)ga.ga_data;
489 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000491
492/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100493 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * Does NOT work for multi-byte characters, c must be <= 255.
497 * Also doesn't work for the first byte of a multi-byte, "c" must be a
498 * character!
499 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200500static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501
502 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100503transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200505 return transchar_buf(curbuf, c);
506}
507
508 char_u *
509transchar_buf(buf_T *buf, int c)
510{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 int i;
512
513 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100514 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200516 transchar_charbuf[0] = '~';
517 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100530 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200531 transchar_charbuf[i] = c;
532 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000533 }
534 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200535 transchar_nonprint(buf, transchar_charbuf + i, c);
536 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200548 transchar_nonprint(curbuf, transchar_charbuf, c);
549 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200556 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200560transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100563 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200564 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000566
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200568 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000569
570#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100571 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100574 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575#endif
576 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200577 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000578#ifdef EBCDIC
579 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200580 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000581 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#endif
586
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200587 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200591 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200596 charbuf[0] = '|';
597 charbuf[1] = c - 0x80;
598 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 }
600#else
601 else if (c < 64)
602 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200603 charbuf[0] = '~';
604 charbuf[1] = MetaChar(c);
605 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606 }
607#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100608 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200614 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    static const char digits[] = "0123456789abcdef";

    return (unsigned)digits[c & 0xf];
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100682 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8)
684 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100685 // DBCS: double-byte means double-width, except for euc-jp with first
686 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100704 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100707 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col" in window "wp" showing buffer "buf", taking into
 * account the size of a tab.
 * A TAB is expanded to the next tab stop unless 'list' is set for the window
 * and w_lcs_chars.tab1 is zero; every other character, and a TAB in that
 * case, is measured with ptr2cells().
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The expansion is an if/else of which both branches return, thus control
 * never continues after the macro.  All arguments are parenthesized so that
 * expressions can be passed safely.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Return the number of screen cells the character at "p" takes when displayed
 * at column "col" in window "wp", using the buffer of that window and taking
 * into account the size of a tab.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    buf_T	*buf = wp->w_buffer;

    // The macro returns from this function in both of its branches.
    RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100803 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
Bram Moolenaare3d1f4c2021-04-06 20:21:59 +0200836 * Like vim_isIDc() but not using the 'isident' option: letters, numbers and
837 * underscore.
838 */
839 int
840vim_isNormalIDc(int c)
841{
842 return ASCII_ISALNUM(c) || c == '_';
843}
844
845/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100847 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 * For multi-byte characters mb_get_class() is used (builtin rules).
849 */
850 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100851vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100853 return vim_iswordc_buf(c, curbuf);
854}
855
856 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100857vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100858{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 if (c >= 0x100)
860 {
861 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000862 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100864 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100865 return FALSE;
866 }
867 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868}
869
870/*
871 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
872 */
873 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100874vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
878
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100880vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100882 int c = *p;
883
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 if (has_mbyte && MB_BYTE2LEN(c) > 1)
885 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100886 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888
889/*
890 * return TRUE if 'c' is a valid file-name character
891 * Assume characters above 0x100 are valid (multi-byte).
892 */
893 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100894vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100896 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897}
898
899/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000900 * return TRUE if 'c' is a valid file-name character or a wildcard character
901 * Assume characters above 0x100 are valid (multi-byte).
902 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
903 * returns false.
904 */
905 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100906vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000907{
908 char_u buf[2];
909
910 buf[0] = (char_u)c;
911 buf[1] = NUL;
912 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
913}
914
915/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200916 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000917 * Assume characters above 0x100 are printable (multi-byte), except for
918 * Unicode.
919 */
920 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100921vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923 if (enc_utf8 && c >= 0x100)
924 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100925 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926}
927
928/*
929 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
930 * byte of a double-byte character.
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000935 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
936 return FALSE;
937 if (enc_utf8 && c >= 0x100)
938 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100939 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940}
941
942/*
943 * like chartabsize(), but also check for line breaks on the screen
944 */
945 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100946lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100947 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100948 unsigned char *s,
949 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950{
951#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100952 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
953 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954 {
955#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 if (curwin->w_p_wrap)
957 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
959#ifdef FEAT_LINEBREAK
960 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200961 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962#endif
963}
964
965/*
966 * Call lbr_chartabsize() and advance the pointer.
967 */
968 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100969lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100970 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100971 char_u **s,
972 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973{
974 int retval;
975
Bram Moolenaar597a4222014-06-25 14:39:50 +0200976 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100977 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000978 return retval;
979}
980
981/*
982 * This function is used very often, keep it fast!!!!
983 *
984 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
985 * string at start of line. Warning: *headp is only set if it's a non-zero
986 * value, init to 0 before calling.
987 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100989win_lbr_chartabsize(
990 win_T *wp,
Bram Moolenaarc667da52019-11-30 20:52:27 +0100991 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100992 char_u *s,
993 colnr_T col,
994 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996#ifdef FEAT_LINEBREAK
997 int c;
998 int size;
999 colnr_T col2;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001000 colnr_T col_adj = 0; // col + screen size of tab
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 colnr_T colmax;
1002 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 int numberextra;
1005 char_u *ps;
1006 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001007 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +01001008 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001009
1010 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001011 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001012 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001013 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014#endif
1015 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016 if (wp->w_p_wrap)
1017 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1019 }
1020
1021#ifdef FEAT_LINEBREAK
1022 /*
1023 * First get normal size, without 'linebreak'
1024 */
1025 size = win_chartabsize(wp, s, col);
1026 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001027 if (tab_corr)
1028 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001029
1030 /*
1031 * If 'linebreak' set check at a blank before a non-blank if the line
1032 * needs a break here
1033 */
1034 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001035 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001036 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001038 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 {
1040 /*
1041 * Count all characters from first non-blank after a blank up to next
1042 * non-blank after a blank.
1043 */
1044 numberextra = win_col_off(wp);
1045 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001046 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001048 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001049 colmax += col_adj;
1050 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001051 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001052 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 }
1054
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 for (;;)
1056 {
1057 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001058 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 c = *s;
1060 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001061 && (VIM_ISBREAK(c)
1062 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001063 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 break;
1065
1066 col2 += win_chartabsize(wp, s, col2);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001067 if (col2 >= colmax) // doesn't fit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001069 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 break;
1071 }
1072 }
1073 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1075 && wp->w_p_wrap && in_win_border(wp, col))
1076 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001077 ++size; // Count the ">" in the last column.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 mb_added = 1;
1079 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080
1081 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001082 * May have to add something for 'breakindent' and/or 'showbreak'
1083 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001084 * Set *headp to the size of what we add.
1085 */
1086 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001087 sbr = get_showbreak_value(wp);
1088 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 colnr_T sbrlen = 0;
1091 int numberwidth = win_col_off(wp);
1092
1093 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001094 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001095 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001097 col -= wp->w_width;
1098 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001099 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001100 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001101 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001102 {
Bram Moolenaaree857022019-11-09 23:26:40 +01001103 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001104 if (col >= sbrlen)
1105 col -= sbrlen;
1106 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001108 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 else if (col > 0 && numberextra > 0)
1110 col += numberwidth - win_col_off2(wp);
1111
1112 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001113 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001114 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001116 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001117 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001118 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001119 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001121 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001122 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001123 int prev_width = col
1124 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001125
1126 if (width <= 0)
1127 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001128 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001129 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001130 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001131 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001132 }
1133 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001134 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001135 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001136 if (wp->w_p_bri)
1137 added += get_breakindent_win(wp, line);
1138
Bram Moolenaar95765082014-08-24 21:19:25 +02001139 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001140 if (col != 0)
1141 added = 0;
1142 }
1143 }
1144 if (headp != NULL)
1145 *headp = added + mb_added;
1146 return size;
1147#endif
1148}
1149
Bram Moolenaar071d4272004-06-13 20:20:40 +00001150/*
1151 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1152 * 'wrap' is on. This means we need to check for a double-byte character that
1153 * doesn't fit at the end of the screen line.
1154 */
1155 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001156win_nolbr_chartabsize(
1157 win_T *wp,
1158 char_u *s,
1159 colnr_T col,
1160 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001161{
1162 int n;
1163
Bram Moolenaareed9d462021-02-15 20:38:25 +01001164 if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001166# ifdef FEAT_VARTABS
1167 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1168 wp->w_buffer->b_p_vts_array);
1169# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 n = wp->w_buffer->b_p_ts;
1171 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001172# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173 }
1174 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001175 // Add one cell for a double-width character in the last column of the
1176 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1178 {
1179 if (headp != NULL)
1180 *headp = 1;
1181 return 3;
1182 }
1183 return n;
1184}
1185
1186/*
1187 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1188 * "wp".
1189 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001190 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001191in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001193 int width1; // width of first line (after line number)
1194 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
Bram Moolenaarc667da52019-11-30 20:52:27 +01001196 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001198 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001199 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001201 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001202 return TRUE;
1203 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001204 if (width2 <= 0)
1205 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return ((vcol - width1) % width2 == width2 - 1);
1207}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208
1209/*
1210 * Get virtual column number of pos.
1211 * start: on the first position of this character (TAB, ctrl)
1212 * cursor: where the cursor is on this character (first char, except for TAB)
1213 * end: on the last position of this character (TAB, ctrl)
1214 *
1215 * This is used very often, keep it fast!
1216 */
1217 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001218getvcol(
1219 win_T *wp,
1220 pos_T *pos,
1221 colnr_T *start,
1222 colnr_T *cursor,
1223 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001224{
1225 colnr_T vcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001226 char_u *ptr; // points to current char
1227 char_u *posptr; // points to char at pos->col
1228 char_u *line; // start of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229 int incr;
1230 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001231#ifdef FEAT_VARTABS
1232 int *vts = wp->w_buffer->b_p_vts_array;
1233#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 int ts = wp->w_buffer->b_p_ts;
1235 int c;
1236
1237 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001238 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001239 if (pos->col == MAXCOL)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001240 posptr = NULL; // continue until the NUL
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001241 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001242 {
Bram Moolenaar94f31922021-12-30 15:29:18 +00001243 colnr_T i;
1244
1245 // In a few cases the position can be beyond the end of the line.
1246 for (i = 0; i < pos->col; ++i)
1247 if (ptr[i] == NUL)
1248 {
1249 pos->col = i;
1250 break;
1251 }
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001252 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001253 if (has_mbyte)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001254 // always start on the first byte
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001255 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001256 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257
1258 /*
1259 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001260 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1261 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262 * Also use this when 'list' is set but tabs take their normal size.
1263 */
Bram Moolenaareed9d462021-02-15 20:38:25 +01001264 if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001265#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001266 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267#endif
1268 )
1269 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 for (;;)
1271 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001272 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 c = *ptr;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001274 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001275 if (c == NUL)
1276 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001277 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001278 break;
1279 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001280 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001281 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001282#ifdef FEAT_VARTABS
1283 incr = tabstop_padding(vcol, ts, vts);
1284#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001286#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001287 else
1288 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 if (has_mbyte)
1290 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001291 // For utf-8, if the byte is >= 0x80, need to look at
1292 // further bytes to find the cell width.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293 if (enc_utf8 && c >= 0x80)
1294 incr = utf_ptr2cells(ptr);
1295 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001296 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297
Bram Moolenaarc667da52019-11-30 20:52:27 +01001298 // If a double-cell char doesn't fit at the end of a line
1299 // it wraps to the next line, it's like this char is three
1300 // cells wide.
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001301 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1302 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303 {
1304 ++incr;
1305 head = 1;
1306 }
1307 }
1308 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001309 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 }
1311
Bram Moolenaarc667da52019-11-30 20:52:27 +01001312 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313 break;
1314
1315 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001316 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001317 }
1318 }
1319 else
1320 {
1321 for (;;)
1322 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001323 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001324 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001325 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001326 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 if (*ptr == NUL)
1328 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001329 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 break;
1331 }
1332
Bram Moolenaarc667da52019-11-30 20:52:27 +01001333 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 break;
1335
1336 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001337 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001338 }
1339 }
1340 if (start != NULL)
1341 *start = vcol + head;
1342 if (end != NULL)
1343 *end = vcol + incr - 1;
1344 if (cursor != NULL)
1345 {
1346 if (*ptr == TAB
1347 && (State & NORMAL)
1348 && !wp->w_p_list
1349 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001350 && !(VIsual_active
1351 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001352 )
Bram Moolenaarc667da52019-11-30 20:52:27 +01001353 *cursor = vcol + incr - 1; // cursor at end
Bram Moolenaar071d4272004-06-13 20:20:40 +00001354 else
Bram Moolenaarc667da52019-11-30 20:52:27 +01001355 *cursor = vcol + head; // cursor at start
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 }
1357}
1358
1359/*
1360 * Get virtual cursor column in the current window, pretending 'list' is off.
1361 */
1362 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001363getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364{
1365 int list_save = curwin->w_p_list;
1366 colnr_T vcol;
1367
1368 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001369 if (posp->coladd)
1370 getvvcol(curwin, posp, NULL, &vcol, NULL);
1371 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001372 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373 curwin->w_p_list = list_save;
1374 return vcol;
1375}
1376
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377/*
1378 * Get virtual column in virtual mode.
1379 */
1380 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001381getvvcol(
1382 win_T *wp,
1383 pos_T *pos,
1384 colnr_T *start,
1385 colnr_T *cursor,
1386 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
1388 colnr_T col;
1389 colnr_T coladd;
1390 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001392
1393 if (virtual_active())
1394 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001395 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001396 getvcol(wp, pos, &col, NULL, NULL);
1397
1398 coladd = pos->coladd;
1399 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001400 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001401 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001402 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001403 {
1404 int c = (*mb_ptr2char)(ptr + pos->col);
1405
1406 if (c != TAB && vim_isprintc(c))
1407 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001408 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001409 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001410 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001411 else
1412 coladd = 0;
1413 }
1414 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001415 col += coladd;
1416 if (start != NULL)
1417 *start = col;
1418 if (cursor != NULL)
1419 *cursor = col;
1420 if (end != NULL)
1421 *end = col + endadd;
1422 }
1423 else
1424 getvcol(wp, pos, start, cursor, end);
1425}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001426
Bram Moolenaar071d4272004-06-13 20:20:40 +00001427/*
1428 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429 * Used for Visual block mode.
1430 */
1431 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001432getvcols(
1433 win_T *wp,
1434 pos_T *pos1,
1435 pos_T *pos2,
1436 colnr_T *left,
1437 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001438{
1439 colnr_T from1, from2, to1, to2;
1440
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001441 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001442 {
1443 getvvcol(wp, pos1, &from1, NULL, &to1);
1444 getvvcol(wp, pos2, &from2, NULL, &to2);
1445 }
1446 else
1447 {
1448 getvvcol(wp, pos2, &from1, NULL, &to1);
1449 getvvcol(wp, pos1, &from2, NULL, &to2);
1450 }
1451 if (from2 < from1)
1452 *left = from2;
1453 else
1454 *left = from1;
1455 if (to2 > to1)
1456 {
1457 if (*p_sel == 'e' && from2 - 1 >= to1)
1458 *right = from2 - 1;
1459 else
1460 *right = to2;
1461 }
1462 else
1463 *right = to1;
1464}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001465
1466/*
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001467 * Skip over ' ' and '\t'.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001468 */
1469 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001470skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001471{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001472 char_u *p = q;
1473
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001474 while (VIM_ISWHITE(*p))
1475 ++p;
1476 return p;
1477}
1478
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Skip over ' ', '\t' and '\n'.
 * Returns a pointer to the first character in "q" that is neither white
 * space (VIM_ISWHITE()) nor NL.
 */
    char_u *
skipwhite_and_nl(char_u *q)
{
    char_u	*p = q;

    while (VIM_ISWHITE(*p) || *p == NL)
	++p;
    return p;
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001493
/*
 * getwhitecols: return the number of whitespace
 * columns (bytes) at the start of a given line
 *
 * getwhitecols_curline() does this for the line returned by
 * ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1503
1504 int
1505getwhitecols(char_u *p)
1506{
1507 return skipwhite(p) - p;
1508}
1509
1510/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001511 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512 */
1513 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001514skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001515{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001516 char_u *p = q;
1517
Bram Moolenaarc667da52019-11-30 20:52:27 +01001518 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001519 ++p;
1520 return p;
1521}
1522
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Skip over binary digits.
 * Returns a pointer to the first character in "q" that is not '0' or '1'.
 */
    char_u *
skipbin(char_u *q)
{
    char_u	*p = q;

    while (vim_isbdigit(*p))	// skip to next non-digit
	++p;
    return p;
}

/*
 * Skip over digits and hex characters.
 * Returns a pointer to the first character in "q" for which vim_isxdigit()
 * is false.
 */
    char_u *
skiphex(char_u *q)
{
    char_u	*p = q;

    while (vim_isxdigit(*p))	// skip to next non-digit
	++p;
    return p;
}
#endif
1550
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001551/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001552 * skip to bin digit (or NUL after the string)
1553 */
1554 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001555skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001556{
1557 char_u *p = q;
1558
Bram Moolenaarc667da52019-11-30 20:52:27 +01001559 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001560 ++p;
1561 return p;
1562}
1563
1564/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001565 * skip to digit (or NUL after the string)
1566 */
1567 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001568skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001569{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001570 char_u *p = q;
1571
Bram Moolenaarc667da52019-11-30 20:52:27 +01001572 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001573 ++p;
1574 return p;
1575}
1576
1577/*
1578 * skip to hex character (or NUL after the string)
1579 */
1580 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001581skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001582{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001583 char_u *p = q;
1584
Bram Moolenaarc667da52019-11-30 20:52:27 +01001585 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001586 ++p;
1587 return p;
1588}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001589
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' - '9'.
 */
    int
vim_isdigit(int c)
{
    return (c >= '0' && c <= '9');
}
1601
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0' - '9', 'a' - 'f' and 'A' - 'F'.
 */
    int
vim_isxdigit(int c)
{
    return (c >= '0' && c <= '9')
	|| (c >= 'a' && c <= 'f')
	|| (c >= 'A' && c <= 'F');
}
1614
/*
 * Corollary of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100.
 * Returns non-zero only for the binary digits '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return (c == '0' || c == '1');
}
1624
/*
 * Return non-zero when "c" is an octal digit: '0' - '7'.
 */
    static int
vim_isodigit(int c)
{
    return (c >= '0' && c <= '7');
}
1630
Bram Moolenaar78622822005-08-23 21:00:13 +00001631/*
1632 * Vim's own character class functions. These exist because many library
1633 * islower()/toupper() etc. do not work properly: they crash when used with
1634 * invalid values or can't handle latin1 when the locale is C.
1635 * Speed is most important here.
1636 */
1637#define LATIN1LOWER 'l'
1638#define LATIN1UPPER 'U'
1639
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001640static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001641static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1642static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001643
1644 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001645vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001646{
1647 if (c <= '@')
1648 return FALSE;
1649 if (c >= 0x80)
1650 {
1651 if (enc_utf8)
1652 return utf_islower(c);
1653 if (c >= 0x100)
1654 {
1655#ifdef HAVE_ISWLOWER
1656 if (has_mbyte)
1657 return iswlower(c);
1658#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001659 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001660 return FALSE;
1661 }
1662 if (enc_latin1like)
1663 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1664 }
1665 return islower(c);
1666}
1667
1668 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001669vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001670{
1671 if (c <= '@')
1672 return FALSE;
1673 if (c >= 0x80)
1674 {
1675 if (enc_utf8)
1676 return utf_isupper(c);
1677 if (c >= 0x100)
1678 {
1679#ifdef HAVE_ISWUPPER
1680 if (has_mbyte)
1681 return iswupper(c);
1682#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001683 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001684 return FALSE;
1685 }
1686 if (enc_latin1like)
1687 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1688 }
1689 return isupper(c);
1690}
1691
1692 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001693vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001694{
1695 if (c <= '@')
1696 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001697 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001698 {
1699 if (enc_utf8)
1700 return utf_toupper(c);
1701 if (c >= 0x100)
1702 {
1703#ifdef HAVE_TOWUPPER
1704 if (has_mbyte)
1705 return towupper(c);
1706#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001707 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001708 return c;
1709 }
1710 if (enc_latin1like)
1711 return latin1upper[c];
1712 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001713 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1714 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001715 return TOUPPER_LOC(c);
1716}
1717
1718 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001719vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001720{
1721 if (c <= '@')
1722 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001723 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001724 {
1725 if (enc_utf8)
1726 return utf_tolower(c);
1727 if (c >= 0x100)
1728 {
1729#ifdef HAVE_TOWLOWER
1730 if (has_mbyte)
1731 return towlower(c);
1732#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001733 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001734 return c;
1735 }
1736 if (enc_latin1like)
1737 return latin1lower[c];
1738 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001739 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1740 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001741 return TOLOWER_LOC(c);
1742}
Bram Moolenaar78622822005-08-23 21:00:13 +00001743
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744/*
1745 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1746 */
1747 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001748skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749{
1750 while (*p != ' ' && *p != '\t' && *p != NUL)
1751 ++p;
1752 return p;
1753}
1754
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755/*
1756 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1757 */
1758 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001759skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001760{
1761 while (*p != ' ' && *p != '\t' && *p != NUL)
1762 {
1763 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1764 ++p;
1765 ++p;
1766 }
1767 return p;
1768}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001769
1770/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001771 * Get a number from a string and skip over it.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772 * Note: the argument is a pointer to a char_u pointer!
1773 */
1774 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001775getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776{
1777 char_u *p;
1778 long retval;
1779
1780 p = *pp;
1781 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001782 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001784 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785 *pp = p;
1786 return retval;
1787}
1788
1789/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001790 * Like getdigits() but allow for embedded single quotes.
1791 */
1792 long
1793getdigits_quoted(char_u **pp)
1794{
1795 char_u *p = *pp;
1796 long retval = 0;
1797
1798 if (*p == '-')
1799 ++p;
1800 while (VIM_ISDIGIT(*p))
1801 {
1802 if (retval >= LONG_MAX / 10 - 10)
1803 retval = LONG_MAX;
1804 else
1805 retval = retval * 10 - '0' + *p;
1806 ++p;
1807 if (in_vim9script() && *p == '\'' && VIM_ISDIGIT(p[1]))
1808 ++p;
1809 }
1810 if (**pp == '-')
1811 {
1812 if (retval == LONG_MAX)
1813 retval = LONG_MIN;
1814 else
1815 retval = -retval;
1816 }
1817 *pp = p;
1818 return retval;
1819}
1820
1821/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822 * Return TRUE if "lbuf" is empty or only contains blanks.
1823 */
1824 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001825vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001826{
1827 char_u *p;
1828
1829 p = skipwhite(lbuf);
1830 return (*p == NUL || *p == '\r' || *p == '\n');
1831}
1832
1833/*
1834 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001835 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1836 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001837 * 0 decimal
1838 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001839 * 'O' octal
1840 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001841 * 'B' bin
1842 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001843 * 'X' hex
1844 * 'x' hex
1845 * If "len" is not NULL, the length of the number in characters is returned.
1846 * If "nptr" is not NULL, the signed result is returned in it.
1847 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001848 * If "what" contains STR2NR_BIN recognize binary numbers
1849 * If "what" contains STR2NR_OCT recognize octal numbers
1850 * If "what" contains STR2NR_HEX recognize hex numbers
1851 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001852 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001853 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001854 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001855 */
1856 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001857vim_str2nr(
1858 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001859 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001860 // or 'X' is hex, '0', 'o' or 'O' is octal,
1861 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001862 int *len, // return: detected length of number
1863 int what, // what numbers to recognize
1864 varnumber_T *nptr, // return: signed result
1865 uvarnumber_T *unptr, // return: unsigned result
1866 int maxlen, // max length of string to check
1867 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001868{
1869 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001870 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001872 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001873 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001874
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001875 if (len != NULL)
1876 *len = 0;
1877
Bram Moolenaar071d4272004-06-13 20:20:40 +00001878 if (ptr[0] == '-')
1879 {
1880 negative = TRUE;
1881 ++ptr;
1882 }
1883
Bram Moolenaarc667da52019-11-30 20:52:27 +01001884 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001885 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1886 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001888 pre = ptr[1];
1889 if ((what & STR2NR_HEX)
1890 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1891 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001892 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001893 ptr += 2;
1894 else if ((what & STR2NR_BIN)
1895 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1896 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001897 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001898 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001899 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001900 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001901 && (maxlen == 0 || maxlen > 2))
1902 // octal with prefix "0o"
1903 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001904 else
1905 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001906 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001907 pre = 0;
1908 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001909 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001910 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001911 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001912 {
1913 if (ptr[n] > '7')
1914 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001915 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001916 break;
1917 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001918 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001919 }
1920 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921 }
1922 }
1923
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001924 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001925 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001926 if (pre == 'B' || pre == 'b'
1927 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001928 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001929 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001930 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001931 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001932 while ('0' <= *ptr && *ptr <= '1')
1933 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001934 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001935 if (un <= UVARNUM_MAX / 2)
1936 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001937 else
1938 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001939 ++ptr;
1940 if (n++ == maxlen)
1941 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001942 if ((what & STR2NR_QUOTE) && *ptr == '\''
1943 && '0' <= ptr[1] && ptr[1] <= '1')
1944 {
1945 ++ptr;
1946 if (n++ == maxlen)
1947 break;
1948 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001949 }
1950 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001951 else if (pre == 'O' || pre == 'o' ||
1952 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001953 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001954 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001955 if (pre != 0 && pre != '0')
1956 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001957 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001958 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001959 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001960 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001961 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1962 else
1963 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001964 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001965 if (n++ == maxlen)
1966 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001967 if ((what & STR2NR_QUOTE) && *ptr == '\''
1968 && '0' <= ptr[1] && ptr[1] <= '7')
1969 {
1970 ++ptr;
1971 if (n++ == maxlen)
1972 break;
1973 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001974 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001975 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001976 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001977 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001978 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001979 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001980 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001981 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001982 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001983 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001984 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001985 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1986 else
1987 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001988 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001989 if (n++ == maxlen)
1990 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001991 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1992 {
1993 ++ptr;
1994 if (n++ == maxlen)
1995 break;
1996 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001997 }
1998 }
1999 else
2000 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002001 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00002002 while (VIM_ISDIGIT(*ptr))
2003 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002004 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
2005
Bram Moolenaarc667da52019-11-30 20:52:27 +01002006 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002007 if (un < UVARNUM_MAX / 10
2008 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
2009 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002010 else
2011 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002012 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002013 if (n++ == maxlen)
2014 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002015 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
2016 {
2017 ++ptr;
2018 if (n++ == maxlen)
2019 break;
2020 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021 }
2022 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002023
Bram Moolenaar4b96df52020-01-26 22:00:26 +01002024 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02002025 // most likely a typo.
2026 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
2027 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002029 if (prep != NULL)
2030 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002031 if (len != NULL)
2032 *len = (int)(ptr - start);
2033 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002034 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002035 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002036 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002037 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002038 if (un > VARNUM_MAX)
2039 *nptr = VARNUM_MIN;
2040 else
2041 *nptr = -(varnumber_T)un;
2042 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002043 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002044 {
2045 if (un > VARNUM_MAX)
2046 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02002047 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002048 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002049 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002050 if (unptr != NULL)
2051 *unptr = un;
2052}
2053
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * The argument is not checked: for any other character the result is
 * "c" - '0', which is not a meaningful value.
 */
    int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
	return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
	return c - 'A' + 10;
    return c - '0';
}
2067
Bram Moolenaar071d4272004-06-13 20:20:40 +00002068/*
2069 * Convert two hex characters to a byte.
2070 * Return -1 if one of the characters is not hex.
2071 */
2072 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002073hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002074{
2075 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2076 return -1;
2077 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2078}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002079
2080/*
2081 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002082 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002083 * backslash is not a normal file name character.
2084 * '$' is a valid file name character, we don't remove the backslash before
2085 * it. This means it is not possible to use an environment variable after a
2086 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2087 * Although "\ name" is valid, the backslash in "Program\ files" must be
2088 * removed. Assume a file name doesn't start with a space.
2089 * For multi-byte names, never remove a backslash before a non-ascii
2090 * character, assume that all multi-byte characters are valid file name
2091 * characters.
2092 */
2093 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002094rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002095{
2096#ifdef BACKSLASH_IN_FILENAME
2097 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002098 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002099 && (str[1] == ' '
2100 || (str[1] != NUL
2101 && str[1] != '*'
2102 && str[1] != '?'
2103 && !vim_isfilec(str[1]))));
2104#else
2105 return (str[0] == '\\' && str[1] != NUL);
2106#endif
2107}
2108
2109/*
2110 * Halve the number of backslashes in a file name argument.
2111 * For MS-DOS we only do this if the character after the backslash
2112 * is not a normal file character.
2113 */
2114 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002115backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002116{
2117 for ( ; *p; ++p)
2118 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002119 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120}
2121
2122/*
2123 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002124 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002125 */
2126 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002127backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002128{
2129 char_u *res;
2130
2131 res = vim_strsave(p);
2132 if (res == NULL)
2133 return p;
2134 backslash_halve(res);
2135 return res;
2136}
2137
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed by the EBCDIC byte value; the entries are written as
 * octal numbers.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are converted in place.
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    int		i;

    for (i = 0; i < len; i++)
	buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif