blob: 31b03eb38a15a342023f36f2b9037458a9d734ab [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
// b_chartab[] is an array of 32 bytes, each bit representing one of the
// characters 0-255: byte index is "c >> 3", bit index is "c & 7".
// "buf" must point to something with a b_chartab[] member, "c" must be in the
// range 0-255.  Note that "c" is evaluated twice.
// The whole expansion is parenthesized so that each macro can safely be used
// inside a larger expression, not only as a statement.
#define SET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
/*
 * Layout of one g_chartab[] entry: a cell count in the low bits plus three
 * independent flag bits.
 */
#define CT_CELL_MASK 0x07	// low three bits: nr of display cells (1, 2 or 4)
#define CT_PRINT_CHAR (1 << 4)	// flag: character is printable
#define CT_ID_CHAR (1 << 5)	// flag: character is an ID char
#define CT_FNAME_CHAR (1 << 6)	// flag: character is a file name char
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200172 else if (has_mbyte)
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200182 else if (has_mbyte)
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 c2 = mb_ptr2char_adv(&p);
184 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c2 = *p++;
186 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000187 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
Bram Moolenaarc667da52019-11-30 20:52:27 +0100191 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100210 // Use the MB_ functions here, because isalpha() doesn't
211 // work properly when 'encoding' is "latin1" and the locale is
212 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100215 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
217 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100218 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000219 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100222 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 {
224 if ((c < ' '
225#ifndef EBCDIC
226 || c > '~'
227#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100228 // For double-byte we keep the cell width, so
229 // that we can detect it from the first byte.
230 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 {
232 if (tilde)
233 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100234 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 }
238 else
239 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100240 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
241 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000242 }
243 }
244 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100245 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246 {
247 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100252 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 {
254 if (tilde)
255 RESET_CHARTAB(buf, c);
256 else
257 SET_CHARTAB(buf, c);
258 }
259 }
260 ++c;
261 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100262
263 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100266 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 }
269 }
270 chartab_initialized = TRUE;
271 return OK;
272}
273
274/*
275 * Translate any special characters in buf[bufsize] in-place.
276 * The result is a string with only printable characters, but if there is not
277 * enough room, not all characters will be translated.
278 */
279 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100280trans_characters(
281 char_u *buf,
282 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100284 int len; // length of string needing translation
285 int room; // room in buffer after string
286 char_u *trs; // translated character
287 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288
289 len = (int)STRLEN(buf);
290 room = bufsize - len;
291 while (*buf != 0)
292 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100293 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000294 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000295 len -= trs_len;
296 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 {
298 trs = transchar_byte(*buf);
299 trs_len = (int)STRLEN(trs);
300 if (trs_len > 1)
301 {
302 room -= trs_len - 1;
303 if (room <= 0)
304 return;
305 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
306 }
307 mch_memmove(buf, trs, (size_t)trs_len);
308 --len;
309 }
310 buf += trs_len;
311 }
312}
313
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314/*
315 * Translate a string into allocated memory, replacing special chars with
316 * printable chars. Returns NULL when out of memory.
317 */
318 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100319transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320{
321 char_u *res;
322 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323 int l, len, c;
324 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 if (has_mbyte)
327 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100328 // Compute the length of the result, taking account of unprintable
329 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330 len = 0;
331 p = s;
332 while (*p != NUL)
333 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000334 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000335 {
336 c = (*mb_ptr2char)(p);
337 p += l;
338 if (vim_isprintc(c))
339 len += l;
340 else
341 {
342 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000343 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344 }
345 }
346 else
347 {
348 l = byte2cells(*p++);
349 if (l > 0)
350 len += l;
351 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100352 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353 }
354 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200355 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356 }
357 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 if (res != NULL)
360 {
361 *res = NUL;
362 p = s;
363 while (*p != NUL)
364 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000365 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000366 {
367 c = (*mb_ptr2char)(p);
368 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100369 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370 else
371 transchar_hex(res + STRLEN(res), c);
372 p += l;
373 }
374 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375 STRCAT(res, transchar_byte(*p++));
376 }
377 }
378 return res;
379}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000382 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
383 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000384 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
385 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 */
387 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100388str_foldcase(
389 char_u *str,
390 int orglen,
391 char_u *buf,
392 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 garray_T ga;
395 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000396 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000397
398#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
399#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
401#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402
Bram Moolenaarc667da52019-11-30 20:52:27 +0100403 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404 if (buf == NULL)
405 {
406 ga_init2(&ga, 1, 10);
407 if (ga_grow(&ga, len + 1) == FAIL)
408 return NULL;
409 mch_memmove(ga.ga_data, str, (size_t)len);
410 ga.ga_len = len;
411 }
412 else
413 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100414 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000415 len = buflen - 1;
416 mch_memmove(buf, str, (size_t)len);
417 }
418 if (buf == NULL)
419 GA_CHAR(len) = NUL;
420 else
421 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaarc667da52019-11-30 20:52:27 +0100423 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000425 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
429 if (enc_utf8)
430 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000431 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100432 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434
Bram Moolenaarc667da52019-11-30 20:52:27 +0100435 // Only replace the character when it is not an invalid
436 // sequence (ASCII character or more than one byte) and
437 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100438 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441
Bram Moolenaarc667da52019-11-30 20:52:27 +0100442 // If the byte length changes need to shift the following
443 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100444 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (buf == NULL
449 ? ga_grow(&ga, nlen - olen + 1) == FAIL
450 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100452 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100454 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000456 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 if (buf == NULL)
460 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100461 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
462 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 }
464 else
465 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100466 STRMOVE(buf + i + nlen, buf + i + olen);
467 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000468 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000469 }
470 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100474 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000475 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 }
477 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000479 if (buf == NULL)
480 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
481 else
482 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000483 ++i;
484 }
485 }
486
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 if (buf == NULL)
488 return (char_u *)ga.ga_data;
489 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000491
492/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100493 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * Does NOT work for multi-byte characters, c must be <= 255.
497 * Also doesn't work for the first byte of a multi-byte, "c" must be a
498 * character!
499 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200500static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501
502 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100503transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200505 return transchar_buf(curbuf, c);
506}
507
508 char_u *
509transchar_buf(buf_T *buf, int c)
510{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 int i;
512
513 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100514 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200516 transchar_charbuf[0] = '~';
517 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100530 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200531 transchar_charbuf[i] = c;
532 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000533 }
534 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200535 transchar_nonprint(buf, transchar_charbuf + i, c);
536 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200548 transchar_nonprint(curbuf, transchar_charbuf, c);
549 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200556 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200560transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100563 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200564 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000566
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200568 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000569
570#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100571 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100574 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575#endif
576 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200577 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000578#ifdef EBCDIC
579 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200580 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000581 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#endif
586
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200587 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200591 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200596 charbuf[0] = '|';
597 charbuf[1] = c - 0x80;
598 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 }
600#else
601 else if (c < 64)
602 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200603 charbuf[0] = '~';
604 charbuf[1] = MetaChar(c);
605 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606 }
607#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100608 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200614 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
static unsigned
nr2hex(unsigned c)
{
    static const char hexdigits[] = "0123456789abcdef";

    return (unsigned)hexdigits[c & 0xf];
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100682 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8)
684 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100685 // DBCS: double-byte means double-width, except for euc-jp with first
686 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100704 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100707 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
/*
 * RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col): make the enclosing function
 * return the number of screen cells the character at "p" takes when it is
 * displayed at column "col", taking into account the size of a tab.
 *
 * A TAB is expanded up to the next tab stop of buffer "buf", unless window
 * "wp" has 'list' set without a tab1 listchar; in that case, and for any
 * other character, the result is ptr2cells(p).
 *
 * This is a define to make it fast, it is used very often!!!
 * The expansion is an if/else whose branches both "return", so it must be
 * the last thing in a function returning int.  Every argument is put in
 * parentheses so that an expression can be passed safely.
 * Also see getvcol() below.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
        return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at column "col", using window "wp" and the buffer of that
 * window for the handling of a TAB.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    // the macro returns from this function in every case
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100803 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
Bram Moolenaare3d1f4c2021-04-06 20:21:59 +0200836 * Like vim_isIDc() but not using the 'isident' option: letters, numbers and
837 * underscore.
838 */
839 int
840vim_isNormalIDc(int c)
841{
842 return ASCII_ISALNUM(c) || c == '_';
843}
844
845/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100847 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 * For multi-byte characters mb_get_class() is used (builtin rules).
849 */
850 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100851vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100853 return vim_iswordc_buf(c, curbuf);
854}
855
856 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100857vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100858{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 if (c >= 0x100)
860 {
861 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000862 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100864 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100865 return FALSE;
866 }
867 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868}
869
870/*
871 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
872 */
873 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100874vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
878
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100880vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100882 int c = *p;
883
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 if (has_mbyte && MB_BYTE2LEN(c) > 1)
885 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100886 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888
889/*
890 * return TRUE if 'c' is a valid file-name character
891 * Assume characters above 0x100 are valid (multi-byte).
892 */
893 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100894vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100896 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897}
898
899/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000900 * return TRUE if 'c' is a valid file-name character or a wildcard character
901 * Assume characters above 0x100 are valid (multi-byte).
902 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
903 * returns false.
904 */
905 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100906vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000907{
908 char_u buf[2];
909
910 buf[0] = (char_u)c;
911 buf[1] = NUL;
912 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
913}
914
915/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200916 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000917 * Assume characters above 0x100 are printable (multi-byte), except for
918 * Unicode.
919 */
920 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100921vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923 if (enc_utf8 && c >= 0x100)
924 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100925 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926}
927
928/*
929 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
930 * byte of a double-byte character.
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000935 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
936 return FALSE;
937 if (enc_utf8 && c >= 0x100)
938 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100939 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940}
941
942/*
943 * like chartabsize(), but also check for line breaks on the screen
944 */
945 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100946lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100947 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100948 unsigned char *s,
949 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950{
951#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100952 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
953 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954 {
955#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 if (curwin->w_p_wrap)
957 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
959#ifdef FEAT_LINEBREAK
960 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200961 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962#endif
963}
964
965/*
966 * Call lbr_chartabsize() and advance the pointer.
967 */
968 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100969lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100970 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100971 char_u **s,
972 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973{
974 int retval;
975
Bram Moolenaar597a4222014-06-25 14:39:50 +0200976 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100977 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000978 return retval;
979}
980
981/*
982 * This function is used very often, keep it fast!!!!
983 *
984 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
985 * string at start of line. Warning: *headp is only set if it's a non-zero
986 * value, init to 0 before calling.
987 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100989win_lbr_chartabsize(
990 win_T *wp,
Bram Moolenaarc667da52019-11-30 20:52:27 +0100991 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100992 char_u *s,
993 colnr_T col,
994 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996#ifdef FEAT_LINEBREAK
997 int c;
998 int size;
999 colnr_T col2;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001000 colnr_T col_adj = 0; // col + screen size of tab
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 colnr_T colmax;
1002 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 int numberextra;
1005 char_u *ps;
1006 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001007 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +01001008 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001009
1010 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001011 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001012 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001013 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014#endif
1015 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016 if (wp->w_p_wrap)
1017 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1019 }
1020
1021#ifdef FEAT_LINEBREAK
1022 /*
1023 * First get normal size, without 'linebreak'
1024 */
1025 size = win_chartabsize(wp, s, col);
1026 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001027 if (tab_corr)
1028 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001029
1030 /*
1031 * If 'linebreak' set check at a blank before a non-blank if the line
1032 * needs a break here
1033 */
1034 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001035 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001036 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001038 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 {
1040 /*
1041 * Count all characters from first non-blank after a blank up to next
1042 * non-blank after a blank.
1043 */
1044 numberextra = win_col_off(wp);
1045 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001046 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001048 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001049 colmax += col_adj;
1050 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001051 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001052 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 }
1054
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 for (;;)
1056 {
1057 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001058 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 c = *s;
1060 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001061 && (VIM_ISBREAK(c)
1062 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001063 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 break;
1065
1066 col2 += win_chartabsize(wp, s, col2);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001067 if (col2 >= colmax) // doesn't fit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001069 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 break;
1071 }
1072 }
1073 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1075 && wp->w_p_wrap && in_win_border(wp, col))
1076 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001077 ++size; // Count the ">" in the last column.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 mb_added = 1;
1079 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080
1081 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001082 * May have to add something for 'breakindent' and/or 'showbreak'
1083 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001084 * Set *headp to the size of what we add.
1085 */
1086 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001087 sbr = get_showbreak_value(wp);
1088 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 colnr_T sbrlen = 0;
1091 int numberwidth = win_col_off(wp);
1092
1093 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001094 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001095 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001097 col -= wp->w_width;
1098 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001099 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001100 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001101 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001102 {
Bram Moolenaaree857022019-11-09 23:26:40 +01001103 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001104 if (col >= sbrlen)
1105 col -= sbrlen;
1106 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001108 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 else if (col > 0 && numberextra > 0)
1110 col += numberwidth - win_col_off2(wp);
1111
1112 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001113 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001114 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001116 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001117 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001118 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001119 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001121 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001122 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001123 int prev_width = col
1124 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001125
1126 if (width <= 0)
1127 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001128 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001129 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001130 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001131 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001132 }
1133 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001134 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001135 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001136 if (wp->w_p_bri)
1137 added += get_breakindent_win(wp, line);
1138
Bram Moolenaar95765082014-08-24 21:19:25 +02001139 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001140 if (col != 0)
1141 added = 0;
1142 }
1143 }
1144 if (headp != NULL)
1145 *headp = added + mb_added;
1146 return size;
1147#endif
1148}
1149
Bram Moolenaar071d4272004-06-13 20:20:40 +00001150/*
1151 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1152 * 'wrap' is on. This means we need to check for a double-byte character that
1153 * doesn't fit at the end of the screen line.
1154 */
1155 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001156win_nolbr_chartabsize(
1157 win_T *wp,
1158 char_u *s,
1159 colnr_T col,
1160 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001161{
1162 int n;
1163
Bram Moolenaareed9d462021-02-15 20:38:25 +01001164 if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001166# ifdef FEAT_VARTABS
1167 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1168 wp->w_buffer->b_p_vts_array);
1169# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 n = wp->w_buffer->b_p_ts;
1171 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001172# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173 }
1174 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001175 // Add one cell for a double-width character in the last column of the
1176 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1178 {
1179 if (headp != NULL)
1180 *headp = 1;
1181 return 3;
1182 }
1183 return n;
1184}
1185
1186/*
1187 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1188 * "wp".
1189 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001190 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001191in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001193 int width1; // width of first line (after line number)
1194 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
Bram Moolenaarc667da52019-11-30 20:52:27 +01001196 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001198 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001199 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001201 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001202 return TRUE;
1203 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001204 if (width2 <= 0)
1205 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return ((vcol - width1) % width2 == width2 - 1);
1207}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208
1209/*
1210 * Get virtual column number of pos.
1211 * start: on the first position of this character (TAB, ctrl)
1212 * cursor: where the cursor is on this character (first char, except for TAB)
1213 * end: on the last position of this character (TAB, ctrl)
1214 *
1215 * This is used very often, keep it fast!
1216 */
1217 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001218getvcol(
1219 win_T *wp,
1220 pos_T *pos,
1221 colnr_T *start,
1222 colnr_T *cursor,
1223 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001224{
1225 colnr_T vcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001226 char_u *ptr; // points to current char
1227 char_u *posptr; // points to char at pos->col
1228 char_u *line; // start of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229 int incr;
1230 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001231#ifdef FEAT_VARTABS
1232 int *vts = wp->w_buffer->b_p_vts_array;
1233#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 int ts = wp->w_buffer->b_p_ts;
1235 int c;
1236
1237 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001238 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001239 if (pos->col == MAXCOL)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001240 posptr = NULL; // continue until the NUL
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001241 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001242 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001243 // Special check for an empty line, which can happen on exit, when
1244 // ml_get_buf() always returns an empty string.
Bram Moolenaar955f1982017-02-05 15:10:51 +01001245 if (*ptr == NUL)
1246 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001247 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001248 if (has_mbyte)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001249 // always start on the first byte
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001250 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001251 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252
1253 /*
1254 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001255 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1256 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257 * Also use this when 'list' is set but tabs take their normal size.
1258 */
Bram Moolenaareed9d462021-02-15 20:38:25 +01001259 if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001260#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001261 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262#endif
1263 )
1264 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001265 for (;;)
1266 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001268 c = *ptr;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001269 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 if (c == NUL)
1271 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001272 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 break;
1274 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001275 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001276 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001277#ifdef FEAT_VARTABS
1278 incr = tabstop_padding(vcol, ts, vts);
1279#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001280 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001281#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001282 else
1283 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001284 if (has_mbyte)
1285 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001286 // For utf-8, if the byte is >= 0x80, need to look at
1287 // further bytes to find the cell width.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001288 if (enc_utf8 && c >= 0x80)
1289 incr = utf_ptr2cells(ptr);
1290 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001291 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292
Bram Moolenaarc667da52019-11-30 20:52:27 +01001293 // If a double-cell char doesn't fit at the end of a line
1294 // it wraps to the next line, it's like this char is three
1295 // cells wide.
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001296 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1297 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001298 {
1299 ++incr;
1300 head = 1;
1301 }
1302 }
1303 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001304 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001305 }
1306
Bram Moolenaarc667da52019-11-30 20:52:27 +01001307 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 break;
1309
1310 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001311 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001312 }
1313 }
1314 else
1315 {
1316 for (;;)
1317 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001318 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001320 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001321 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001322 if (*ptr == NUL)
1323 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001324 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001325 break;
1326 }
1327
Bram Moolenaarc667da52019-11-30 20:52:27 +01001328 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329 break;
1330
1331 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001332 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001333 }
1334 }
1335 if (start != NULL)
1336 *start = vcol + head;
1337 if (end != NULL)
1338 *end = vcol + incr - 1;
1339 if (cursor != NULL)
1340 {
1341 if (*ptr == TAB
1342 && (State & NORMAL)
1343 && !wp->w_p_list
1344 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001345 && !(VIsual_active
1346 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001347 )
Bram Moolenaarc667da52019-11-30 20:52:27 +01001348 *cursor = vcol + incr - 1; // cursor at end
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 else
Bram Moolenaarc667da52019-11-30 20:52:27 +01001350 *cursor = vcol + head; // cursor at start
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351 }
1352}
1353
1354/*
1355 * Get virtual cursor column in the current window, pretending 'list' is off.
1356 */
1357 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001358getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359{
1360 int list_save = curwin->w_p_list;
1361 colnr_T vcol;
1362
1363 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001364 if (posp->coladd)
1365 getvvcol(curwin, posp, NULL, &vcol, NULL);
1366 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001367 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 curwin->w_p_list = list_save;
1369 return vcol;
1370}
1371
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372/*
1373 * Get virtual column in virtual mode.
1374 */
1375 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001376getvvcol(
1377 win_T *wp,
1378 pos_T *pos,
1379 colnr_T *start,
1380 colnr_T *cursor,
1381 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001382{
1383 colnr_T col;
1384 colnr_T coladd;
1385 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387
1388 if (virtual_active())
1389 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001390 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 getvcol(wp, pos, &col, NULL, NULL);
1392
1393 coladd = pos->coladd;
1394 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001395 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001396 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001397 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001398 {
1399 int c = (*mb_ptr2char)(ptr + pos->col);
1400
1401 if (c != TAB && vim_isprintc(c))
1402 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001403 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001404 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001405 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001406 else
1407 coladd = 0;
1408 }
1409 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001410 col += coladd;
1411 if (start != NULL)
1412 *start = col;
1413 if (cursor != NULL)
1414 *cursor = col;
1415 if (end != NULL)
1416 *end = col + endadd;
1417 }
1418 else
1419 getvcol(wp, pos, start, cursor, end);
1420}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001421
Bram Moolenaar071d4272004-06-13 20:20:40 +00001422/*
1423 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1424 * Used for Visual block mode.
1425 */
1426 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001427getvcols(
1428 win_T *wp,
1429 pos_T *pos1,
1430 pos_T *pos2,
1431 colnr_T *left,
1432 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001433{
1434 colnr_T from1, from2, to1, to2;
1435
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001436 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437 {
1438 getvvcol(wp, pos1, &from1, NULL, &to1);
1439 getvvcol(wp, pos2, &from2, NULL, &to2);
1440 }
1441 else
1442 {
1443 getvvcol(wp, pos2, &from1, NULL, &to1);
1444 getvvcol(wp, pos1, &from2, NULL, &to2);
1445 }
1446 if (from2 < from1)
1447 *left = from2;
1448 else
1449 *left = from1;
1450 if (to2 > to1)
1451 {
1452 if (*p_sel == 'e' && from2 - 1 >= to1)
1453 *right = from2 - 1;
1454 else
1455 *right = to2;
1456 }
1457 else
1458 *right = to1;
1459}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001460
1461/*
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001462 * Skip over ' ' and '\t'.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001463 */
1464 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001465skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001466{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001467 char_u *p = q;
1468
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001469 while (VIM_ISWHITE(*p))
1470 ++p;
1471 return p;
1472}
1473
1474/*
1475 * skip over ' ', '\t' and '\n'.
1476 */
1477 char_u *
1478skipwhite_and_nl(char_u *q)
1479{
1480 char_u *p = q;
1481
1482 while (VIM_ISWHITE(*p) || *p == NL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001483 ++p;
1484 return p;
1485}
1486
/*
 * Return the number of leading white space bytes, as counted by
 * getwhitecols(), in the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1496
1497 int
1498getwhitecols(char_u *p)
1499{
1500 return skipwhite(p) - p;
1501}
1502
1503/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001504 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001505 */
1506 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001507skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001508{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001509 char_u *p = q;
1510
Bram Moolenaarc667da52019-11-30 20:52:27 +01001511 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512 ++p;
1513 return p;
1514}
1515
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001516#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001517/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001518 * skip over binary digits
1519 */
1520 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001521skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001522{
1523 char_u *p = q;
1524
Bram Moolenaarc667da52019-11-30 20:52:27 +01001525 while (vim_isbdigit(*p)) // skip to next non-digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001526 ++p;
1527 return p;
1528}
1529
1530/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001531 * skip over digits and hex characters
1532 */
1533 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001534skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001535{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001536 char_u *p = q;
1537
Bram Moolenaarc667da52019-11-30 20:52:27 +01001538 while (vim_isxdigit(*p)) // skip to next non-digit
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001539 ++p;
1540 return p;
1541}
1542#endif
1543
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001544/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001545 * skip to bin digit (or NUL after the string)
1546 */
1547 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001548skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001549{
1550 char_u *p = q;
1551
Bram Moolenaarc667da52019-11-30 20:52:27 +01001552 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001553 ++p;
1554 return p;
1555}
1556
1557/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001558 * skip to digit (or NUL after the string)
1559 */
1560 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001561skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001562{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001563 char_u *p = q;
1564
Bram Moolenaarc667da52019-11-30 20:52:27 +01001565 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001566 ++p;
1567 return p;
1568}
1569
1570/*
1571 * skip to hex character (or NUL after the string)
1572 */
1573 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001574skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001575{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001576 char_u *p = q;
1577
Bram Moolenaarc667da52019-11-30 20:52:27 +01001578 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001579 ++p;
1580 return p;
1581}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001582
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used, because on some systems it also accepts
 * superscript 1 as a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' - '9'.
 */
    int
vim_isdigit(int c)
{
    return ('0' <= c && c <= '9');
}
1594
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used, because on some systems it also accepts
 * superscript 1 as a digit.
 * Returns non-zero only for '0' - '9', 'a' - 'f' and 'A' - 'F'.
 */
    int
vim_isxdigit(int c)
{
    int         is_decimal = (c >= '0' && c <= '9');
    int         is_lower_hex = (c >= 'a' && c <= 'f');
    int         is_upper_hex = (c >= 'A' && c <= 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1607
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits; can
 * handle characters > 0x100.
 * Returns non-zero only for '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return ('0' <= c && c <= '1');
}
1617
/*
 * Return non-zero only when "c" is an octal digit, '0' - '7'.
 */
    static int
vim_isodigit(int c)
{
    return ('0' <= c && c <= '7');
}
1623
Bram Moolenaar78622822005-08-23 21:00:13 +00001624/*
1625 * Vim's own character class functions. These exist because many library
1626 * islower()/toupper() etc. do not work properly: they crash when used with
1627 * invalid values or can't handle latin1 when the locale is C.
1628 * Speed is most important here.
1629 */
1630#define LATIN1LOWER 'l'
1631#define LATIN1UPPER 'U'
1632
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001633static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001634static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1635static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001636
1637 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001638vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001639{
1640 if (c <= '@')
1641 return FALSE;
1642 if (c >= 0x80)
1643 {
1644 if (enc_utf8)
1645 return utf_islower(c);
1646 if (c >= 0x100)
1647 {
1648#ifdef HAVE_ISWLOWER
1649 if (has_mbyte)
1650 return iswlower(c);
1651#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001652 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001653 return FALSE;
1654 }
1655 if (enc_latin1like)
1656 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1657 }
1658 return islower(c);
1659}
1660
1661 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001662vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001663{
1664 if (c <= '@')
1665 return FALSE;
1666 if (c >= 0x80)
1667 {
1668 if (enc_utf8)
1669 return utf_isupper(c);
1670 if (c >= 0x100)
1671 {
1672#ifdef HAVE_ISWUPPER
1673 if (has_mbyte)
1674 return iswupper(c);
1675#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001676 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001677 return FALSE;
1678 }
1679 if (enc_latin1like)
1680 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1681 }
1682 return isupper(c);
1683}
1684
1685 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001686vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001687{
1688 if (c <= '@')
1689 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001690 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001691 {
1692 if (enc_utf8)
1693 return utf_toupper(c);
1694 if (c >= 0x100)
1695 {
1696#ifdef HAVE_TOWUPPER
1697 if (has_mbyte)
1698 return towupper(c);
1699#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001700 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001701 return c;
1702 }
1703 if (enc_latin1like)
1704 return latin1upper[c];
1705 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001706 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1707 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001708 return TOUPPER_LOC(c);
1709}
1710
1711 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001712vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001713{
1714 if (c <= '@')
1715 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001716 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001717 {
1718 if (enc_utf8)
1719 return utf_tolower(c);
1720 if (c >= 0x100)
1721 {
1722#ifdef HAVE_TOWLOWER
1723 if (has_mbyte)
1724 return towlower(c);
1725#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001726 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001727 return c;
1728 }
1729 if (enc_latin1like)
1730 return latin1lower[c];
1731 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001732 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1733 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001734 return TOLOWER_LOC(c);
1735}
Bram Moolenaar78622822005-08-23 21:00:13 +00001736
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737/*
1738 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1739 */
1740 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001741skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742{
1743 while (*p != ' ' && *p != '\t' && *p != NUL)
1744 ++p;
1745 return p;
1746}
1747
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748/*
1749 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1750 */
1751 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001752skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753{
1754 while (*p != ' ' && *p != '\t' && *p != NUL)
1755 {
1756 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1757 ++p;
1758 ++p;
1759 }
1760 return p;
1761}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001762
1763/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001764 * Get a number from a string and skip over it.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 * Note: the argument is a pointer to a char_u pointer!
1766 */
1767 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001768getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001769{
1770 char_u *p;
1771 long retval;
1772
1773 p = *pp;
1774 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001775 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001777 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001778 *pp = p;
1779 return retval;
1780}
1781
1782/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001783 * Like getdigits() but allow for embedded single quotes.
1784 */
1785 long
1786getdigits_quoted(char_u **pp)
1787{
1788 char_u *p = *pp;
1789 long retval = 0;
1790
1791 if (*p == '-')
1792 ++p;
1793 while (VIM_ISDIGIT(*p))
1794 {
1795 if (retval >= LONG_MAX / 10 - 10)
1796 retval = LONG_MAX;
1797 else
1798 retval = retval * 10 - '0' + *p;
1799 ++p;
1800 if (in_vim9script() && *p == '\'' && VIM_ISDIGIT(p[1]))
1801 ++p;
1802 }
1803 if (**pp == '-')
1804 {
1805 if (retval == LONG_MAX)
1806 retval = LONG_MIN;
1807 else
1808 retval = -retval;
1809 }
1810 *pp = p;
1811 return retval;
1812}
1813
1814/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 * Return TRUE if "lbuf" is empty or only contains blanks.
1816 */
1817 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001818vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819{
1820 char_u *p;
1821
1822 p = skipwhite(lbuf);
1823 return (*p == NUL || *p == '\r' || *p == '\n');
1824}
1825
1826/*
1827 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001828 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1829 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830 * 0 decimal
1831 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001832 * 'O' octal
1833 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001834 * 'B' bin
1835 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001836 * 'X' hex
1837 * 'x' hex
1838 * If "len" is not NULL, the length of the number in characters is returned.
1839 * If "nptr" is not NULL, the signed result is returned in it.
1840 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001841 * If "what" contains STR2NR_BIN recognize binary numbers
1842 * If "what" contains STR2NR_OCT recognize octal numbers
1843 * If "what" contains STR2NR_HEX recognize hex numbers
1844 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001845 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001846 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001847 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001848 */
1849 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001850vim_str2nr(
1851 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001852 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001853 // or 'X' is hex, '0', 'o' or 'O' is octal,
1854 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001855 int *len, // return: detected length of number
1856 int what, // what numbers to recognize
1857 varnumber_T *nptr, // return: signed result
1858 uvarnumber_T *unptr, // return: unsigned result
1859 int maxlen, // max length of string to check
1860 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001861{
1862 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001863 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001864 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001865 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001866 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001868 if (len != NULL)
1869 *len = 0;
1870
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 if (ptr[0] == '-')
1872 {
1873 negative = TRUE;
1874 ++ptr;
1875 }
1876
Bram Moolenaarc667da52019-11-30 20:52:27 +01001877 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001878 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1879 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001881 pre = ptr[1];
1882 if ((what & STR2NR_HEX)
1883 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1884 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001885 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001886 ptr += 2;
1887 else if ((what & STR2NR_BIN)
1888 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1889 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001890 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001891 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001892 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001893 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001894 && (maxlen == 0 || maxlen > 2))
1895 // octal with prefix "0o"
1896 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001897 else
1898 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001899 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001900 pre = 0;
1901 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001902 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001903 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001904 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001905 {
1906 if (ptr[n] > '7')
1907 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001908 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001909 break;
1910 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001911 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001912 }
1913 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914 }
1915 }
1916
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001917 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001918 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001919 if (pre == 'B' || pre == 'b'
1920 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001921 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001922 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001923 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001924 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001925 while ('0' <= *ptr && *ptr <= '1')
1926 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001927 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001928 if (un <= UVARNUM_MAX / 2)
1929 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001930 else
1931 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001932 ++ptr;
1933 if (n++ == maxlen)
1934 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001935 if ((what & STR2NR_QUOTE) && *ptr == '\''
1936 && '0' <= ptr[1] && ptr[1] <= '1')
1937 {
1938 ++ptr;
1939 if (n++ == maxlen)
1940 break;
1941 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001942 }
1943 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001944 else if (pre == 'O' || pre == 'o' ||
1945 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001946 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001947 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001948 if (pre != 0 && pre != '0')
1949 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001950 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001951 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001952 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001953 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001954 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1955 else
1956 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001957 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001958 if (n++ == maxlen)
1959 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001960 if ((what & STR2NR_QUOTE) && *ptr == '\''
1961 && '0' <= ptr[1] && ptr[1] <= '7')
1962 {
1963 ++ptr;
1964 if (n++ == maxlen)
1965 break;
1966 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001967 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001968 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001969 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001970 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001971 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001972 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001973 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001974 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001975 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001976 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001977 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001978 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1979 else
1980 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001981 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001982 if (n++ == maxlen)
1983 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001984 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1985 {
1986 ++ptr;
1987 if (n++ == maxlen)
1988 break;
1989 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001990 }
1991 }
1992 else
1993 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001994 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001995 while (VIM_ISDIGIT(*ptr))
1996 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001997 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1998
Bram Moolenaarc667da52019-11-30 20:52:27 +01001999 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002000 if (un < UVARNUM_MAX / 10
2001 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
2002 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002003 else
2004 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002005 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002006 if (n++ == maxlen)
2007 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002008 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
2009 {
2010 ++ptr;
2011 if (n++ == maxlen)
2012 break;
2013 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002014 }
2015 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002016
Bram Moolenaar4b96df52020-01-26 22:00:26 +01002017 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02002018 // most likely a typo.
2019 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
2020 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002022 if (prep != NULL)
2023 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002024 if (len != NULL)
2025 *len = (int)(ptr - start);
2026 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002027 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002028 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002029 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002030 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002031 if (un > VARNUM_MAX)
2032 *nptr = VARNUM_MIN;
2033 else
2034 *nptr = -(varnumber_T)un;
2035 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002036 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002037 {
2038 if (un > VARNUM_MAX)
2039 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02002040 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002041 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002042 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002043 if (unptr != NULL)
2044 *unptr = un;
2045}
2046
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other character is treated like a decimal digit and gives "c" - '0'.
 */
    int
hex2nr(int c)
{
    int         base;       // character that stands for "offset"
    int         offset;     // value of "base"

    if (c >= 'a' && c <= 'f')
    {
        base = 'a';
        offset = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        base = 'A';
        offset = 10;
    }
    else
    {
        base = '0';
        offset = 0;
    }
    return c - base + offset;
}
2060
Bram Moolenaar071d4272004-06-13 20:20:40 +00002061/*
2062 * Convert two hex characters to a byte.
2063 * Return -1 if one of the characters is not hex.
2064 */
2065 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002066hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067{
2068 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2069 return -1;
2070 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2071}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002072
2073/*
2074 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002075 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002076 * backslash is not a normal file name character.
2077 * '$' is a valid file name character, we don't remove the backslash before
2078 * it. This means it is not possible to use an environment variable after a
2079 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2080 * Although "\ name" is valid, the backslash in "Program\ files" must be
2081 * removed. Assume a file name doesn't start with a space.
2082 * For multi-byte names, never remove a backslash before a non-ascii
2083 * character, assume that all multi-byte characters are valid file name
2084 * characters.
2085 */
2086 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002087rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002088{
2089#ifdef BACKSLASH_IN_FILENAME
2090 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002091 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002092 && (str[1] == ' '
2093 || (str[1] != NUL
2094 && str[1] != '*'
2095 && str[1] != '?'
2096 && !vim_isfilec(str[1]))));
2097#else
2098 return (str[0] == '\\' && str[1] != NUL);
2099#endif
2100}
2101
2102/*
2103 * Halve the number of backslashes in a file name argument.
2104 * For MS-DOS we only do this if the character after the backslash
2105 * is not a normal file character.
2106 */
2107 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002108backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002109{
2110 for ( ; *p; ++p)
2111 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002112 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002113}
2114
2115/*
2116 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002117 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002118 */
2119 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002120backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002121{
2122 char_u *res;
2123
2124 res = vim_strsave(p);
2125 if (res == NULL)
2126 return p;
2127 backslash_halve(res);
2128 return res;
2129}
2130
2131#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2132/*
2133 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2134 * The first 64 entries have been added to map control characters defined in
2135 * ascii.h
2136 */
2137static char_u ebcdic2ascii_tab[256] =
2138{
2139 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2140 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2141 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2142 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2143 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2144 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2145 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2146 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2147 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2148 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2149 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2150 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2151 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2152 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2153 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2154 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2155 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2156 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2157 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2158 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2159 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2160 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2161 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2162 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2163 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2164 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2165 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2166 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2167 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2168 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2169 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2170 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2171};
2172
2173/*
2174 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2175 * wanting 7-bit ASCII characters out the other end.
2176 */
2177 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002178ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002179{
2180 int i;
2181
2182 for (i = 0; i < len; i++)
2183 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2184}
2185#endif