blob: d762a2545bf0a52e393ea2f8b3f61824bfa0fc28 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
Bram Moolenaarc667da52019-11-30 20:52:27 +010021// b_chartab[] is an array of 32 bytes, each bit representing one of the
22// characters 0-255.
Bram Moolenaar071d4272004-06-13 20:20:40 +000023#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
Bram Moolenaarc667da52019-11-30 20:52:27 +010033#define CT_CELL_MASK 0x07 // mask: nr of display cells (1, 2 or 4)
34#define CT_PRINT_CHAR 0x10 // flag: set for printable chars
35#define CT_ID_CHAR 0x20 // flag: set for ID chars
36#define CT_FNAME_CHAR 0x40 // flag: set for file name chars
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200172 else if (has_mbyte)
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200182 else if (has_mbyte)
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 c2 = mb_ptr2char_adv(&p);
184 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c2 = *p++;
186 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000187 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
Bram Moolenaarc667da52019-11-30 20:52:27 +0100191 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100210 // Use the MB_ functions here, because isalpha() doesn't
211 // work properly when 'encoding' is "latin1" and the locale is
212 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100215 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
217 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100218 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000219 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100222 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 {
224 if ((c < ' '
225#ifndef EBCDIC
226 || c > '~'
227#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100228 // For double-byte we keep the cell width, so
229 // that we can detect it from the first byte.
230 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 {
232 if (tilde)
233 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100234 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 }
238 else
239 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100240 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
241 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000242 }
243 }
244 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100245 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246 {
247 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100252 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 {
254 if (tilde)
255 RESET_CHARTAB(buf, c);
256 else
257 SET_CHARTAB(buf, c);
258 }
259 }
260 ++c;
261 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100262
263 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100266 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 }
269 }
270 chartab_initialized = TRUE;
271 return OK;
272}
273
274/*
275 * Translate any special characters in buf[bufsize] in-place.
276 * The result is a string with only printable characters, but if there is not
277 * enough room, not all characters will be translated.
278 */
279 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100280trans_characters(
281 char_u *buf,
282 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100284 int len; // length of string needing translation
285 int room; // room in buffer after string
286 char_u *trs; // translated character
287 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288
289 len = (int)STRLEN(buf);
290 room = bufsize - len;
291 while (*buf != 0)
292 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100293 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000294 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000295 len -= trs_len;
296 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 {
298 trs = transchar_byte(*buf);
299 trs_len = (int)STRLEN(trs);
300 if (trs_len > 1)
301 {
302 room -= trs_len - 1;
303 if (room <= 0)
304 return;
305 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
306 }
307 mch_memmove(buf, trs, (size_t)trs_len);
308 --len;
309 }
310 buf += trs_len;
311 }
312}
313
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314/*
315 * Translate a string into allocated memory, replacing special chars with
316 * printable chars. Returns NULL when out of memory.
317 */
318 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100319transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320{
321 char_u *res;
322 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323 int l, len, c;
324 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 if (has_mbyte)
327 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100328 // Compute the length of the result, taking account of unprintable
329 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330 len = 0;
331 p = s;
332 while (*p != NUL)
333 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000334 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000335 {
336 c = (*mb_ptr2char)(p);
337 p += l;
338 if (vim_isprintc(c))
339 len += l;
340 else
341 {
342 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000343 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344 }
345 }
346 else
347 {
348 l = byte2cells(*p++);
349 if (l > 0)
350 len += l;
351 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100352 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353 }
354 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200355 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356 }
357 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 if (res != NULL)
360 {
361 *res = NUL;
362 p = s;
363 while (*p != NUL)
364 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000365 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000366 {
367 c = (*mb_ptr2char)(p);
368 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100369 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370 else
371 transchar_hex(res + STRLEN(res), c);
372 p += l;
373 }
374 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375 STRCAT(res, transchar_byte(*p++));
376 }
377 }
378 return res;
379}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000382 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
383 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000384 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
385 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 */
387 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100388str_foldcase(
389 char_u *str,
390 int orglen,
391 char_u *buf,
392 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 garray_T ga;
395 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000396 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000397
398#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
399#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
401#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402
Bram Moolenaarc667da52019-11-30 20:52:27 +0100403 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404 if (buf == NULL)
405 {
406 ga_init2(&ga, 1, 10);
407 if (ga_grow(&ga, len + 1) == FAIL)
408 return NULL;
409 mch_memmove(ga.ga_data, str, (size_t)len);
410 ga.ga_len = len;
411 }
412 else
413 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100414 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000415 len = buflen - 1;
416 mch_memmove(buf, str, (size_t)len);
417 }
418 if (buf == NULL)
419 GA_CHAR(len) = NUL;
420 else
421 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaarc667da52019-11-30 20:52:27 +0100423 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000425 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
429 if (enc_utf8)
430 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000431 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100432 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434
Bram Moolenaarc667da52019-11-30 20:52:27 +0100435 // Only replace the character when it is not an invalid
436 // sequence (ASCII character or more than one byte) and
437 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100438 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441
Bram Moolenaarc667da52019-11-30 20:52:27 +0100442 // If the byte length changes need to shift the following
443 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100444 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (buf == NULL
449 ? ga_grow(&ga, nlen - olen + 1) == FAIL
450 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100452 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100454 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000456 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 if (buf == NULL)
460 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100461 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
462 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 }
464 else
465 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100466 STRMOVE(buf + i + nlen, buf + i + olen);
467 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000468 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000469 }
470 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100474 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000475 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 }
477 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000479 if (buf == NULL)
480 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
481 else
482 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000483 ++i;
484 }
485 }
486
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 if (buf == NULL)
488 return (char_u *)ga.ga_data;
489 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000491
492/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100493 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * Does NOT work for multi-byte characters, c must be <= 255.
497 * Also doesn't work for the first byte of a multi-byte, "c" must be a
498 * character!
499 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200500static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501
502 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100503transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200505 return transchar_buf(curbuf, c);
506}
507
508 char_u *
509transchar_buf(buf_T *buf, int c)
510{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 int i;
512
513 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100514 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200516 transchar_charbuf[0] = '~';
517 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100530 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200531 transchar_charbuf[i] = c;
532 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000533 }
534 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200535 transchar_nonprint(buf, transchar_charbuf + i, c);
536 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200548 transchar_nonprint(curbuf, transchar_charbuf, c);
549 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200556 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200560transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100563 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200564 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000566
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200568 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000569
570#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100571 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100574 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575#endif
576 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200577 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000578#ifdef EBCDIC
579 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200580 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000581 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#endif
586
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200587 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200591 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200596 charbuf[0] = '|';
597 charbuf[1] = c - 0x80;
598 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 }
600#else
601 else if (c < 64)
602 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200603 charbuf[0] = '~';
604 charbuf[1] = MetaChar(c);
605 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606 }
607#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100608 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200614 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    // Index a digit table with the low nibble; the higher bits are ignored.
    return (unsigned)"0123456789abcdef"[c & 0xf];
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100682 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8)
684 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100685 // DBCS: double-byte means double-width, except for euc-jp with first
686 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100704 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100707 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
/*
 * Expands to the body of a function that returns the number of display
 * cells taken by the character at "p" when it starts at column "col".
 * A TAB is expanded to the next tabstop of "buf", except when window "wp"
 * has 'list' set while its w_lcs_chars.tab1 is zero; every other character
 * is measured with ptr2cells().
 * Both branches "return", thus this must be the last statement of a function
 * returning int.
 * Use a define to make it fast, this is used very often!!!
 * All macro arguments are parenthesized so that any expression can be
 * passed.
 * Also see getvcol() below.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but using window "wp" and the buffer in that window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    buf_T	*buf = wp->w_buffer;

    // The macro returns from this function.
    RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100803 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
Bram Moolenaare3d1f4c2021-04-06 20:21:59 +0200836 * Like vim_isIDc() but not using the 'isident' option: letters, numbers and
837 * underscore.
838 */
839 int
840vim_isNormalIDc(int c)
841{
842 return ASCII_ISALNUM(c) || c == '_';
843}
844
845/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100847 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 * For multi-byte characters mb_get_class() is used (builtin rules).
849 */
850 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100851vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100853 return vim_iswordc_buf(c, curbuf);
854}
855
856 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100857vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100858{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 if (c >= 0x100)
860 {
861 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000862 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100864 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100865 return FALSE;
866 }
867 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868}
869
870/*
871 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
872 */
873 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100874vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
878
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100880vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100882 int c = *p;
883
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 if (has_mbyte && MB_BYTE2LEN(c) > 1)
885 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100886 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888
889/*
890 * return TRUE if 'c' is a valid file-name character
891 * Assume characters above 0x100 are valid (multi-byte).
892 */
893 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100894vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100896 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897}
898
899/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000900 * return TRUE if 'c' is a valid file-name character or a wildcard character
901 * Assume characters above 0x100 are valid (multi-byte).
902 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
903 * returns false.
904 */
905 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100906vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000907{
908 char_u buf[2];
909
910 buf[0] = (char_u)c;
911 buf[1] = NUL;
912 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
913}
914
915/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200916 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000917 * Assume characters above 0x100 are printable (multi-byte), except for
918 * Unicode.
919 */
920 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100921vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923 if (enc_utf8 && c >= 0x100)
924 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100925 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926}
927
928/*
929 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
930 * byte of a double-byte character.
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000935 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
936 return FALSE;
937 if (enc_utf8 && c >= 0x100)
938 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100939 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940}
941
942/*
943 * like chartabsize(), but also check for line breaks on the screen
944 */
945 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100946lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100947 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100948 unsigned char *s,
949 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950{
951#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100952 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
953 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954 {
955#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 if (curwin->w_p_wrap)
957 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
959#ifdef FEAT_LINEBREAK
960 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200961 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962#endif
963}
964
965/*
966 * Call lbr_chartabsize() and advance the pointer.
967 */
968 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100969lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100970 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100971 char_u **s,
972 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973{
974 int retval;
975
Bram Moolenaar597a4222014-06-25 14:39:50 +0200976 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100977 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000978 return retval;
979}
980
981/*
 * Return the number of screen cells the character at "s" takes in window
 * "wp" when it starts at virtual column "col".  "line" is the start of the
 * line, it is passed on to get_breakindent_win().
 * This function is used very often, keep it fast!!!!
 *
 * Without FEAT_LINEBREAK, or when 'linebreak', 'breakindent' and 'showbreak'
 * are all unset, this only uses win_nolbr_chartabsize() (with 'wrap') or the
 * plain character/TAB size.
 * Otherwise the size may include the cells up to where the line breaks, one
 * cell for the ">" put in front of a double-width character that does not
 * fit, and the size of the 'showbreak' string and/or the 'breakindent'.
 *
 * If "headp" is not NULL, set *headp to the size of what we add for
 * 'showbreak' and 'breakindent' at the start of the line, plus the cell for
 * the ">".  Warning: on the quick return paths *headp is only set if it's a
 * non-zero value, init to 0 before calling.
 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100989win_lbr_chartabsize(
990 win_T *wp,
Bram Moolenaarc667da52019-11-30 20:52:27 +0100991 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100992 char_u *s,
993 colnr_T col,
994 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996#ifdef FEAT_LINEBREAK
997 int c;
998 int size;
999 colnr_T col2;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001000 colnr_T col_adj = 0; // col + screen size of tab
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 colnr_T colmax;
1002 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 int numberextra;
1005 char_u *ps;
1006 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001007 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +01001008 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001009
1010 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001011 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001012 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001013 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014#endif
1015 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016 if (wp->w_p_wrap)
1017 return win_nolbr_chartabsize(wp, s, col, headp);
    // The macro always returns; without FEAT_LINEBREAK nothing below this
    // block is compiled.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1019 }
1020
1021#ifdef FEAT_LINEBREAK
1022 /*
1023 * First get normal size, without 'linebreak'
1024 */
1025 size = win_chartabsize(wp, s, col);
1026 c = *s;
    // For a TAB remember the cells it takes beyond the first one.
Bram Moolenaaree739b42014-07-02 19:37:42 +02001027 if (tab_corr)
1028 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001029
1030 /*
1031 * If 'linebreak' set check at a blank before a non-blank if the line
1032 * needs a break here
1033 */
1034 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001035 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001036 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001038 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 {
1040 /*
1041 * Count all characters from first non-blank after a blank up to next
1042 * non-blank after a blank.
1043 */
1044 numberextra = win_col_off(wp);
1045 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001046 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
	// "col" is at or past the limit: move "colmax" forward by whole
	// multiples of "n" (presumably the text width of a continuation
	// line - TODO confirm).  Skipped when "n" is not positive.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001048 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001049 colmax += col_adj;
1050 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001051 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001052 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 }
1054
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 for (;;)
1056 {
1057 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001058 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 c = *s;
	    // Stop at the NUL, or at a non-break character that follows a
	    // break character (except in the first round, when "col2" still
	    // equals "col").
1060 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001061 && (VIM_ISBREAK(c)
1062 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001063 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 break;
1065
1066 col2 += win_chartabsize(wp, s, col2);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001067 if (col2 >= colmax) // doesn't fit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068 {
	    // The character at the original "s" is made to take all the
	    // cells up to "colmax".
Bram Moolenaaree739b42014-07-02 19:37:42 +02001069 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 break;
1071 }
1072 }
1073 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1075 && wp->w_p_wrap && in_win_border(wp, col))
1076 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001077 ++size; // Count the ">" in the last column.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 mb_added = 1;
1079 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080
1081 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001082 * May have to add something for 'breakindent' and/or 'showbreak'
1083 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001084 * Set *headp to the size of what we add.
1085 */
1086 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001087 sbr = get_showbreak_value(wp);
Bram Moolenaaree857022019-11-09 23:26:40 +01001088 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 colnr_T sbrlen = 0;
1091 int numberwidth = win_col_off(wp);
1092
1093 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001094 col += numberextra + mb_added;
	// From here on "col" is reduced to a position within a screen line;
	// it is no longer the virtual column that was passed in.
Bram Moolenaar02631462017-09-22 15:20:32 +02001095 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001097 col -= wp->w_width;
1098 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001099 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001100 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001101 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001102 {
Bram Moolenaaree857022019-11-09 23:26:40 +01001103 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001104 if (col >= sbrlen)
1105 col -= sbrlen;
1106 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001108 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 else if (col > 0 && numberextra > 0)
1110 col += numberwidth - win_col_off2(wp);
1111
1112 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001113 }
	// Something is added when the character is at the start of a screen
	// line (col == 0) or does not fit in the rest of the screen line.
Bram Moolenaar02631462017-09-22 15:20:32 +02001114 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001116 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001117 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001118 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001119 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001121 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001122 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001123 int prev_width = col
1124 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001125
		    // avoid dividing by zero or a negative width below
1126 if (width <= 0)
1127 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001128 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001129 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001130 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001131 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001132 }
1133 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001134 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001135 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001136 if (wp->w_p_bri)
1137 added += get_breakindent_win(wp, line);
1138
Bram Moolenaar95765082014-08-24 21:19:25 +02001139 size += added;
	    // When not at the start of a screen line the added cells are
	    // part of "size" but not reported through "headp".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001140 if (col != 0)
1141 added = 0;
1142 }
1143 }
1144 if (headp != NULL)
1145 *headp = added + mb_added;
1146 return size;
1147#endif
1148}
1149
Bram Moolenaar071d4272004-06-13 20:20:40 +00001150/*
1151 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1152 * 'wrap' is on. This means we need to check for a double-byte character that
1153 * doesn't fit at the end of the screen line.
1154 */
1155 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001156win_nolbr_chartabsize(
1157 win_T *wp,
1158 char_u *s,
1159 colnr_T col,
1160 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001161{
1162 int n;
1163
Bram Moolenaareed9d462021-02-15 20:38:25 +01001164 if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001166# ifdef FEAT_VARTABS
1167 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1168 wp->w_buffer->b_p_vts_array);
1169# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 n = wp->w_buffer->b_p_ts;
1171 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001172# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173 }
1174 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001175 // Add one cell for a double-width character in the last column of the
1176 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1178 {
1179 if (headp != NULL)
1180 *headp = 1;
1181 return 3;
1182 }
1183 return n;
1184}
1185
1186/*
1187 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1188 * "wp".
1189 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001190 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001191in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001193 int width1; // width of first line (after line number)
1194 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
Bram Moolenaarc667da52019-11-30 20:52:27 +01001196 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001198 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001199 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001201 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001202 return TRUE;
1203 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001204 if (width2 <= 0)
1205 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return ((vcol - width1) % width2 == width2 - 1);
1207}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208
1209/*
 * Get the virtual column number of position "pos" in window "wp".
 * The results are stored in the arguments that are not NULL:
 *  start: the first position of this character (TAB, ctrl)
 *  cursor: where the cursor is on this character (first char, except for a
 *	    TAB, see the conditions at the end of the function)
 *  end: the last position of this character (TAB, ctrl)
 *
 * When pos->col is MAXCOL the position of the NUL at the end of the line is
 * used.  Otherwise, when pos->col is beyond the end of the line, pos->col is
 * changed to the length of the line.
 *
 * This is used very often, keep it fast!
 */
1217 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001218getvcol(
1219 win_T *wp,
1220 pos_T *pos,
1221 colnr_T *start,
1222 colnr_T *cursor,
1223 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001224{
1225 colnr_T vcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001226 char_u *ptr; // points to current char
1227 char_u *posptr; // points to char at pos->col
1228 char_u *line; // start of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229 int incr;
1230 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001231#ifdef FEAT_VARTABS
1232 int *vts = wp->w_buffer->b_p_vts_array;
1233#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 int ts = wp->w_buffer->b_p_ts;
1235 int c;
1236
1237 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001238 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001239 if (pos->col == MAXCOL)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001240 posptr = NULL; // continue until the NUL
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001241 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001242 {
Bram Moolenaar94f31922021-12-30 15:29:18 +00001243 colnr_T i;
1244
1245 // In a few cases the position can be beyond the end of the line.
	// Note that this modifies the position passed in by the caller.
1246 for (i = 0; i < pos->col; ++i)
1247 if (ptr[i] == NUL)
1248 {
1249 pos->col = i;
1250 break;
1251 }
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001252 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001253 if (has_mbyte)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001254 // always start on the first byte
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001255 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001256 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257
1258 /*
1259 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001260 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1261 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262 * Also use this when 'list' is set but tabs take their normal size.
1263 */
Bram Moolenaareed9d462021-02-15 20:38:25 +01001264 if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001265#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001266 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267#endif
1268 )
1269 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 for (;;)
1271 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001272 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 c = *ptr;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001274 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001275 if (c == NUL)
1276 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001277 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001278 break;
1279 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001280 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001281 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001282#ifdef FEAT_VARTABS
1283 incr = tabstop_padding(vcol, ts, vts);
1284#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001286#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001287 else
1288 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 if (has_mbyte)
1290 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001291 // For utf-8, if the byte is >= 0x80, need to look at
1292 // further bytes to find the cell width.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293 if (enc_utf8 && c >= 0x80)
1294 incr = utf_ptr2cells(ptr);
1295 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001296 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297
Bram Moolenaarc667da52019-11-30 20:52:27 +01001298 // If a double-cell char doesn't fit at the end of a line
1299 // it wraps to the next line, it's like this char is three
1300 // cells wide.
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001301 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1302 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303 {
1304 ++incr;
1305 head = 1;
1306 }
1307 }
1308 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001309 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 }
1311
Bram Moolenaarc667da52019-11-30 20:52:27 +01001312 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313 break;
1314
1315 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001316 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001317 }
1318 }
1319 else
1320 {
    // Slow path: let win_lbr_chartabsize() compute the width of every
    // character.
1321 for (;;)
1322 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001323 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001324 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001325 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001326 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 if (*ptr == NUL)
1328 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001329 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 break;
1331 }
1332
Bram Moolenaarc667da52019-11-30 20:52:27 +01001333 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 break;
1335
1336 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001337 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001338 }
1339 }
    // Here "vcol" is the total width of the characters before the one at
    // "ptr", "incr" is the width of that character and "head" is the part of
    // "incr" that comes before the character itself.
1340 if (start != NULL)
1341 *start = vcol + head;
1342 if (end != NULL)
1343 *end = vcol + incr - 1;
1344 if (cursor != NULL)
1345 {
	// The cursor is put at the end of a TAB only when State has NORMAL
	// set, 'list' is off, virtual editing is not active and it is not
	// the case that Visual mode is active with either 'selection'
	// starting with 'e' or "pos" at or before the Visual start.
1346 if (*ptr == TAB
1347 && (State & NORMAL)
1348 && !wp->w_p_list
1349 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001350 && !(VIsual_active
1351 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001352 )
Bram Moolenaarc667da52019-11-30 20:52:27 +01001353 *cursor = vcol + incr - 1; // cursor at end
Bram Moolenaar071d4272004-06-13 20:20:40 +00001354 else
Bram Moolenaarc667da52019-11-30 20:52:27 +01001355 *cursor = vcol + head; // cursor at start
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 }
1357}
1358
1359/*
1360 * Get virtual cursor column in the current window, pretending 'list' is off.
1361 */
1362 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001363getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364{
1365 int list_save = curwin->w_p_list;
1366 colnr_T vcol;
1367
1368 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001369 if (posp->coladd)
1370 getvvcol(curwin, posp, NULL, &vcol, NULL);
1371 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001372 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373 curwin->w_p_list = list_save;
1374 return vcol;
1375}
1376
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377/*
1378 * Get virtual column in virtual mode.
1379 */
1380 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001381getvvcol(
1382 win_T *wp,
1383 pos_T *pos,
1384 colnr_T *start,
1385 colnr_T *cursor,
1386 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
1388 colnr_T col;
1389 colnr_T coladd;
1390 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001392
1393 if (virtual_active())
1394 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001395 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001396 getvcol(wp, pos, &col, NULL, NULL);
1397
1398 coladd = pos->coladd;
1399 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001400 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001401 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001402 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001403 {
1404 int c = (*mb_ptr2char)(ptr + pos->col);
1405
1406 if (c != TAB && vim_isprintc(c))
1407 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001408 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001409 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001410 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001411 else
1412 coladd = 0;
1413 }
1414 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001415 col += coladd;
1416 if (start != NULL)
1417 *start = col;
1418 if (cursor != NULL)
1419 *cursor = col;
1420 if (end != NULL)
1421 *end = col + endadd;
1422 }
1423 else
1424 getvcol(wp, pos, start, cursor, end);
1425}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001426
Bram Moolenaar071d4272004-06-13 20:20:40 +00001427/*
1428 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1429 * Used for Visual block mode.
1430 */
1431 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001432getvcols(
1433 win_T *wp,
1434 pos_T *pos1,
1435 pos_T *pos2,
1436 colnr_T *left,
1437 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001438{
1439 colnr_T from1, from2, to1, to2;
1440
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001441 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001442 {
1443 getvvcol(wp, pos1, &from1, NULL, &to1);
1444 getvvcol(wp, pos2, &from2, NULL, &to2);
1445 }
1446 else
1447 {
1448 getvvcol(wp, pos2, &from1, NULL, &to1);
1449 getvvcol(wp, pos1, &from2, NULL, &to2);
1450 }
1451 if (from2 < from1)
1452 *left = from2;
1453 else
1454 *left = from1;
1455 if (to2 > to1)
1456 {
1457 if (*p_sel == 'e' && from2 - 1 >= to1)
1458 *right = from2 - 1;
1459 else
1460 *right = to2;
1461 }
1462 else
1463 *right = to1;
1464}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001465
1466/*
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001467 * Skip over ' ' and '\t'.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001468 */
1469 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001470skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001471{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001472 char_u *p = q;
1473
Bram Moolenaarce7eada2021-12-15 15:41:44 +00001474 while (VIM_ISWHITE(*p))
1475 ++p;
1476 return p;
1477}
1478
1479/*
1480 * skip over ' ', '\t' and '\n'.
1481 */
1482 char_u *
1483skipwhite_and_nl(char_u *q)
1484{
1485 char_u *p = q;
1486
1487 while (VIM_ISWHITE(*p) || *p == NL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001488 ++p;
1489 return p;
1490}
1491
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the current line, i.e. the result of getwhitecols() applied
 * to the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1501
1502 int
1503getwhitecols(char_u *p)
1504{
1505 return skipwhite(p) - p;
1506}
1507
1508/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001509 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001510 */
1511 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001512skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001513{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001514 char_u *p = q;
1515
Bram Moolenaarc667da52019-11-30 20:52:27 +01001516 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001517 ++p;
1518 return p;
1519}
1520
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001521#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001522/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001523 * skip over binary digits
1524 */
1525 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001526skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001527{
1528 char_u *p = q;
1529
Bram Moolenaarc667da52019-11-30 20:52:27 +01001530 while (vim_isbdigit(*p)) // skip to next non-digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001531 ++p;
1532 return p;
1533}
1534
1535/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001536 * skip over digits and hex characters
1537 */
1538 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001539skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001540{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001541 char_u *p = q;
1542
Bram Moolenaarc667da52019-11-30 20:52:27 +01001543 while (vim_isxdigit(*p)) // skip to next non-digit
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001544 ++p;
1545 return p;
1546}
1547#endif
1548
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001550 * skip to bin digit (or NUL after the string)
1551 */
1552 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001553skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001554{
1555 char_u *p = q;
1556
Bram Moolenaarc667da52019-11-30 20:52:27 +01001557 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001558 ++p;
1559 return p;
1560}
1561
1562/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001563 * skip to digit (or NUL after the string)
1564 */
1565 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001566skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001567{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001568 char_u *p = q;
1569
Bram Moolenaarc667da52019-11-30 20:52:27 +01001570 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001571 ++p;
1572 return p;
1573}
1574
1575/*
1576 * skip to hex character (or NUL after the string)
1577 */
1578 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001579skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001580{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001581 char_u *p = q;
1582
Bram Moolenaarc667da52019-11-30 20:52:27 +01001583 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001584 ++p;
1585 return p;
1586}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001587
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 when "c" is in the range '0' - '9', 0 otherwise.
 */
    int
vim_isdigit(int c)
{
    if (c < '0')
	return 0;
    return c <= '9';
}
1599
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 when "c" is '0' - '9', 'a' - 'f' or 'A' - 'F', 0 otherwise.
 */
    int
vim_isxdigit(int c)
{
    if ('0' <= c && c <= '9')
	return 1;
    if ('a' <= c && c <= 'f')
	return 1;
    return 'A' <= c && c <= 'F';
}
1612
/*
 * Binary-digit counterpart of vim_isdigit() and vim_isxdigit() that can
 * handle characters > 0x100.
 * Returns 1 when "c" is '0' or '1', 0 otherwise.
 */
    int
vim_isbdigit(int c)
{
    return '0' <= c && c <= '1';
}
1622
/*
 * Return 1 when "c" is an octal digit ('0' - '7'), 0 otherwise.
 */
    static int
vim_isodigit(int c)
{
    if (c < '0')
	return 0;
    return c <= '7';
}
1628
Bram Moolenaar78622822005-08-23 21:00:13 +00001629/*
1630 * Vim's own character class functions. These exist because many library
1631 * islower()/toupper() etc. do not work properly: they crash when used with
1632 * invalid values or can't handle latin1 when the locale is C.
1633 * Speed is most important here.
1634 */
1635#define LATIN1LOWER 'l'
1636#define LATIN1UPPER 'U'
1637
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001638static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001639static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1640static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001641
1642 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001643vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001644{
1645 if (c <= '@')
1646 return FALSE;
1647 if (c >= 0x80)
1648 {
1649 if (enc_utf8)
1650 return utf_islower(c);
1651 if (c >= 0x100)
1652 {
1653#ifdef HAVE_ISWLOWER
1654 if (has_mbyte)
1655 return iswlower(c);
1656#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001657 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001658 return FALSE;
1659 }
1660 if (enc_latin1like)
1661 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1662 }
1663 return islower(c);
1664}
1665
1666 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001667vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001668{
1669 if (c <= '@')
1670 return FALSE;
1671 if (c >= 0x80)
1672 {
1673 if (enc_utf8)
1674 return utf_isupper(c);
1675 if (c >= 0x100)
1676 {
1677#ifdef HAVE_ISWUPPER
1678 if (has_mbyte)
1679 return iswupper(c);
1680#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001681 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001682 return FALSE;
1683 }
1684 if (enc_latin1like)
1685 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1686 }
1687 return isupper(c);
1688}
1689
1690 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001691vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001692{
1693 if (c <= '@')
1694 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001695 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001696 {
1697 if (enc_utf8)
1698 return utf_toupper(c);
1699 if (c >= 0x100)
1700 {
1701#ifdef HAVE_TOWUPPER
1702 if (has_mbyte)
1703 return towupper(c);
1704#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001705 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001706 return c;
1707 }
1708 if (enc_latin1like)
1709 return latin1upper[c];
1710 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001711 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1712 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001713 return TOUPPER_LOC(c);
1714}
1715
1716 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001717vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001718{
1719 if (c <= '@')
1720 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001721 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001722 {
1723 if (enc_utf8)
1724 return utf_tolower(c);
1725 if (c >= 0x100)
1726 {
1727#ifdef HAVE_TOWLOWER
1728 if (has_mbyte)
1729 return towlower(c);
1730#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001731 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001732 return c;
1733 }
1734 if (enc_latin1like)
1735 return latin1lower[c];
1736 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001737 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1738 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001739 return TOLOWER_LOC(c);
1740}
Bram Moolenaar78622822005-08-23 21:00:13 +00001741
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742/*
1743 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1744 */
1745 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001746skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001747{
1748 while (*p != ' ' && *p != '\t' && *p != NUL)
1749 ++p;
1750 return p;
1751}
1752
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753/*
1754 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1755 */
1756 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001757skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758{
1759 while (*p != ' ' && *p != '\t' && *p != NUL)
1760 {
1761 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1762 ++p;
1763 ++p;
1764 }
1765 return p;
1766}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767
1768/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001769 * Get a number from a string and skip over it.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 * Note: the argument is a pointer to a char_u pointer!
1771 */
1772 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001773getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001774{
1775 char_u *p;
1776 long retval;
1777
1778 p = *pp;
1779 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001780 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001781 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001782 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 *pp = p;
1784 return retval;
1785}
1786
1787/*
Bram Moolenaaraf377e32021-11-29 12:12:43 +00001788 * Like getdigits() but allow for embedded single quotes.
1789 */
1790 long
1791getdigits_quoted(char_u **pp)
1792{
1793 char_u *p = *pp;
1794 long retval = 0;
1795
1796 if (*p == '-')
1797 ++p;
1798 while (VIM_ISDIGIT(*p))
1799 {
1800 if (retval >= LONG_MAX / 10 - 10)
1801 retval = LONG_MAX;
1802 else
1803 retval = retval * 10 - '0' + *p;
1804 ++p;
1805 if (in_vim9script() && *p == '\'' && VIM_ISDIGIT(p[1]))
1806 ++p;
1807 }
1808 if (**pp == '-')
1809 {
1810 if (retval == LONG_MAX)
1811 retval = LONG_MIN;
1812 else
1813 retval = -retval;
1814 }
1815 *pp = p;
1816 return retval;
1817}
1818
1819/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001820 * Return TRUE if "lbuf" is empty or only contains blanks.
1821 */
1822 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001823vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001824{
1825 char_u *p;
1826
1827 p = skipwhite(lbuf);
1828 return (*p == NUL || *p == '\r' || *p == '\n');
1829}
1830
1831/*
1832 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001833 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1834 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001835 * 0 decimal
1836 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001837 * 'O' octal
1838 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001839 * 'B' bin
1840 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841 * 'X' hex
1842 * 'x' hex
1843 * If "len" is not NULL, the length of the number in characters is returned.
1844 * If "nptr" is not NULL, the signed result is returned in it.
1845 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001846 * If "what" contains STR2NR_BIN recognize binary numbers
1847 * If "what" contains STR2NR_OCT recognize octal numbers
1848 * If "what" contains STR2NR_HEX recognize hex numbers
1849 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001850 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001851 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001852 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001853 */
1854 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001855vim_str2nr(
1856 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001857 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001858 // or 'X' is hex, '0', 'o' or 'O' is octal,
1859 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001860 int *len, // return: detected length of number
1861 int what, // what numbers to recognize
1862 varnumber_T *nptr, // return: signed result
1863 uvarnumber_T *unptr, // return: unsigned result
1864 int maxlen, // max length of string to check
1865 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001866{
1867 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001868 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001869 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001870 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001871 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001872
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001873 if (len != NULL)
1874 *len = 0;
1875
Bram Moolenaar071d4272004-06-13 20:20:40 +00001876 if (ptr[0] == '-')
1877 {
1878 negative = TRUE;
1879 ++ptr;
1880 }
1881
Bram Moolenaarc667da52019-11-30 20:52:27 +01001882 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001883 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1884 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001885 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001886 pre = ptr[1];
1887 if ((what & STR2NR_HEX)
1888 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1889 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001890 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001891 ptr += 2;
1892 else if ((what & STR2NR_BIN)
1893 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1894 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001895 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001896 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001897 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001898 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001899 && (maxlen == 0 || maxlen > 2))
1900 // octal with prefix "0o"
1901 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001902 else
1903 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001904 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001905 pre = 0;
1906 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001907 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001908 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001909 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001910 {
1911 if (ptr[n] > '7')
1912 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001913 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001914 break;
1915 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001916 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001917 }
1918 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919 }
1920 }
1921
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001922 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001923 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001924 if (pre == 'B' || pre == 'b'
1925 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001926 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001927 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001928 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001929 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001930 while ('0' <= *ptr && *ptr <= '1')
1931 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001932 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001933 if (un <= UVARNUM_MAX / 2)
1934 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001935 else
1936 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001937 ++ptr;
1938 if (n++ == maxlen)
1939 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001940 if ((what & STR2NR_QUOTE) && *ptr == '\''
1941 && '0' <= ptr[1] && ptr[1] <= '1')
1942 {
1943 ++ptr;
1944 if (n++ == maxlen)
1945 break;
1946 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001947 }
1948 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001949 else if (pre == 'O' || pre == 'o' ||
1950 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001951 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001952 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001953 if (pre != 0 && pre != '0')
1954 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001955 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001956 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001957 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001958 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001959 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1960 else
1961 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001962 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001963 if (n++ == maxlen)
1964 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001965 if ((what & STR2NR_QUOTE) && *ptr == '\''
1966 && '0' <= ptr[1] && ptr[1] <= '7')
1967 {
1968 ++ptr;
1969 if (n++ == maxlen)
1970 break;
1971 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001972 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001973 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001974 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001975 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001976 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001977 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001978 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001979 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001980 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001981 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001982 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001983 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1984 else
1985 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001986 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001987 if (n++ == maxlen)
1988 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001989 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1990 {
1991 ++ptr;
1992 if (n++ == maxlen)
1993 break;
1994 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001995 }
1996 }
1997 else
1998 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001999 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00002000 while (VIM_ISDIGIT(*ptr))
2001 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002002 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
2003
Bram Moolenaarc667da52019-11-30 20:52:27 +01002004 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02002005 if (un < UVARNUM_MAX / 10
2006 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
2007 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002008 else
2009 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002010 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02002011 if (n++ == maxlen)
2012 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002013 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
2014 {
2015 ++ptr;
2016 if (n++ == maxlen)
2017 break;
2018 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002019 }
2020 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02002021
Bram Moolenaar4b96df52020-01-26 22:00:26 +01002022 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02002023 // most likely a typo.
2024 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
2025 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002026
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01002027 if (prep != NULL)
2028 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002029 if (len != NULL)
2030 *len = (int)(ptr - start);
2031 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002032 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002033 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002034 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01002035 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002036 if (un > VARNUM_MAX)
2037 *nptr = VARNUM_MIN;
2038 else
2039 *nptr = -(varnumber_T)un;
2040 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002041 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002042 {
2043 if (un > VARNUM_MAX)
2044 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02002045 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01002046 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00002047 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002048 if (unptr != NULL)
2049 *unptr = un;
2050}
2051
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is handled like a decimal digit ("c" - '0').
 */
    int
hex2nr(int c)
{
    if ('a' <= c && c <= 'f')
	return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
	return 10 + (c - 'A');
    return c - '0';
}
2065
Bram Moolenaar071d4272004-06-13 20:20:40 +00002066/*
2067 * Convert two hex characters to a byte.
2068 * Return -1 if one of the characters is not hex.
2069 */
2070 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002071hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002072{
2073 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2074 return -1;
2075 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2076}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002077
2078/*
2079 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002080 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002081 * backslash is not a normal file name character.
2082 * '$' is a valid file name character, we don't remove the backslash before
2083 * it. This means it is not possible to use an environment variable after a
2084 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2085 * Although "\ name" is valid, the backslash in "Program\ files" must be
2086 * removed. Assume a file name doesn't start with a space.
2087 * For multi-byte names, never remove a backslash before a non-ascii
2088 * character, assume that all multi-byte characters are valid file name
2089 * characters.
2090 */
2091 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002092rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002093{
2094#ifdef BACKSLASH_IN_FILENAME
2095 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002096 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002097 && (str[1] == ' '
2098 || (str[1] != NUL
2099 && str[1] != '*'
2100 && str[1] != '?'
2101 && !vim_isfilec(str[1]))));
2102#else
2103 return (str[0] == '\\' && str[1] != NUL);
2104#endif
2105}
2106
2107/*
2108 * Halve the number of backslashes in a file name argument.
2109 * For MS-DOS we only do this if the character after the backslash
2110 * is not a normal file character.
2111 */
2112 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002113backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002114{
2115 for ( ; *p; ++p)
2116 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002117 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002118}
2119
2120/*
2121 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002122 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002123 */
2124 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002125backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126{
2127 char_u *res;
2128
2129 res = vim_strsave(p);
2130 if (res == NULL)
2131 return p;
2132 backslash_halve(res);
2133 return res;
2134}
2135
2136#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2137/*
2138 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2139 * The first 64 entries have been added to map control characters defined in
2140 * ascii.h
2141 */
2142static char_u ebcdic2ascii_tab[256] =
2143{
2144 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2145 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2146 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2147 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2148 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2149 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2150 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2151 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2152 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2153 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2154 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2155 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2156 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2157 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2158 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2159 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2160 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2161 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2162 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2163 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2164 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2165 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2166 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2167 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2168 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2169 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2170 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2171 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2172 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2173 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2174 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2175 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2176};
2177
2178/*
2179 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2180 * wanting 7-bit ASCII characters out the other end.
2181 */
2182 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002183ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002184{
2185 int i;
2186
2187 for (i = 0; i < len; i++)
2188 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2189}
2190#endif