blob: 2c46f7ad04c0578b933c692dd5d3c5b9799f8f36 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
// b_chartab[] is an array of 32 bytes, each bit representing one of the
// characters 0-255: the flag for character "c" lives in byte (c >> 3),
// bit (c & 7).
// "buf" must point to something with a b_chartab[] member.
// Every expansion is wrapped in parentheses so that it remains a single
// expression wherever it is used.
#define SET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)))
#define RESET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)))
#define GET_CHARTAB(buf, c) \
    ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
Bram Moolenaarc667da52019-11-30 20:52:27 +010033#define CT_CELL_MASK 0x07 // mask: nr of display cells (1, 2 or 4)
34#define CT_PRINT_CHAR 0x10 // flag: set for printable chars
35#define CT_ID_CHAR 0x20 // flag: set for ID chars
36#define CT_FNAME_CHAR 0x40 // flag: set for file name chars
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200172 else if (has_mbyte)
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 c = mb_ptr2char_adv(&p);
174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000175 c = *p++;
176 c2 = -1;
177 if (*p == '-' && p[1] != NUL)
178 {
179 ++p;
180 if (VIM_ISDIGIT(*p))
181 c2 = getdigits(&p);
Dominique Pelle4781d6f2021-05-18 21:46:31 +0200182 else if (has_mbyte)
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000183 c2 = mb_ptr2char_adv(&p);
184 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 c2 = *p++;
186 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000187 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000188 || !(*p == NUL || *p == ','))
189 return FAIL;
190
Bram Moolenaarc667da52019-11-30 20:52:27 +0100191 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 {
193 /*
194 * A single '@' (not "@-@"):
195 * Decide on letters being ID/printable/keyword chars with
196 * standard function isalpha(). This takes care of locale for
197 * single-byte characters).
198 */
199 if (c == '@')
200 {
201 do_isalpha = TRUE;
202 c = 1;
203 c2 = 255;
204 }
205 else
206 c2 = c;
207 }
208 while (c <= c2)
209 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100210 // Use the MB_ functions here, because isalpha() doesn't
211 // work properly when 'encoding' is "latin1" and the locale is
212 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100213 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000214 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100215 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
217 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100218 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000219 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100222 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 {
224 if ((c < ' '
225#ifndef EBCDIC
226 || c > '~'
227#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100228 // For double-byte we keep the cell width, so
229 // that we can detect it from the first byte.
230 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000231 {
232 if (tilde)
233 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100234 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 }
238 else
239 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100240 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
241 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000242 }
243 }
244 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100245 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246 {
247 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100248 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100252 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 {
254 if (tilde)
255 RESET_CHARTAB(buf, c);
256 else
257 SET_CHARTAB(buf, c);
258 }
259 }
260 ++c;
261 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100262
263 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000264 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100265 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100266 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000268 }
269 }
270 chartab_initialized = TRUE;
271 return OK;
272}
273
274/*
275 * Translate any special characters in buf[bufsize] in-place.
276 * The result is a string with only printable characters, but if there is not
277 * enough room, not all characters will be translated.
278 */
279 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100280trans_characters(
281 char_u *buf,
282 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100284 int len; // length of string needing translation
285 int room; // room in buffer after string
286 char_u *trs; // translated character
287 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288
289 len = (int)STRLEN(buf);
290 room = bufsize - len;
291 while (*buf != 0)
292 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100293 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000294 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000295 len -= trs_len;
296 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 {
298 trs = transchar_byte(*buf);
299 trs_len = (int)STRLEN(trs);
300 if (trs_len > 1)
301 {
302 room -= trs_len - 1;
303 if (room <= 0)
304 return;
305 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
306 }
307 mch_memmove(buf, trs, (size_t)trs_len);
308 --len;
309 }
310 buf += trs_len;
311 }
312}
313
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314/*
315 * Translate a string into allocated memory, replacing special chars with
316 * printable chars. Returns NULL when out of memory.
317 */
318 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100319transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320{
321 char_u *res;
322 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323 int l, len, c;
324 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 if (has_mbyte)
327 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100328 // Compute the length of the result, taking account of unprintable
329 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330 len = 0;
331 p = s;
332 while (*p != NUL)
333 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000334 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000335 {
336 c = (*mb_ptr2char)(p);
337 p += l;
338 if (vim_isprintc(c))
339 len += l;
340 else
341 {
342 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000343 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344 }
345 }
346 else
347 {
348 l = byte2cells(*p++);
349 if (l > 0)
350 len += l;
351 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100352 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353 }
354 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200355 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356 }
357 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200358 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359 if (res != NULL)
360 {
361 *res = NUL;
362 p = s;
363 while (*p != NUL)
364 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000365 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000366 {
367 c = (*mb_ptr2char)(p);
368 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100369 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370 else
371 transchar_hex(res + STRLEN(res), c);
372 p += l;
373 }
374 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375 STRCAT(res, transchar_byte(*p++));
376 }
377 }
378 return res;
379}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000382 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
383 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000384 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
385 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 */
387 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100388str_foldcase(
389 char_u *str,
390 int orglen,
391 char_u *buf,
392 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 garray_T ga;
395 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000396 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000397
398#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
399#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000400#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
401#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402
Bram Moolenaarc667da52019-11-30 20:52:27 +0100403 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000404 if (buf == NULL)
405 {
406 ga_init2(&ga, 1, 10);
407 if (ga_grow(&ga, len + 1) == FAIL)
408 return NULL;
409 mch_memmove(ga.ga_data, str, (size_t)len);
410 ga.ga_len = len;
411 }
412 else
413 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100414 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000415 len = buflen - 1;
416 mch_memmove(buf, str, (size_t)len);
417 }
418 if (buf == NULL)
419 GA_CHAR(len) = NUL;
420 else
421 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422
Bram Moolenaarc667da52019-11-30 20:52:27 +0100423 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000425 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
429 if (enc_utf8)
430 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000431 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100432 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434
Bram Moolenaarc667da52019-11-30 20:52:27 +0100435 // Only replace the character when it is not an invalid
436 // sequence (ASCII character or more than one byte) and
437 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100438 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441
Bram Moolenaarc667da52019-11-30 20:52:27 +0100442 // If the byte length changes need to shift the following
443 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100444 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000445 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (buf == NULL
449 ? ga_grow(&ga, nlen - olen + 1) == FAIL
450 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100452 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100454 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000456 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100457 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000459 if (buf == NULL)
460 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100461 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
462 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000463 }
464 else
465 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100466 STRMOVE(buf + i + nlen, buf + i + olen);
467 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000468 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000469 }
470 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000471 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000472 }
473 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100474 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000475 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000476 }
477 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000479 if (buf == NULL)
480 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
481 else
482 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000483 ++i;
484 }
485 }
486
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000487 if (buf == NULL)
488 return (char_u *)ga.ga_data;
489 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000491
492/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100493 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * Does NOT work for multi-byte characters, c must be <= 255.
497 * Also doesn't work for the first byte of a multi-byte, "c" must be a
498 * character!
499 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200500static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501
502 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100503transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200505 return transchar_buf(curbuf, c);
506}
507
508 char_u *
509transchar_buf(buf_T *buf, int c)
510{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 int i;
512
513 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100514 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000515 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200516 transchar_charbuf[0] = '~';
517 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000518 i = 2;
519 c = K_SECOND(c);
520 }
521
522 if ((!chartab_initialized && (
523#ifdef EBCDIC
524 (c >= 64 && c < 255)
525#else
526 (c >= ' ' && c <= '~')
527#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000528 )) || (c < 256 && vim_isprintc_strict(c)))
529 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100530 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200531 transchar_charbuf[i] = c;
532 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000533 }
534 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200535 transchar_nonprint(buf, transchar_charbuf + i, c);
536 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000537}
538
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539/*
540 * Like transchar(), but called with a byte instead of a character. Checks
541 * for an illegal UTF-8 byte.
542 */
543 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100544transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545{
546 if (enc_utf8 && c >= 0x80)
547 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200548 transchar_nonprint(curbuf, transchar_charbuf, c);
549 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550 }
551 return transchar(c);
552}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553
554/*
555 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200556 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000557 * Does NOT work for multi-byte characters, c must be <= 255.
558 */
559 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200560transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561{
562 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100563 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200564 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000566
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200568 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000569
570#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100571 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 else if (CtrlChar(c) != 0 || c == DEL)
573#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100574 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575#endif
576 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200577 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000578#ifdef EBCDIC
579 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200580 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000581 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#endif
586
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200587 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else if (enc_utf8 && c >= 0x80)
590 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200591 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000592 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100594 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200596 charbuf[0] = '|';
597 charbuf[1] = c - 0x80;
598 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 }
600#else
601 else if (c < 64)
602 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200603 charbuf[0] = '~';
604 charbuf[1] = MetaChar(c);
605 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606 }
607#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100608 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000609 {
610 /*
611 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 * them as '~?' for now
613 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200614 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621 }
622}
623
624 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100625transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626{
627 int i = 0;
628
629 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630 if (c > 255)
631 {
632 buf[++i] = nr2hex((unsigned)c >> 12);
633 buf[++i] = nr2hex((unsigned)c >> 8);
634 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000636 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = '>';
638 buf[++i] = NUL;
639}
640
/*
 * Return the hex digit character for the lower 4 bits of "c".
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned	nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
653
654/*
655 * Return number of display cells occupied by byte "b".
656 * Caller must make sure 0 <= b <= 255.
657 * For multi-byte mode "b" must be the first byte of a character.
658 * A TAB is counted as two cells: "^I".
659 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660 * cells depends on further bytes.
661 */
662 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100663byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 if (enc_utf8 && b >= 0x80)
666 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100667 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000668}
669
670/*
671 * Return number of display cells occupied by character "c".
672 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673 * A TAB is counted as two cells: "^I" or four: "<09>".
674 */
675 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100676char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677{
678 if (IS_SPECIAL(c))
679 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 if (c >= 0x80)
681 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100682 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 if (enc_utf8)
684 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100685 // DBCS: double-byte means double-width, except for euc-jp with first
686 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687 if (enc_dbcs != 0 && c >= 0x100)
688 {
689 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 return 1;
691 return 2;
692 }
693 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100694 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695}
696
697/*
698 * Return number of display cells occupied by character at "*p".
699 * A TAB is counted as two cells: "^I" or four: "<09>".
700 */
701 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100702ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100704 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705 if (enc_utf8 && *p >= 0x80)
706 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100707 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100708 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000709}
710
711/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100712 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713 * counting TABs as two characters: "^I".
714 */
715 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100716vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return vim_strnsize(s, (int)MAXCOL);
719}
720
721/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100722 * Return the number of character cells string "s[len]" will take on the
723 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000724 */
725 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100726vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727{
728 int size = 0;
729
730 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731 if (has_mbyte)
732 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000733 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734
735 size += ptr2cells(s);
736 s += l;
737 len -= l - 1;
738 }
739 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100741
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 return size;
743}
744
/*
 * RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col): return from the enclosing
 * function with the number of screen cells taken by the character at "p"
 * when it is displayed at column "col".
 *
 * A TAB is expanded using the tab settings of buffer "buf", unless 'list' is
 * set in window "wp" and no "tab" list character is defined; in that case,
 * and for every other character, the result of ptr2cells() is used.
 *
 * This is a macro instead of a function to make it fast, it is used very
 * often!  It expands to a complete if/else statement that always returns, so
 * it must be the last statement of a function body and is written without a
 * trailing semicolon.
 * Every macro argument is parenthesized in the expansion, so that passing an
 * expression instead of a plain identifier is safe.
 * Also see getvcol() below.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	return tabstop_padding((col), (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000771
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100773chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774{
775 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use window "wp" and the buffer displayed in it
 * instead of the current window and buffer.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    // The macro expands to an if/else in which both branches return.
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
785
786/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200787 * Return the number of characters the string 's' will take on the screen,
788 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 */
790 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100791linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000792{
Bram Moolenaardc536092010-07-18 15:45:49 +0200793 return linetabsize_col(0, s);
794}
795
796/*
797 * Like linetabsize(), but starting at column "startcol".
798 */
799 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100800linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200801{
802 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100803 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000804
805 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200806 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 return (int)col;
808}
809
810/*
811 * Like linetabsize(), but for a given window instead of the current one.
812 */
813 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100814win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815{
816 colnr_T col = 0;
817 char_u *s;
818
Bram Moolenaar597a4222014-06-25 14:39:50 +0200819 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100820 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 return (int)col;
823}
824
825/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000826 * Return TRUE if 'c' is a normal identifier character:
827 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 */
829 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100830vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100832 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833}
834
835/*
Bram Moolenaare3d1f4c2021-04-06 20:21:59 +0200836 * Like vim_isIDc() but not using the 'isident' option: letters, numbers and
837 * underscore.
838 */
839 int
840vim_isNormalIDc(int c)
841{
842 return ASCII_ISALNUM(c) || c == '_';
843}
844
845/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100847 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 * For multi-byte characters mb_get_class() is used (builtin rules).
849 */
850 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100851vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100853 return vim_iswordc_buf(c, curbuf);
854}
855
856 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100857vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100858{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 if (c >= 0x100)
860 {
861 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000862 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100864 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100865 return FALSE;
866 }
867 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000868}
869
870/*
871 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
872 */
873 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100874vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877}
878
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100880vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100882 int c = *p;
883
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 if (has_mbyte && MB_BYTE2LEN(c) > 1)
885 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100886 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888
889/*
890 * return TRUE if 'c' is a valid file-name character
891 * Assume characters above 0x100 are valid (multi-byte).
892 */
893 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100894vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000895{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100896 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897}
898
899/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000900 * return TRUE if 'c' is a valid file-name character or a wildcard character
901 * Assume characters above 0x100 are valid (multi-byte).
902 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
903 * returns false.
904 */
905 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100906vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000907{
908 char_u buf[2];
909
910 buf[0] = (char_u)c;
911 buf[1] = NUL;
912 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
913}
914
915/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200916 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000917 * Assume characters above 0x100 are printable (multi-byte), except for
918 * Unicode.
919 */
920 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100921vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000922{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923 if (enc_utf8 && c >= 0x100)
924 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100925 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926}
927
928/*
929 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
930 * byte of a double-byte character.
931 */
932 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100933vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000935 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
936 return FALSE;
937 if (enc_utf8 && c >= 0x100)
938 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100939 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000940}
941
942/*
943 * like chartabsize(), but also check for line breaks on the screen
944 */
945 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100946lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100947 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100948 unsigned char *s,
949 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950{
951#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100952 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
953 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954 {
955#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 if (curwin->w_p_wrap)
957 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
959#ifdef FEAT_LINEBREAK
960 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200961 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000962#endif
963}
964
965/*
966 * Call lbr_chartabsize() and advance the pointer.
967 */
968 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100969lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100970 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100971 char_u **s,
972 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973{
974 int retval;
975
Bram Moolenaar597a4222014-06-25 14:39:50 +0200976 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100977 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000978 return retval;
979}
980
981/*
982 * This function is used very often, keep it fast!!!!
983 *
984 * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
985 * string at start of line. Warning: *headp is only set if it's a non-zero
986 * value, init to 0 before calling.
987 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100989win_lbr_chartabsize(
990 win_T *wp,
Bram Moolenaarc667da52019-11-30 20:52:27 +0100991 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100992 char_u *s,
993 colnr_T col,
994 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996#ifdef FEAT_LINEBREAK
997 int c;
998 int size;
999 colnr_T col2;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001000 colnr_T col_adj = 0; // col + screen size of tab
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 colnr_T colmax;
1002 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 int numberextra;
1005 char_u *ps;
1006 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001007 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +01001008 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001009
1010 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001011 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001012 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001013 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014#endif
1015 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016 if (wp->w_p_wrap)
1017 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1019 }
1020
1021#ifdef FEAT_LINEBREAK
1022 /*
1023 * First get normal size, without 'linebreak'
1024 */
1025 size = win_chartabsize(wp, s, col);
1026 c = *s;
Bram Moolenaaree739b42014-07-02 19:37:42 +02001027 if (tab_corr)
1028 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001029
1030 /*
1031 * If 'linebreak' set check at a blank before a non-blank if the line
1032 * needs a break here
1033 */
1034 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001035 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001036 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001038 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 {
1040 /*
1041 * Count all characters from first non-blank after a blank up to next
1042 * non-blank after a blank.
1043 */
1044 numberextra = win_col_off(wp);
1045 col2 = col;
Bram Moolenaar02631462017-09-22 15:20:32 +02001046 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001048 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001049 colmax += col_adj;
1050 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001051 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001052 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 }
1054
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 for (;;)
1056 {
1057 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001058 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 c = *s;
1060 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001061 && (VIM_ISBREAK(c)
1062 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001063 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 break;
1065
1066 col2 += win_chartabsize(wp, s, col2);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001067 if (col2 >= colmax) // doesn't fit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001068 {
Bram Moolenaaree739b42014-07-02 19:37:42 +02001069 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 break;
1071 }
1072 }
1073 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1075 && wp->w_p_wrap && in_win_border(wp, col))
1076 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001077 ++size; // Count the ">" in the last column.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 mb_added = 1;
1079 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080
1081 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001082 * May have to add something for 'breakindent' and/or 'showbreak'
1083 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001084 * Set *headp to the size of what we add.
1085 */
1086 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001087 sbr = get_showbreak_value(wp);
1088 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001089 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001090 colnr_T sbrlen = 0;
1091 int numberwidth = win_col_off(wp);
1092
1093 numberextra = numberwidth;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001094 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001095 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001097 col -= wp->w_width;
1098 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001099 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001100 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001101 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001102 {
Bram Moolenaaree857022019-11-09 23:26:40 +01001103 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001104 if (col >= sbrlen)
1105 col -= sbrlen;
1106 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001107 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001108 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 else if (col > 0 && numberextra > 0)
1110 col += numberwidth - win_col_off2(wp);
1111
1112 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001113 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001114 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001116 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001117 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001118 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001119 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001121 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001122 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001123 int prev_width = col
1124 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001125
1126 if (width <= 0)
1127 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001128 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001129 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001130 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001131 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001132 }
1133 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001134 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001135 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001136 if (wp->w_p_bri)
1137 added += get_breakindent_win(wp, line);
1138
Bram Moolenaar95765082014-08-24 21:19:25 +02001139 size += added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001140 if (col != 0)
1141 added = 0;
1142 }
1143 }
1144 if (headp != NULL)
1145 *headp = added + mb_added;
1146 return size;
1147#endif
1148}
1149
Bram Moolenaar071d4272004-06-13 20:20:40 +00001150/*
1151 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1152 * 'wrap' is on. This means we need to check for a double-byte character that
1153 * doesn't fit at the end of the screen line.
1154 */
1155 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001156win_nolbr_chartabsize(
1157 win_T *wp,
1158 char_u *s,
1159 colnr_T col,
1160 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001161{
1162 int n;
1163
Bram Moolenaareed9d462021-02-15 20:38:25 +01001164 if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001166# ifdef FEAT_VARTABS
1167 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1168 wp->w_buffer->b_p_vts_array);
1169# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001170 n = wp->w_buffer->b_p_ts;
1171 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001172# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173 }
1174 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001175 // Add one cell for a double-width character in the last column of the
1176 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1178 {
1179 if (headp != NULL)
1180 *headp = 1;
1181 return 3;
1182 }
1183 return n;
1184}
1185
1186/*
1187 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1188 * "wp".
1189 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001190 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001191in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001193 int width1; // width of first line (after line number)
1194 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195
Bram Moolenaarc667da52019-11-30 20:52:27 +01001196 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001198 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001199 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001201 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001202 return TRUE;
1203 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001204 if (width2 <= 0)
1205 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206 return ((vcol - width1) % width2 == width2 - 1);
1207}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208
1209/*
1210 * Get virtual column number of pos.
1211 * start: on the first position of this character (TAB, ctrl)
1212 * cursor: where the cursor is on this character (first char, except for TAB)
1213 * end: on the last position of this character (TAB, ctrl)
1214 *
1215 * This is used very often, keep it fast!
1216 */
1217 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001218getvcol(
1219 win_T *wp,
1220 pos_T *pos,
1221 colnr_T *start,
1222 colnr_T *cursor,
1223 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001224{
1225 colnr_T vcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001226 char_u *ptr; // points to current char
1227 char_u *posptr; // points to char at pos->col
1228 char_u *line; // start of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001229 int incr;
1230 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001231#ifdef FEAT_VARTABS
1232 int *vts = wp->w_buffer->b_p_vts_array;
1233#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 int ts = wp->w_buffer->b_p_ts;
1235 int c;
1236
1237 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001238 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001239 if (pos->col == MAXCOL)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001240 posptr = NULL; // continue until the NUL
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001241 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001242 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001243 // Special check for an empty line, which can happen on exit, when
1244 // ml_get_buf() always returns an empty string.
Bram Moolenaar955f1982017-02-05 15:10:51 +01001245 if (*ptr == NUL)
1246 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001247 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001248 if (has_mbyte)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001249 // always start on the first byte
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001250 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001251 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252
1253 /*
1254 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001255 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1256 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257 * Also use this when 'list' is set but tabs take their normal size.
1258 */
Bram Moolenaareed9d462021-02-15 20:38:25 +01001259 if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001260#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001261 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262#endif
1263 )
1264 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001265 for (;;)
1266 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001268 c = *ptr;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001269 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 if (c == NUL)
1271 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001272 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 break;
1274 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001275 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001276 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001277#ifdef FEAT_VARTABS
1278 incr = tabstop_padding(vcol, ts, vts);
1279#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001280 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001281#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001282 else
1283 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001284 if (has_mbyte)
1285 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001286 // For utf-8, if the byte is >= 0x80, need to look at
1287 // further bytes to find the cell width.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001288 if (enc_utf8 && c >= 0x80)
1289 incr = utf_ptr2cells(ptr);
1290 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001291 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001292
Bram Moolenaarc667da52019-11-30 20:52:27 +01001293 // If a double-cell char doesn't fit at the end of a line
1294 // it wraps to the next line, it's like this char is three
1295 // cells wide.
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001296 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1297 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001298 {
1299 ++incr;
1300 head = 1;
1301 }
1302 }
1303 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001304 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001305 }
1306
Bram Moolenaarc667da52019-11-30 20:52:27 +01001307 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 break;
1309
1310 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001311 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001312 }
1313 }
1314 else
1315 {
1316 for (;;)
1317 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001318 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001320 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001321 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001322 if (*ptr == NUL)
1323 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001324 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001325 break;
1326 }
1327
Bram Moolenaarc667da52019-11-30 20:52:27 +01001328 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329 break;
1330
1331 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001332 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001333 }
1334 }
1335 if (start != NULL)
1336 *start = vcol + head;
1337 if (end != NULL)
1338 *end = vcol + incr - 1;
1339 if (cursor != NULL)
1340 {
1341 if (*ptr == TAB
1342 && (State & NORMAL)
1343 && !wp->w_p_list
1344 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001345 && !(VIsual_active
1346 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001347 )
Bram Moolenaarc667da52019-11-30 20:52:27 +01001348 *cursor = vcol + incr - 1; // cursor at end
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 else
Bram Moolenaarc667da52019-11-30 20:52:27 +01001350 *cursor = vcol + head; // cursor at start
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351 }
1352}
1353
1354/*
1355 * Get virtual cursor column in the current window, pretending 'list' is off.
1356 */
1357 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001358getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359{
1360 int list_save = curwin->w_p_list;
1361 colnr_T vcol;
1362
1363 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001364 if (posp->coladd)
1365 getvvcol(curwin, posp, NULL, &vcol, NULL);
1366 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001367 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 curwin->w_p_list = list_save;
1369 return vcol;
1370}
1371
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372/*
1373 * Get virtual column in virtual mode.
1374 */
1375 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001376getvvcol(
1377 win_T *wp,
1378 pos_T *pos,
1379 colnr_T *start,
1380 colnr_T *cursor,
1381 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001382{
1383 colnr_T col;
1384 colnr_T coladd;
1385 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387
1388 if (virtual_active())
1389 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001390 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 getvcol(wp, pos, &col, NULL, NULL);
1392
1393 coladd = pos->coladd;
1394 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001395 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001396 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001397 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001398 {
1399 int c = (*mb_ptr2char)(ptr + pos->col);
1400
1401 if (c != TAB && vim_isprintc(c))
1402 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001403 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001404 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001405 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001406 else
1407 coladd = 0;
1408 }
1409 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001410 col += coladd;
1411 if (start != NULL)
1412 *start = col;
1413 if (cursor != NULL)
1414 *cursor = col;
1415 if (end != NULL)
1416 *end = col + endadd;
1417 }
1418 else
1419 getvcol(wp, pos, start, cursor, end);
1420}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001421
Bram Moolenaar071d4272004-06-13 20:20:40 +00001422/*
1423 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1424 * Used for Visual block mode.
1425 */
1426 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001427getvcols(
1428 win_T *wp,
1429 pos_T *pos1,
1430 pos_T *pos2,
1431 colnr_T *left,
1432 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001433{
1434 colnr_T from1, from2, to1, to2;
1435
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001436 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437 {
1438 getvvcol(wp, pos1, &from1, NULL, &to1);
1439 getvvcol(wp, pos2, &from2, NULL, &to2);
1440 }
1441 else
1442 {
1443 getvvcol(wp, pos2, &from1, NULL, &to1);
1444 getvvcol(wp, pos1, &from2, NULL, &to2);
1445 }
1446 if (from2 < from1)
1447 *left = from2;
1448 else
1449 *left = from1;
1450 if (to2 > to1)
1451 {
1452 if (*p_sel == 'e' && from2 - 1 >= to1)
1453 *right = from2 - 1;
1454 else
1455 *right = to2;
1456 }
1457 else
1458 *right = to1;
1459}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001460
1461/*
1462 * skipwhite: skip over ' ' and '\t'.
1463 */
1464 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001465skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001466{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001467 char_u *p = q;
1468
Bram Moolenaarc667da52019-11-30 20:52:27 +01001469 while (VIM_ISWHITE(*p)) // skip to next non-white
Bram Moolenaar071d4272004-06-13 20:20:40 +00001470 ++p;
1471 return p;
1472}
1473
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 * Takes no arguments; declared with "(void)" so that the definition is a real
 * prototype and calls with arguments are rejected by the compiler.
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1483
1484 int
1485getwhitecols(char_u *p)
1486{
1487 return skipwhite(p) - p;
1488}
1489
1490/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001491 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001492 */
1493 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001494skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001495{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001496 char_u *p = q;
1497
Bram Moolenaarc667da52019-11-30 20:52:27 +01001498 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001499 ++p;
1500 return p;
1501}
1502
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001503#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001504/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001505 * skip over binary digits
1506 */
1507 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001508skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001509{
1510 char_u *p = q;
1511
Bram Moolenaarc667da52019-11-30 20:52:27 +01001512 while (vim_isbdigit(*p)) // skip to next non-digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001513 ++p;
1514 return p;
1515}
1516
1517/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001518 * skip over digits and hex characters
1519 */
1520 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001521skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001522{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001523 char_u *p = q;
1524
Bram Moolenaarc667da52019-11-30 20:52:27 +01001525 while (vim_isxdigit(*p)) // skip to next non-digit
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001526 ++p;
1527 return p;
1528}
1529#endif
1530
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001531/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001532 * skip to bin digit (or NUL after the string)
1533 */
1534 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001535skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001536{
1537 char_u *p = q;
1538
Bram Moolenaarc667da52019-11-30 20:52:27 +01001539 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001540 ++p;
1541 return p;
1542}
1543
1544/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001545 * skip to digit (or NUL after the string)
1546 */
1547 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001548skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001549{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001550 char_u *p = q;
1551
Bram Moolenaarc667da52019-11-30 20:52:27 +01001552 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001553 ++p;
1554 return p;
1555}
1556
1557/*
1558 * skip to hex character (or NUL after the string)
1559 */
1560 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001561skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001562{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001563 char_u *p = q;
1564
Bram Moolenaarc667da52019-11-30 20:52:27 +01001565 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001566 ++p;
1567 return p;
1568}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001569
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 when "c" is in the range '0' - '9', 0 otherwise.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    if (c < '0')
        return 0;
    return c <= '9';
}
1581
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 when "c" is '0' - '9', 'a' - 'f' or 'A' - 'F', 0 otherwise.
 */
    int
vim_isxdigit(int c)
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    if (c >= 'A' && c <= 'F')
        return 1;
    return 0;
}
1594
/*
 * Binary counterpart of vim_isdigit() and vim_isxdigit() that can handle
 * characters > 0x100.
 * Returns 1 when "c" is '0' or '1', 0 otherwise.
 */
    int
vim_isbdigit(int c)
{
    return c >= '0' && c <= '1';
}
1604
/*
 * Octal counterpart of vim_isdigit().
 * Returns 1 when "c" is in the range '0' - '7', 0 otherwise.
 */
    static int
vim_isodigit(int c)
{
    if (c < '0')
        return 0;
    return c <= '7';
}
1610
Bram Moolenaar78622822005-08-23 21:00:13 +00001611/*
1612 * Vim's own character class functions. These exist because many library
1613 * islower()/toupper() etc. do not work properly: they crash when used with
1614 * invalid values or can't handle latin1 when the locale is C.
1615 * Speed is most important here.
1616 */
1617#define LATIN1LOWER 'l'
1618#define LATIN1UPPER 'U'
1619
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001620static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001621static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1622static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001623
1624 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001625vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001626{
1627 if (c <= '@')
1628 return FALSE;
1629 if (c >= 0x80)
1630 {
1631 if (enc_utf8)
1632 return utf_islower(c);
1633 if (c >= 0x100)
1634 {
1635#ifdef HAVE_ISWLOWER
1636 if (has_mbyte)
1637 return iswlower(c);
1638#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001639 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001640 return FALSE;
1641 }
1642 if (enc_latin1like)
1643 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1644 }
1645 return islower(c);
1646}
1647
1648 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001649vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001650{
1651 if (c <= '@')
1652 return FALSE;
1653 if (c >= 0x80)
1654 {
1655 if (enc_utf8)
1656 return utf_isupper(c);
1657 if (c >= 0x100)
1658 {
1659#ifdef HAVE_ISWUPPER
1660 if (has_mbyte)
1661 return iswupper(c);
1662#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001663 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001664 return FALSE;
1665 }
1666 if (enc_latin1like)
1667 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1668 }
1669 return isupper(c);
1670}
1671
1672 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001673vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001674{
1675 if (c <= '@')
1676 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001677 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001678 {
1679 if (enc_utf8)
1680 return utf_toupper(c);
1681 if (c >= 0x100)
1682 {
1683#ifdef HAVE_TOWUPPER
1684 if (has_mbyte)
1685 return towupper(c);
1686#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001687 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001688 return c;
1689 }
1690 if (enc_latin1like)
1691 return latin1upper[c];
1692 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001693 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1694 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001695 return TOUPPER_LOC(c);
1696}
1697
1698 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001699vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001700{
1701 if (c <= '@')
1702 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001703 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001704 {
1705 if (enc_utf8)
1706 return utf_tolower(c);
1707 if (c >= 0x100)
1708 {
1709#ifdef HAVE_TOWLOWER
1710 if (has_mbyte)
1711 return towlower(c);
1712#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001713 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001714 return c;
1715 }
1716 if (enc_latin1like)
1717 return latin1lower[c];
1718 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001719 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1720 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001721 return TOLOWER_LOC(c);
1722}
Bram Moolenaar78622822005-08-23 21:00:13 +00001723
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724/*
1725 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1726 */
1727 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001728skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001729{
1730 while (*p != ' ' && *p != '\t' && *p != NUL)
1731 ++p;
1732 return p;
1733}
1734
Bram Moolenaar071d4272004-06-13 20:20:40 +00001735/*
1736 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1737 */
1738 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001739skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001740{
1741 while (*p != ' ' && *p != '\t' && *p != NUL)
1742 {
1743 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1744 ++p;
1745 ++p;
1746 }
1747 return p;
1748}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749
1750/*
1751 * Getdigits: Get a number from a string and skip over it.
1752 * Note: the argument is a pointer to a char_u pointer!
1753 */
1754 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001755getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756{
1757 char_u *p;
1758 long retval;
1759
1760 p = *pp;
1761 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001762 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001764 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 *pp = p;
1766 return retval;
1767}
1768
1769/*
1770 * Return TRUE if "lbuf" is empty or only contains blanks.
1771 */
1772 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001773vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001774{
1775 char_u *p;
1776
1777 p = skipwhite(lbuf);
1778 return (*p == NUL || *p == '\r' || *p == '\n');
1779}
1780
1781/*
1782 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001783 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1784 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785 * 0 decimal
1786 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001787 * 'O' octal
1788 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001789 * 'B' bin
1790 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 * 'X' hex
1792 * 'x' hex
1793 * If "len" is not NULL, the length of the number in characters is returned.
1794 * If "nptr" is not NULL, the signed result is returned in it.
1795 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001796 * If "what" contains STR2NR_BIN recognize binary numbers
1797 * If "what" contains STR2NR_OCT recognize octal numbers
1798 * If "what" contains STR2NR_HEX recognize hex numbers
1799 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001800 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001801 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001802 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 */
1804 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001805vim_str2nr(
1806 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001807 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001808 // or 'X' is hex, '0', 'o' or 'O' is octal,
1809 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001810 int *len, // return: detected length of number
1811 int what, // what numbers to recognize
1812 varnumber_T *nptr, // return: signed result
1813 uvarnumber_T *unptr, // return: unsigned result
1814 int maxlen, // max length of string to check
1815 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001816{
1817 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001818 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001820 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001821 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001823 if (len != NULL)
1824 *len = 0;
1825
Bram Moolenaar071d4272004-06-13 20:20:40 +00001826 if (ptr[0] == '-')
1827 {
1828 negative = TRUE;
1829 ++ptr;
1830 }
1831
Bram Moolenaarc667da52019-11-30 20:52:27 +01001832 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001833 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1834 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001835 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001836 pre = ptr[1];
1837 if ((what & STR2NR_HEX)
1838 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1839 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001840 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001841 ptr += 2;
1842 else if ((what & STR2NR_BIN)
1843 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1844 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001845 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001846 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001847 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001848 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001849 && (maxlen == 0 || maxlen > 2))
1850 // octal with prefix "0o"
1851 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001852 else
1853 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001854 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001855 pre = 0;
1856 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001857 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001858 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001859 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001860 {
1861 if (ptr[n] > '7')
1862 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001863 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001864 break;
1865 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001866 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001867 }
1868 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001869 }
1870 }
1871
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001872 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001873 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001874 if (pre == 'B' || pre == 'b'
1875 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001876 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001877 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001878 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001879 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001880 while ('0' <= *ptr && *ptr <= '1')
1881 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001882 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001883 if (un <= UVARNUM_MAX / 2)
1884 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001885 else
1886 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001887 ++ptr;
1888 if (n++ == maxlen)
1889 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001890 if ((what & STR2NR_QUOTE) && *ptr == '\''
1891 && '0' <= ptr[1] && ptr[1] <= '1')
1892 {
1893 ++ptr;
1894 if (n++ == maxlen)
1895 break;
1896 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001897 }
1898 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001899 else if (pre == 'O' || pre == 'o' ||
1900 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001901 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001902 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001903 if (pre != 0 && pre != '0')
1904 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001905 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001906 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001907 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001908 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001909 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1910 else
1911 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001912 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001913 if (n++ == maxlen)
1914 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001915 if ((what & STR2NR_QUOTE) && *ptr == '\''
1916 && '0' <= ptr[1] && ptr[1] <= '7')
1917 {
1918 ++ptr;
1919 if (n++ == maxlen)
1920 break;
1921 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001923 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001924 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001925 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001926 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001927 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001928 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001929 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001931 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001932 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001933 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1934 else
1935 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001936 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001937 if (n++ == maxlen)
1938 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001939 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1940 {
1941 ++ptr;
1942 if (n++ == maxlen)
1943 break;
1944 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001945 }
1946 }
1947 else
1948 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001949 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001950 while (VIM_ISDIGIT(*ptr))
1951 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001952 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1953
Bram Moolenaarc667da52019-11-30 20:52:27 +01001954 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001955 if (un < UVARNUM_MAX / 10
1956 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1957 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001958 else
1959 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001960 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001961 if (n++ == maxlen)
1962 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001963 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1964 {
1965 ++ptr;
1966 if (n++ == maxlen)
1967 break;
1968 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001969 }
1970 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001971
Bram Moolenaar4b96df52020-01-26 22:00:26 +01001972 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001973 // most likely a typo.
1974 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1975 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001976
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001977 if (prep != NULL)
1978 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001979 if (len != NULL)
1980 *len = (int)(ptr - start);
1981 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001982 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001983 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001984 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001985 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001986 if (un > VARNUM_MAX)
1987 *nptr = VARNUM_MIN;
1988 else
1989 *nptr = -(varnumber_T)un;
1990 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001991 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001992 {
1993 if (un > VARNUM_MAX)
1994 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001995 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001996 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001997 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001998 if (unptr != NULL)
1999 *unptr = un;
2000}
2001
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is treated like a decimal digit.
 */
    int
hex2nr(int c)
{
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    return c - '0';
}
2015
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016/*
2017 * Convert two hex characters to a byte.
2018 * Return -1 if one of the characters is not hex.
2019 */
2020 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002021hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002022{
2023 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2024 return -1;
2025 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2026}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002027
2028/*
2029 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002030 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002031 * backslash is not a normal file name character.
2032 * '$' is a valid file name character, we don't remove the backslash before
2033 * it. This means it is not possible to use an environment variable after a
2034 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2035 * Although "\ name" is valid, the backslash in "Program\ files" must be
2036 * removed. Assume a file name doesn't start with a space.
2037 * For multi-byte names, never remove a backslash before a non-ascii
2038 * character, assume that all multi-byte characters are valid file name
2039 * characters.
2040 */
2041 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002042rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002043{
2044#ifdef BACKSLASH_IN_FILENAME
2045 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002046 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002047 && (str[1] == ' '
2048 || (str[1] != NUL
2049 && str[1] != '*'
2050 && str[1] != '?'
2051 && !vim_isfilec(str[1]))));
2052#else
2053 return (str[0] == '\\' && str[1] != NUL);
2054#endif
2055}
2056
2057/*
2058 * Halve the number of backslashes in a file name argument.
2059 * For MS-DOS we only do this if the character after the backslash
2060 * is not a normal file character.
2061 */
2062 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002063backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002064{
2065 for ( ; *p; ++p)
2066 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002067 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002068}
2069
2070/*
2071 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002072 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002073 */
2074 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002075backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002076{
2077 char_u *res;
2078
2079 res = vim_strsave(p);
2080 if (res == NULL)
2081 return p;
2082 backslash_halve(res);
2083 return res;
2084}
2085
2086#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2087/*
2088 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2089 * The first 64 entries have been added to map control characters defined in
2090 * ascii.h
2091 */
2092static char_u ebcdic2ascii_tab[256] =
2093{
2094 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2095 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2096 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2097 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2098 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2099 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2100 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2101 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2102 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2103 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2104 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2105 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2106 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2107 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2108 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2109 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2110 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2111 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2112 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2113 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2114 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2115 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2116 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2117 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2118 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2119 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2120 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2121 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2122 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2123 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2124 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2125 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2126};
2127
2128/*
2129 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2130 * wanting 7-bit ASCII characters out the other end.
2131 */
2132 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002133ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002134{
2135 int i;
2136
2137 for (i = 0; i < len; i++)
2138 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2139}
2140#endif