/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
Bram Moolenaarc667da52019-11-30 20:52:27 +010021// b_chartab[] is an array of 32 bytes, each bit representing one of the
22// characters 0-255.
Bram Moolenaar071d4272004-06-13 20:20:40 +000023#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
Bram Moolenaarc667da52019-11-30 20:52:27 +010033#define CT_CELL_MASK 0x07 // mask: nr of display cells (1, 2 or 4)
34#define CT_PRINT_CHAR 0x10 // flag: set for printable chars
35#define CT_ID_CHAR 0x20 // flag: set for ID chars
36#define CT_FNAME_CHAR 0x40 // flag: set for file name chars
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
172 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 if (has_mbyte)
174 c = mb_ptr2char_adv(&p);
175 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000176 c = *p++;
177 c2 = -1;
178 if (*p == '-' && p[1] != NUL)
179 {
180 ++p;
181 if (VIM_ISDIGIT(*p))
182 c2 = getdigits(&p);
183 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000184 if (has_mbyte)
185 c2 = mb_ptr2char_adv(&p);
186 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000187 c2 = *p++;
188 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000189 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190 || !(*p == NUL || *p == ','))
191 return FAIL;
192
Bram Moolenaarc667da52019-11-30 20:52:27 +0100193 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000194 {
195 /*
196 * A single '@' (not "@-@"):
197 * Decide on letters being ID/printable/keyword chars with
198 * standard function isalpha(). This takes care of locale for
199 * single-byte characters).
200 */
201 if (c == '@')
202 {
203 do_isalpha = TRUE;
204 c = 1;
205 c2 = 255;
206 }
207 else
208 c2 = c;
209 }
210 while (c <= c2)
211 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100212 // Use the MB_ functions here, because isalpha() doesn't
213 // work properly when 'encoding' is "latin1" and the locale is
214 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100215 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100217 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000218 {
219 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100222 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100224 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000225 {
226 if ((c < ' '
227#ifndef EBCDIC
228 || c > '~'
229#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100230 // For double-byte we keep the cell width, so
231 // that we can detect it from the first byte.
232 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233 {
234 if (tilde)
235 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100238 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239 }
240 else
241 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100242 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
243 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000244 }
245 }
246 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100247 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248 {
249 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100252 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100254 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 {
256 if (tilde)
257 RESET_CHARTAB(buf, c);
258 else
259 SET_CHARTAB(buf, c);
260 }
261 }
262 ++c;
263 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100264
265 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000266 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100268 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100269 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000270 }
271 }
272 chartab_initialized = TRUE;
273 return OK;
274}
275
276/*
277 * Translate any special characters in buf[bufsize] in-place.
278 * The result is a string with only printable characters, but if there is not
279 * enough room, not all characters will be translated.
280 */
281 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100282trans_characters(
283 char_u *buf,
284 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000285{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100286 int len; // length of string needing translation
287 int room; // room in buffer after string
288 char_u *trs; // translated character
289 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000290
291 len = (int)STRLEN(buf);
292 room = bufsize - len;
293 while (*buf != 0)
294 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100295 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000296 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 len -= trs_len;
298 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299 {
300 trs = transchar_byte(*buf);
301 trs_len = (int)STRLEN(trs);
302 if (trs_len > 1)
303 {
304 room -= trs_len - 1;
305 if (room <= 0)
306 return;
307 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
308 }
309 mch_memmove(buf, trs, (size_t)trs_len);
310 --len;
311 }
312 buf += trs_len;
313 }
314}
315
Bram Moolenaar071d4272004-06-13 20:20:40 +0000316/*
317 * Translate a string into allocated memory, replacing special chars with
318 * printable chars. Returns NULL when out of memory.
319 */
320 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100321transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322{
323 char_u *res;
324 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 int l, len, c;
326 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000327
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 if (has_mbyte)
329 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100330 // Compute the length of the result, taking account of unprintable
331 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000332 len = 0;
333 p = s;
334 while (*p != NUL)
335 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000336 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000337 {
338 c = (*mb_ptr2char)(p);
339 p += l;
340 if (vim_isprintc(c))
341 len += l;
342 else
343 {
344 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000345 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346 }
347 }
348 else
349 {
350 l = byte2cells(*p++);
351 if (l > 0)
352 len += l;
353 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100354 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355 }
356 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200357 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358 }
359 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200360 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000361 if (res != NULL)
362 {
363 *res = NUL;
364 p = s;
365 while (*p != NUL)
366 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000367 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368 {
369 c = (*mb_ptr2char)(p);
370 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100371 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000372 else
373 transchar_hex(res + STRLEN(res), c);
374 p += l;
375 }
376 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000377 STRCAT(res, transchar_byte(*p++));
378 }
379 }
380 return res;
381}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000382
Bram Moolenaar071d4272004-06-13 20:20:40 +0000383/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000384 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
385 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000386 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
387 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388 */
389 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100390str_foldcase(
391 char_u *str,
392 int orglen,
393 char_u *buf,
394 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000395{
396 garray_T ga;
397 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000398 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000399
400#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
401#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000402#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
403#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
Bram Moolenaarc667da52019-11-30 20:52:27 +0100405 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 if (buf == NULL)
407 {
408 ga_init2(&ga, 1, 10);
409 if (ga_grow(&ga, len + 1) == FAIL)
410 return NULL;
411 mch_memmove(ga.ga_data, str, (size_t)len);
412 ga.ga_len = len;
413 }
414 else
415 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100416 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000417 len = buflen - 1;
418 mch_memmove(buf, str, (size_t)len);
419 }
420 if (buf == NULL)
421 GA_CHAR(len) = NUL;
422 else
423 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424
Bram Moolenaarc667da52019-11-30 20:52:27 +0100425 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000429 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000430 {
431 if (enc_utf8)
432 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100434 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000435 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000436
Bram Moolenaarc667da52019-11-30 20:52:27 +0100437 // Only replace the character when it is not an invalid
438 // sequence (ASCII character or more than one byte) and
439 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100442 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000443
Bram Moolenaarc667da52019-11-30 20:52:27 +0100444 // If the byte length changes need to shift the following
445 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000449 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100450 if (buf == NULL
451 ? ga_grow(&ga, nlen - olen + 1) == FAIL
452 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100454 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100456 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000457 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000458 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100459 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000461 if (buf == NULL)
462 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100463 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
464 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000465 }
466 else
467 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100468 STRMOVE(buf + i + nlen, buf + i + olen);
469 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000470 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000471 }
472 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000473 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000474 }
475 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100476 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000477 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 }
479 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000480 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000481 if (buf == NULL)
482 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
483 else
484 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000485 ++i;
486 }
487 }
488
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000489 if (buf == NULL)
490 return (char_u *)ga.ga_data;
491 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493
494/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100497 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000498 * Does NOT work for multi-byte characters, c must be <= 255.
499 * Also doesn't work for the first byte of a multi-byte, "c" must be a
500 * character!
501 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200502static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000503
504 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100505transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200507 return transchar_buf(curbuf, c);
508}
509
510 char_u *
511transchar_buf(buf_T *buf, int c)
512{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000513 int i;
514
515 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100516 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200518 transchar_charbuf[0] = '~';
519 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000520 i = 2;
521 c = K_SECOND(c);
522 }
523
524 if ((!chartab_initialized && (
525#ifdef EBCDIC
526 (c >= 64 && c < 255)
527#else
528 (c >= ' ' && c <= '~')
529#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000530 )) || (c < 256 && vim_isprintc_strict(c)))
531 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100532 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200533 transchar_charbuf[i] = c;
534 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000535 }
536 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200537 transchar_nonprint(buf, transchar_charbuf + i, c);
538 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539}
540
Bram Moolenaar071d4272004-06-13 20:20:40 +0000541/*
542 * Like transchar(), but called with a byte instead of a character. Checks
543 * for an illegal UTF-8 byte.
544 */
545 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100546transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547{
548 if (enc_utf8 && c >= 0x80)
549 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200550 transchar_nonprint(curbuf, transchar_charbuf, c);
551 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000552 }
553 return transchar(c);
554}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000555
556/*
557 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200558 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000559 * Does NOT work for multi-byte characters, c must be <= 255.
560 */
561 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200562transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200566 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000568
Bram Moolenaarc667da52019-11-30 20:52:27 +0100569 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200570 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100573 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000574 else if (CtrlChar(c) != 0 || c == DEL)
575#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100576 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000577#endif
578 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200579 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580#ifdef EBCDIC
581 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200586 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000587#endif
588
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200589 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000591 else if (enc_utf8 && c >= 0x80)
592 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200593 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100596 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000597 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200598 charbuf[0] = '|';
599 charbuf[1] = c - 0x80;
600 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000601 }
602#else
603 else if (c < 64)
604 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200605 charbuf[0] = '~';
606 charbuf[1] = MetaChar(c);
607 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608 }
609#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100610 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611 {
612 /*
613 * TODO: EBCDIC I don't know what to do with this chars, so I display
614 * them as '~?' for now
615 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200622 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000623 }
624}
625
626 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100627transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000628{
629 int i = 0;
630
631 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 if (c > 255)
633 {
634 buf[++i] = nr2hex((unsigned)c >> 12);
635 buf[++i] = nr2hex((unsigned)c >> 8);
636 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000638 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000639 buf[++i] = '>';
640 buf[++i] = NUL;
641}
642
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned    nibble = c & 0xf;   // higher bits are ignored

    if (nibble <= 9)
        return nibble + '0';
    return nibble - 10 + 'a';
}
655
656/*
657 * Return number of display cells occupied by byte "b".
658 * Caller must make sure 0 <= b <= 255.
659 * For multi-byte mode "b" must be the first byte of a character.
660 * A TAB is counted as two cells: "^I".
661 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
662 * cells depends on further bytes.
663 */
664 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100665byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667 if (enc_utf8 && b >= 0x80)
668 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100669 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670}
671
672/*
673 * Return number of display cells occupied by character "c".
674 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
675 * A TAB is counted as two cells: "^I" or four: "<09>".
676 */
677 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100678char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000679{
680 if (IS_SPECIAL(c))
681 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682 if (c >= 0x80)
683 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100684 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685 if (enc_utf8)
686 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100687 // DBCS: double-byte means double-width, except for euc-jp with first
688 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000689 if (enc_dbcs != 0 && c >= 0x100)
690 {
691 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
692 return 1;
693 return 2;
694 }
695 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100696 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000697}
698
699/*
700 * Return number of display cells occupied by character at "*p".
701 * A TAB is counted as two cells: "^I" or four: "<09>".
702 */
703 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100704ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100706 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000707 if (enc_utf8 && *p >= 0x80)
708 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100709 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100710 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711}
712
713/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100714 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000715 * counting TABs as two characters: "^I".
716 */
717 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100718vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000719{
720 return vim_strnsize(s, (int)MAXCOL);
721}
722
723/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100724 * Return the number of character cells string "s[len]" will take on the
725 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000726 */
727 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100728vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729{
730 int size = 0;
731
732 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733 if (has_mbyte)
734 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000735 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000736
737 size += ptr2cells(s);
738 s += l;
739 len -= l - 1;
740 }
741 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100743
Bram Moolenaar071d4272004-06-13 20:20:40 +0000744 return size;
745}
746
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * "wp" is the window, "buf" the buffer, "p" points to the character and
 * "col" is the column where it is displayed.
 * The macro expands to an if/else in which both branches "return", thus
 * control never continues after it.
 * Every macro argument is parenthesized where it is used as an operand, so
 * that an expression can safely be passed for any of them.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100775chartabsize(char_u *p, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000776{
777 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
778}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000779
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but use window "wp" and the buffer in that window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    buf_T	*buf = wp->w_buffer;

    // expands to an if/else that returns in both branches
    RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col)
}
#endif
787
788/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200789 * Return the number of characters the string 's' will take on the screen,
790 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000791 */
792 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100793linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000794{
Bram Moolenaardc536092010-07-18 15:45:49 +0200795 return linetabsize_col(0, s);
796}
797
798/*
799 * Like linetabsize(), but starting at column "startcol".
800 */
801 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100802linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200803{
804 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100805 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000806
807 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200808 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000809 return (int)col;
810}
811
812/*
813 * Like linetabsize(), but for a given window instead of the current one.
814 */
815 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100816win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000817{
818 colnr_T col = 0;
819 char_u *s;
820
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100822 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200823 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000824 return (int)col;
825}
826
827/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000828 * Return TRUE if 'c' is a normal identifier character:
829 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100834 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835}
836
/*
 * Like vim_isIDc() but without looking at the 'isident' option: only ASCII
 * letters, digits and the underscore are accepted.
 */
    int
vim_isNormalIDc(int c)
{
    int		is_underscore = (c == '_');

    return is_underscore || ASCII_ISALNUM(c);
}
846
847/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000848 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100849 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000850 * For multi-byte characters mb_get_class() is used (builtin rules).
851 */
852 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100853vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000854{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100855 return vim_iswordc_buf(c, curbuf);
856}
857
858 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100859vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100860{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000861 if (c >= 0x100)
862 {
863 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000864 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000865 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100866 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100867 return FALSE;
868 }
869 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000870}
871
872/*
873 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
874 */
875 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100876vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100878 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879}
880
Bram Moolenaar071d4272004-06-13 20:20:40 +0000881 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100882vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000883{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100884 int c = *p;
885
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100886 if (has_mbyte && MB_BYTE2LEN(c) > 1)
887 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100888 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000890
891/*
892 * return TRUE if 'c' is a valid file-name character
893 * Assume characters above 0x100 are valid (multi-byte).
894 */
895 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100896vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100898 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000899}
900
901/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000902 * return TRUE if 'c' is a valid file-name character or a wildcard character
903 * Assume characters above 0x100 are valid (multi-byte).
904 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
905 * returns false.
906 */
907 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100908vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000909{
910 char_u buf[2];
911
912 buf[0] = (char_u)c;
913 buf[1] = NUL;
914 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
915}
916
917/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200918 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000919 * Assume characters above 0x100 are printable (multi-byte), except for
920 * Unicode.
921 */
922 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100923vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000924{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000925 if (enc_utf8 && c >= 0x100)
926 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100927 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000928}
929
930/*
931 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
932 * byte of a double-byte character.
933 */
934 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100935vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000936{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000937 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
938 return FALSE;
939 if (enc_utf8 && c >= 0x100)
940 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100941 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942}
943
944/*
945 * like chartabsize(), but also check for line breaks on the screen
946 */
947 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100948lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100949 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100950 unsigned char *s,
951 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000952{
953#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100954 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
955 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956 {
957#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958 if (curwin->w_p_wrap)
959 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000960 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
961#ifdef FEAT_LINEBREAK
962 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200963 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000964#endif
965}
966
967/*
968 * Call lbr_chartabsize() and advance the pointer.
969 */
970 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100971lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100972 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100973 char_u **s,
974 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000975{
976 int retval;
977
Bram Moolenaar597a4222014-06-25 14:39:50 +0200978 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100979 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000980 return retval;
981}
982
983/*
 * Return the number of cells the character at "s" takes when displayed in
 * window "wp" at column "col", also taking 'linebreak', 'showbreak' and
 * 'breakindent' into account.
 * "line" is the start of the line, it is only passed on to
 * get_breakindent_win().
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" is not NULL, set *headp to the size of what we add for
 * 'showbreak' and/or 'breakindent' at the start of the line, plus one when a
 * ">" is counted for a double-wide character in the last column.
 * Warning: when returning quickly (none of the three options is used) *headp
 * is only set if it's a non-zero value, init to 0 before calling.
 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100991win_lbr_chartabsize(
992 win_T *wp,
Bram Moolenaarc667da52019-11-30 20:52:27 +0100993 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100994 char_u *s,
995 colnr_T col,
996 int *headp UNUSED)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997{
998#ifdef FEAT_LINEBREAK
999 int c;
1000 int size;
1001 colnr_T col2;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001002 colnr_T col_adj = 0; // col + screen size of tab
Bram Moolenaar071d4272004-06-13 20:20:40 +00001003 colnr_T colmax;
1004 int added;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 int mb_added = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001006 int numberextra;
1007 char_u *ps;
1008 int tab_corr = (*s == TAB);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001009 int n;
Bram Moolenaaree857022019-11-09 23:26:40 +01001010 char_u *sbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011
1012 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001013 * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001014 */
Bram Moolenaaree857022019-11-09 23:26:40 +01001015 if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001016#endif
1017 {
// Without FEAT_LINEBREAK this block is the whole function body.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 if (wp->w_p_wrap)
1019 return win_nolbr_chartabsize(wp, s, col, headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001020 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1021 }
1022
1023#ifdef FEAT_LINEBREAK
1024 /*
1025 * First get normal size, without 'linebreak'
1026 */
1027 size = win_chartabsize(wp, s, col);
1028 c = *s;
// For a TAB remember how many cells it takes beyond the first one.
Bram Moolenaaree739b42014-07-02 19:37:42 +02001029 if (tab_corr)
1030 col_adj = size - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001031
1032 /*
1033 * If 'linebreak' set check at a blank before a non-blank if the line
1034 * needs a break here
1035 */
1036 if (wp->w_p_lbr
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001037 && VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001038 && !VIM_ISBREAK((int)s[1])
Bram Moolenaar071d4272004-06-13 20:20:40 +00001039 && wp->w_p_wrap
Bram Moolenaar4033c552017-09-16 20:54:51 +02001040 && wp->w_width != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001041 {
1042 /*
1043 * Count all characters from first non-blank after a blank up to next
1044 * non-blank after a blank.
1045 */
1046 numberextra = win_col_off(wp);
1047 col2 = col;
// colmax is the window width minus win_col_off() and the TAB adjustment.
Bram Moolenaar02631462017-09-22 15:20:32 +02001048 colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001049 if (col >= colmax)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001050 {
// "col" is at or past colmax: move colmax forward by a multiple of "n" so
// that it ends up beyond "col", then take the TAB adjustment off again.
Bram Moolenaaree739b42014-07-02 19:37:42 +02001051 colmax += col_adj;
1052 n = colmax + win_col_off2(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 if (n > 0)
Bram Moolenaaree739b42014-07-02 19:37:42 +02001054 colmax += (((col - colmax) / n) + 1) * n - col_adj;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001055 }
1056
// Walk forward from "s", adding the cell widths to col2, until the break
// condition no longer holds or col2 reaches colmax.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 for (;;)
1058 {
1059 ps = s;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001060 MB_PTR_ADV(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001061 c = *s;
1062 if (!(c != NUL
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001063 && (VIM_ISBREAK(c)
1064 || (!VIM_ISBREAK(c)
Bram Moolenaar977d0372017-03-12 21:31:58 +01001065 && (col2 == col || !VIM_ISBREAK((int)*ps))))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001066 break;
1067
1068 col2 += win_chartabsize(wp, s, col2);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001069 if (col2 >= colmax) // doesn't fit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070 {
// The character is counted as filling the rest of the screen line.
Bram Moolenaaree739b42014-07-02 19:37:42 +02001071 size = colmax - col + col_adj;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001072 break;
1073 }
1074 }
1075 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1077 && wp->w_p_wrap && in_win_border(wp, col))
1078 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001079 ++size; // Count the ">" in the last column.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080 mb_added = 1;
1081 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001082
1083 /*
Bram Moolenaar597a4222014-06-25 14:39:50 +02001084 * May have to add something for 'breakindent' and/or 'showbreak'
1085 * string at start of line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001086 * Set *headp to the size of what we add.
1087 */
1088 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001089 sbr = get_showbreak_value(wp);
1090 if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001091 {
Bram Moolenaard574ea22015-01-14 19:35:14 +01001092 colnr_T sbrlen = 0;
1093 int numberwidth = win_col_off(wp);
1094
1095 numberextra = numberwidth;
// "col" is a by-value parameter; from here on it is modified and used as a
// scratch value.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001096 col += numberextra + mb_added;
Bram Moolenaar02631462017-09-22 15:20:32 +02001097 if (col >= (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001098 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001099 col -= wp->w_width;
1100 numberextra = wp->w_width - (numberextra - win_col_off2(wp));
Bram Moolenaard574ea22015-01-14 19:35:14 +01001101 if (col >= numberextra && numberextra > 0)
Bram Moolenaarfe3c4102014-10-31 12:42:01 +01001102 col %= numberextra;
Bram Moolenaaree857022019-11-09 23:26:40 +01001103 if (*sbr != NUL)
Bram Moolenaar1c852102014-10-15 21:26:40 +02001104 {
// NOTE: sbrlen is the number of characters in 'showbreak', while
// vim_strsize(sbr) below is its width in cells.
Bram Moolenaaree857022019-11-09 23:26:40 +01001105 sbrlen = (colnr_T)MB_CHARLEN(sbr);
Bram Moolenaar1c852102014-10-15 21:26:40 +02001106 if (col >= sbrlen)
1107 col -= sbrlen;
1108 }
Bram Moolenaard574ea22015-01-14 19:35:14 +01001109 if (col >= numberextra && numberextra > 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001110 col = col % numberextra;
Bram Moolenaard574ea22015-01-14 19:35:14 +01001111 else if (col > 0 && numberextra > 0)
1112 col += numberwidth - win_col_off2(wp);
1113
1114 numberwidth -= win_col_off2(wp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001115 }
Bram Moolenaar02631462017-09-22 15:20:32 +02001116 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001117 {
Bram Moolenaar597a4222014-06-25 14:39:50 +02001118 added = 0;
Bram Moolenaaree857022019-11-09 23:26:40 +01001119 if (*sbr != NUL)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001120 {
Bram Moolenaar02631462017-09-22 15:20:32 +02001121 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
Bram Moolenaard574ea22015-01-14 19:35:14 +01001122 {
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001123 // calculate effective window width
Bram Moolenaar02631462017-09-22 15:20:32 +02001124 int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01001125 int prev_width = col
1126 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001127
// avoid dividing by zero or a negative width below
1128 if (width <= 0)
1129 width = (colnr_T)1;
Bram Moolenaaree857022019-11-09 23:26:40 +01001130 added += ((size - prev_width) / width) * vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001131 if ((size - prev_width) % width)
Bram Moolenaar7833dab2019-05-27 22:01:40 +02001132 // wrapped, add another length of 'sbr'
Bram Moolenaaree857022019-11-09 23:26:40 +01001133 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001134 }
1135 else
Bram Moolenaaree857022019-11-09 23:26:40 +01001136 added += vim_strsize(sbr);
Bram Moolenaard574ea22015-01-14 19:35:14 +01001137 }
Bram Moolenaar597a4222014-06-25 14:39:50 +02001138 if (wp->w_p_bri)
1139 added += get_breakindent_win(wp, line);
1140
Bram Moolenaar95765082014-08-24 21:19:25 +02001141 size += added;
// The added cells are always part of the returned size, but they are only
// reported through *headp when "col" is zero here.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001142 if (col != 0)
1143 added = 0;
1144 }
1145 }
// On this path *headp is always written, also when the value is zero.
1146 if (headp != NULL)
1147 *headp = added + mb_added;
1148 return size;
1149#endif
1150}
1151
Bram Moolenaar071d4272004-06-13 20:20:40 +00001152/*
1153 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1154 * 'wrap' is on. This means we need to check for a double-byte character that
1155 * doesn't fit at the end of the screen line.
1156 */
1157 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001158win_nolbr_chartabsize(
1159 win_T *wp,
1160 char_u *s,
1161 colnr_T col,
1162 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001163{
1164 int n;
1165
Bram Moolenaareed9d462021-02-15 20:38:25 +01001166 if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001167 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001168# ifdef FEAT_VARTABS
1169 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1170 wp->w_buffer->b_p_vts_array);
1171# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001172 n = wp->w_buffer->b_p_ts;
1173 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001174# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001175 }
1176 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001177 // Add one cell for a double-width character in the last column of the
1178 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001179 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1180 {
1181 if (headp != NULL)
1182 *headp = 1;
1183 return 3;
1184 }
1185 return n;
1186}
1187
1188/*
1189 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1190 * "wp".
1191 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001192 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001193in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001195 int width1; // width of first line (after line number)
1196 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197
Bram Moolenaarc667da52019-11-30 20:52:27 +01001198 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001200 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001201 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001202 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001203 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204 return TRUE;
1205 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001206 if (width2 <= 0)
1207 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208 return ((vcol - width1) % width2 == width2 - 1);
1209}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210
1211/*
1212 * Get virtual column number of pos.
1213 * start: on the first position of this character (TAB, ctrl)
1214 * cursor: where the cursor is on this character (first char, except for TAB)
1215 * end: on the last position of this character (TAB, ctrl)
1216 *
1217 * This is used very often, keep it fast!
1218 */
1219 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001220getvcol(
1221 win_T *wp,
1222 pos_T *pos,
1223 colnr_T *start,
1224 colnr_T *cursor,
1225 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001226{
1227 colnr_T vcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001228 char_u *ptr; // points to current char
1229 char_u *posptr; // points to char at pos->col
1230 char_u *line; // start of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001231 int incr;
1232 int head;
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001233#ifdef FEAT_VARTABS
1234 int *vts = wp->w_buffer->b_p_vts_array;
1235#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001236 int ts = wp->w_buffer->b_p_ts;
1237 int c;
1238
1239 vcol = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001240 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001241 if (pos->col == MAXCOL)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001242 posptr = NULL; // continue until the NUL
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001243 else
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001244 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001245 // Special check for an empty line, which can happen on exit, when
1246 // ml_get_buf() always returns an empty string.
Bram Moolenaar955f1982017-02-05 15:10:51 +01001247 if (*ptr == NUL)
1248 pos->col = 0;
Bram Moolenaar37d619f2010-03-10 14:46:26 +01001249 posptr = ptr + pos->col;
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001250 if (has_mbyte)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001251 // always start on the first byte
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001252 posptr -= (*mb_head_off)(line, posptr);
Bram Moolenaar0c0590d2017-01-28 13:48:10 +01001253 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254
1255 /*
1256 * This function is used very often, do some speed optimizations.
Bram Moolenaar597a4222014-06-25 14:39:50 +02001257 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1258 * use a simple loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001259 * Also use this when 'list' is set but tabs take their normal size.
1260 */
Bram Moolenaareed9d462021-02-15 20:38:25 +01001261 if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +01001263 && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264#endif
1265 )
1266 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 for (;;)
1268 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 head = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270 c = *ptr;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001271 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001272 if (c == NUL)
1273 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001274 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001275 break;
1276 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001277 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001278 if (c == TAB)
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001279#ifdef FEAT_VARTABS
1280 incr = tabstop_padding(vcol, ts, vts);
1281#else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001282 incr = ts - (vcol % ts);
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001283#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001284 else
1285 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001286 if (has_mbyte)
1287 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001288 // For utf-8, if the byte is >= 0x80, need to look at
1289 // further bytes to find the cell width.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001290 if (enc_utf8 && c >= 0x80)
1291 incr = utf_ptr2cells(ptr);
1292 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001293 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001294
Bram Moolenaarc667da52019-11-30 20:52:27 +01001295 // If a double-cell char doesn't fit at the end of a line
1296 // it wraps to the next line, it's like this char is three
1297 // cells wide.
Bram Moolenaar9c33a7c2008-02-20 13:59:32 +00001298 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1299 && in_win_border(wp, vcol))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300 {
1301 ++incr;
1302 head = 1;
1303 }
1304 }
1305 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +01001306 incr = g_chartab[c] & CT_CELL_MASK;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001307 }
1308
Bram Moolenaarc667da52019-11-30 20:52:27 +01001309 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 break;
1311
1312 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001313 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001314 }
1315 }
1316 else
1317 {
1318 for (;;)
1319 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001320 // A tab gets expanded, depending on the current column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001321 head = 0;
Bram Moolenaar597a4222014-06-25 14:39:50 +02001322 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001323 // make sure we don't go past the end of the line
Bram Moolenaar071d4272004-06-13 20:20:40 +00001324 if (*ptr == NUL)
1325 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001326 incr = 1; // NUL at end of line only takes one column
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 break;
1328 }
1329
Bram Moolenaarc667da52019-11-30 20:52:27 +01001330 if (posptr != NULL && ptr >= posptr) // character at pos->col
Bram Moolenaar071d4272004-06-13 20:20:40 +00001331 break;
1332
1333 vcol += incr;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001334 MB_PTR_ADV(ptr);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001335 }
1336 }
1337 if (start != NULL)
1338 *start = vcol + head;
1339 if (end != NULL)
1340 *end = vcol + incr - 1;
1341 if (cursor != NULL)
1342 {
1343 if (*ptr == TAB
1344 && (State & NORMAL)
1345 && !wp->w_p_list
1346 && !virtual_active()
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001347 && !(VIsual_active
1348 && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 )
Bram Moolenaarc667da52019-11-30 20:52:27 +01001350 *cursor = vcol + incr - 1; // cursor at end
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351 else
Bram Moolenaarc667da52019-11-30 20:52:27 +01001352 *cursor = vcol + head; // cursor at start
Bram Moolenaar071d4272004-06-13 20:20:40 +00001353 }
1354}
1355
1356/*
1357 * Get virtual cursor column in the current window, pretending 'list' is off.
1358 */
1359 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001360getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001361{
1362 int list_save = curwin->w_p_list;
1363 colnr_T vcol;
1364
1365 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001366 if (posp->coladd)
1367 getvvcol(curwin, posp, NULL, &vcol, NULL);
1368 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001369 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370 curwin->w_p_list = list_save;
1371 return vcol;
1372}
1373
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374/*
1375 * Get virtual column in virtual mode.
1376 */
1377 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001378getvvcol(
1379 win_T *wp,
1380 pos_T *pos,
1381 colnr_T *start,
1382 colnr_T *cursor,
1383 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384{
1385 colnr_T col;
1386 colnr_T coladd;
1387 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001388 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001389
1390 if (virtual_active())
1391 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001392 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001393 getvcol(wp, pos, &col, NULL, NULL);
1394
1395 coladd = pos->coladd;
1396 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001397 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001398 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001399 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001400 {
1401 int c = (*mb_ptr2char)(ptr + pos->col);
1402
1403 if (c != TAB && vim_isprintc(c))
1404 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001405 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001406 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001407 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408 else
1409 coladd = 0;
1410 }
1411 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001412 col += coladd;
1413 if (start != NULL)
1414 *start = col;
1415 if (cursor != NULL)
1416 *cursor = col;
1417 if (end != NULL)
1418 *end = col + endadd;
1419 }
1420 else
1421 getvcol(wp, pos, start, cursor, end);
1422}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001423
Bram Moolenaar071d4272004-06-13 20:20:40 +00001424/*
1425 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1426 * Used for Visual block mode.
1427 */
1428 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001429getvcols(
1430 win_T *wp,
1431 pos_T *pos1,
1432 pos_T *pos2,
1433 colnr_T *left,
1434 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001435{
1436 colnr_T from1, from2, to1, to2;
1437
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001438 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001439 {
1440 getvvcol(wp, pos1, &from1, NULL, &to1);
1441 getvvcol(wp, pos2, &from2, NULL, &to2);
1442 }
1443 else
1444 {
1445 getvvcol(wp, pos2, &from1, NULL, &to1);
1446 getvvcol(wp, pos1, &from2, NULL, &to2);
1447 }
1448 if (from2 < from1)
1449 *left = from2;
1450 else
1451 *left = from1;
1452 if (to2 > to1)
1453 {
1454 if (*p_sel == 'e' && from2 - 1 >= to1)
1455 *right = from2 - 1;
1456 else
1457 *right = to2;
1458 }
1459 else
1460 *right = to1;
1461}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001462
1463/*
1464 * skipwhite: skip over ' ' and '\t'.
1465 */
1466 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001467skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001468{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001469 char_u *p = q;
1470
Bram Moolenaarc667da52019-11-30 20:52:27 +01001471 while (VIM_ISWHITE(*p)) // skip to next non-white
Bram Moolenaar071d4272004-06-13 20:20:40 +00001472 ++p;
1473 return p;
1474}
1475
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the line returned by ml_get_curline().
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1485
1486 int
1487getwhitecols(char_u *p)
1488{
1489 return skipwhite(p) - p;
1490}
1491
1492/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001493 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001494 */
1495 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001496skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001497{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001498 char_u *p = q;
1499
Bram Moolenaarc667da52019-11-30 20:52:27 +01001500 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001501 ++p;
1502 return p;
1503}
1504
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * skip over binary digits
 * Returns a pointer to the first character in "q" that is not '0' or '1'.
 */
    char_u *
skipbin(char_u *q)
{
    char_u      *p = q;

    while (vim_isbdigit(*p))    // skip to next non-digit
        ++p;
    return p;
}

/*
 * skip over digits and hex characters
 * Returns a pointer to the first character in "q" that is not a hex digit.
 */
    char_u *
skiphex(char_u *q)
{
    char_u      *p = q;

    while (vim_isxdigit(*p))    // skip to next non-digit
        ++p;
    return p;
}
#endif
1532
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001533/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001534 * skip to bin digit (or NUL after the string)
1535 */
1536 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001537skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001538{
1539 char_u *p = q;
1540
Bram Moolenaarc667da52019-11-30 20:52:27 +01001541 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001542 ++p;
1543 return p;
1544}
1545
1546/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001547 * skip to digit (or NUL after the string)
1548 */
1549 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001550skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001551{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001552 char_u *p = q;
1553
Bram Moolenaarc667da52019-11-30 20:52:27 +01001554 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001555 ++p;
1556 return p;
1557}
1558
1559/*
1560 * skip to hex character (or NUL after the string)
1561 */
1562 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001563skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001564{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001565 char_u *p = q;
1566
Bram Moolenaarc667da52019-11-30 20:52:27 +01001567 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001568 ++p;
1569 return p;
1570}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001571
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns non-zero only for '0' - '9'.
 */
    int
vim_isdigit(int c)
{
    return (c >= '0' && c <= '9');
}
1583
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns non-zero only for '0' - '9', 'a' - 'f' and 'A' - 'F'.
 */
    int
vim_isxdigit(int c)
{
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}
1596
/*
 * Counterpart of vim_isdigit() and vim_isxdigit() for binary digits, which
 * can handle characters > 0x100.
 * Returns non-zero only for '0' and '1'.
 */
    int
vim_isbdigit(int c)
{
    return (c == '0' || c == '1');
}
1606
/*
 * Counterpart of vim_isdigit() for octal digits.
 * Returns non-zero only for '0' - '7'.
 */
    static int
vim_isodigit(int c)
{
    return (c >= '0' && c <= '7');
}
1612
Bram Moolenaar78622822005-08-23 21:00:13 +00001613/*
1614 * Vim's own character class functions. These exist because many library
1615 * islower()/toupper() etc. do not work properly: they crash when used with
1616 * invalid values or can't handle latin1 when the locale is C.
1617 * Speed is most important here.
1618 */
1619#define LATIN1LOWER 'l'
1620#define LATIN1UPPER 'U'
1621
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001622static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001623static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1624static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001625
1626 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001627vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001628{
1629 if (c <= '@')
1630 return FALSE;
1631 if (c >= 0x80)
1632 {
1633 if (enc_utf8)
1634 return utf_islower(c);
1635 if (c >= 0x100)
1636 {
1637#ifdef HAVE_ISWLOWER
1638 if (has_mbyte)
1639 return iswlower(c);
1640#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001641 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001642 return FALSE;
1643 }
1644 if (enc_latin1like)
1645 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1646 }
1647 return islower(c);
1648}
1649
1650 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001651vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001652{
1653 if (c <= '@')
1654 return FALSE;
1655 if (c >= 0x80)
1656 {
1657 if (enc_utf8)
1658 return utf_isupper(c);
1659 if (c >= 0x100)
1660 {
1661#ifdef HAVE_ISWUPPER
1662 if (has_mbyte)
1663 return iswupper(c);
1664#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001665 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001666 return FALSE;
1667 }
1668 if (enc_latin1like)
1669 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1670 }
1671 return isupper(c);
1672}
1673
1674 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001675vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001676{
1677 if (c <= '@')
1678 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001679 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001680 {
1681 if (enc_utf8)
1682 return utf_toupper(c);
1683 if (c >= 0x100)
1684 {
1685#ifdef HAVE_TOWUPPER
1686 if (has_mbyte)
1687 return towupper(c);
1688#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001689 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001690 return c;
1691 }
1692 if (enc_latin1like)
1693 return latin1upper[c];
1694 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001695 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1696 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001697 return TOUPPER_LOC(c);
1698}
1699
1700 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001701vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001702{
1703 if (c <= '@')
1704 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001705 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001706 {
1707 if (enc_utf8)
1708 return utf_tolower(c);
1709 if (c >= 0x100)
1710 {
1711#ifdef HAVE_TOWLOWER
1712 if (has_mbyte)
1713 return towlower(c);
1714#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001715 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001716 return c;
1717 }
1718 if (enc_latin1like)
1719 return latin1lower[c];
1720 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001721 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1722 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001723 return TOLOWER_LOC(c);
1724}
Bram Moolenaar78622822005-08-23 21:00:13 +00001725
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726/*
1727 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1728 */
1729 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001730skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731{
1732 while (*p != ' ' && *p != '\t' && *p != NUL)
1733 ++p;
1734 return p;
1735}
1736
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737/*
1738 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1739 */
1740 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001741skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742{
1743 while (*p != ' ' && *p != '\t' && *p != NUL)
1744 {
1745 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1746 ++p;
1747 ++p;
1748 }
1749 return p;
1750}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751
1752/*
1753 * Getdigits: Get a number from a string and skip over it.
1754 * Note: the argument is a pointer to a char_u pointer!
1755 */
1756 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001757getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758{
1759 char_u *p;
1760 long retval;
1761
1762 p = *pp;
1763 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001764 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001766 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767 *pp = p;
1768 return retval;
1769}
1770
1771/*
1772 * Return TRUE if "lbuf" is empty or only contains blanks.
1773 */
1774 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001775vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776{
1777 char_u *p;
1778
1779 p = skipwhite(lbuf);
1780 return (*p == NUL || *p == '\r' || *p == '\n');
1781}
1782
1783/*
1784 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001785 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1786 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001787 * 0 decimal
1788 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001789 * 'O' octal
1790 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001791 * 'B' bin
1792 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793 * 'X' hex
1794 * 'x' hex
1795 * If "len" is not NULL, the length of the number in characters is returned.
1796 * If "nptr" is not NULL, the signed result is returned in it.
1797 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001798 * If "what" contains STR2NR_BIN recognize binary numbers
1799 * If "what" contains STR2NR_OCT recognize octal numbers
1800 * If "what" contains STR2NR_HEX recognize hex numbers
1801 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001802 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001803 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001804 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 */
1806 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001807vim_str2nr(
1808 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001809 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001810 // or 'X' is hex, '0', 'o' or 'O' is octal,
1811 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001812 int *len, // return: detected length of number
1813 int what, // what numbers to recognize
1814 varnumber_T *nptr, // return: signed result
1815 uvarnumber_T *unptr, // return: unsigned result
1816 int maxlen, // max length of string to check
1817 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818{
1819 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001820 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001821 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001822 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001823 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001824
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001825 if (len != NULL)
1826 *len = 0;
1827
Bram Moolenaar071d4272004-06-13 20:20:40 +00001828 if (ptr[0] == '-')
1829 {
1830 negative = TRUE;
1831 ++ptr;
1832 }
1833
Bram Moolenaarc667da52019-11-30 20:52:27 +01001834 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001835 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1836 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001837 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001838 pre = ptr[1];
1839 if ((what & STR2NR_HEX)
1840 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1841 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001842 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001843 ptr += 2;
1844 else if ((what & STR2NR_BIN)
1845 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1846 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001847 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001848 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001849 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001850 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001851 && (maxlen == 0 || maxlen > 2))
1852 // octal with prefix "0o"
1853 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001854 else
1855 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001856 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001857 pre = 0;
1858 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001859 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001860 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001861 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001862 {
1863 if (ptr[n] > '7')
1864 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001865 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001866 break;
1867 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001868 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001869 }
1870 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 }
1872 }
1873
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001874 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001875 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001876 if (pre == 'B' || pre == 'b'
1877 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001878 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001879 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001880 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001881 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001882 while ('0' <= *ptr && *ptr <= '1')
1883 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001884 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001885 if (un <= UVARNUM_MAX / 2)
1886 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001887 else
1888 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001889 ++ptr;
1890 if (n++ == maxlen)
1891 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001892 if ((what & STR2NR_QUOTE) && *ptr == '\''
1893 && '0' <= ptr[1] && ptr[1] <= '1')
1894 {
1895 ++ptr;
1896 if (n++ == maxlen)
1897 break;
1898 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001899 }
1900 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001901 else if (pre == 'O' || pre == 'o' ||
1902 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001904 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001905 if (pre != 0 && pre != '0')
1906 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001907 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001908 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001909 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001910 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001911 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1912 else
1913 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001914 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001915 if (n++ == maxlen)
1916 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001917 if ((what & STR2NR_QUOTE) && *ptr == '\''
1918 && '0' <= ptr[1] && ptr[1] <= '7')
1919 {
1920 ++ptr;
1921 if (n++ == maxlen)
1922 break;
1923 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001924 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001925 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001926 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001927 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001928 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001929 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001930 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001931 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001933 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001934 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001935 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1936 else
1937 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001938 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001939 if (n++ == maxlen)
1940 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001941 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1942 {
1943 ++ptr;
1944 if (n++ == maxlen)
1945 break;
1946 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001947 }
1948 }
1949 else
1950 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001951 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001952 while (VIM_ISDIGIT(*ptr))
1953 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001954 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1955
Bram Moolenaarc667da52019-11-30 20:52:27 +01001956 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001957 if (un < UVARNUM_MAX / 10
1958 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1959 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001960 else
1961 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001962 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001963 if (n++ == maxlen)
1964 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001965 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1966 {
1967 ++ptr;
1968 if (n++ == maxlen)
1969 break;
1970 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001971 }
1972 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001973
Bram Moolenaar4b96df52020-01-26 22:00:26 +01001974 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001975 // most likely a typo.
1976 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1977 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001978
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001979 if (prep != NULL)
1980 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001981 if (len != NULL)
1982 *len = (int)(ptr - start);
1983 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001984 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001985 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001986 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001987 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001988 if (un > VARNUM_MAX)
1989 *nptr = VARNUM_MIN;
1990 else
1991 *nptr = -(varnumber_T)un;
1992 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001993 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001994 {
1995 if (un > VARNUM_MAX)
1996 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001997 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001998 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001999 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002000 if (unptr != NULL)
2001 *unptr = un;
2002}
2003
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * No check is done: any other argument simply yields "c - '0'".
 */
    int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - '0';
}
2017
#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) \
        || defined(PROTO) || defined(FEAT_AUTOSHELLDIR)
/*
 * Convert two hex characters to a byte.
 * Return -1 if one of the characters is not hex.
 * "p[0]" supplies the high four bits and "p[1]" the low four bits.
 */
    int
hexhex2nr(char_u *p)
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif
2032
2033/*
2034 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002035 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002036 * backslash is not a normal file name character.
2037 * '$' is a valid file name character, we don't remove the backslash before
2038 * it. This means it is not possible to use an environment variable after a
2039 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2040 * Although "\ name" is valid, the backslash in "Program\ files" must be
2041 * removed. Assume a file name doesn't start with a space.
2042 * For multi-byte names, never remove a backslash before a non-ascii
2043 * character, assume that all multi-byte characters are valid file name
2044 * characters.
2045 */
2046 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002047rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002048{
2049#ifdef BACKSLASH_IN_FILENAME
2050 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002051 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002052 && (str[1] == ' '
2053 || (str[1] != NUL
2054 && str[1] != '*'
2055 && str[1] != '?'
2056 && !vim_isfilec(str[1]))));
2057#else
2058 return (str[0] == '\\' && str[1] != NUL);
2059#endif
2060}
2061
2062/*
2063 * Halve the number of backslashes in a file name argument.
2064 * For MS-DOS we only do this if the character after the backslash
2065 * is not a normal file character.
2066 */
2067 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002068backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002069{
2070 for ( ; *p; ++p)
2071 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002072 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002073}
2074
2075/*
2076 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002077 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002078 */
2079 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002080backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002081{
2082 char_u *res;
2083
2084 res = vim_strsave(p);
2085 if (res == NULL)
2086 return p;
2087 backslash_halve(res);
2088 return res;
2089}
2090
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte; the values are octal.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place.
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    int         i;

    for (i = 0; i < len; i++)
        buffer[i] = ebcdic2ascii_tab[buffer[i]];
}
#endif
2145#endif