/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

10#include "vim.h"
11
Bram Moolenaar13505972019-01-24 15:04:48 +010012#if defined(HAVE_WCHAR_H)
Bram Moolenaarc667da52019-11-30 20:52:27 +010013# include <wchar.h> // for towupper() and towlower()
Bram Moolenaar071d4272004-06-13 20:20:40 +000014#endif
Bram Moolenaar13505972019-01-24 15:04:48 +010015static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
Bram Moolenaar071d4272004-06-13 20:20:40 +000016
Bram Moolenaarf28dbce2016-01-29 22:03:47 +010017static unsigned nr2hex(unsigned c);
Bram Moolenaar071d4272004-06-13 20:20:40 +000018
19static int chartab_initialized = FALSE;
20
Bram Moolenaarc667da52019-11-30 20:52:27 +010021// b_chartab[] is an array of 32 bytes, each bit representing one of the
22// characters 0-255.
Bram Moolenaar071d4272004-06-13 20:20:40 +000023#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26
Bram Moolenaarc667da52019-11-30 20:52:27 +010027// table used below, see init_chartab() for an explanation
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010028static char_u g_chartab[256];
29
Bram Moolenaar071d4272004-06-13 20:20:40 +000030/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010031 * Flags for g_chartab[].
32 */
Bram Moolenaarc667da52019-11-30 20:52:27 +010033#define CT_CELL_MASK 0x07 // mask: nr of display cells (1, 2 or 4)
34#define CT_PRINT_CHAR 0x10 // flag: set for printable chars
35#define CT_ID_CHAR 0x20 // flag: set for ID chars
36#define CT_FNAME_CHAR 0x40 // flag: set for file name chars
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010037
Bram Moolenaar5843f5f2019-08-20 20:13:45 +020038static int in_win_border(win_T *wp, colnr_T vcol);
39
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010040/*
41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
Bram Moolenaar071d4272004-06-13 20:20:40 +000042 * characters for current buffer.
43 *
44 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45 * 'isprint' and 'encoding'.
46 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010047 * The index in g_chartab[] depends on 'encoding':
Bram Moolenaar071d4272004-06-13 20:20:40 +000048 * - For non-multi-byte index with the byte (same as the character).
49 * - For DBCS index with the first byte.
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51 * the same as the character, if the first byte is 0x80 and above it depends
52 * on further bytes).
53 *
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010054 * The contents of g_chartab[]:
Bram Moolenaar071d4272004-06-13 20:20:40 +000055 * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58 * translate the character before displaying it). Note that only DBCS
59 * characters can have 2 display cells and still be printable.
60 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61 * - CT_ID_CHAR bit is set when the character can be in an identifier.
62 *
63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64 * error, OK otherwise.
65 */
66 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010067init_chartab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +000068{
69 return buf_init_chartab(curbuf, TRUE);
70}
71
72 int
Bram Moolenaar7454a062016-01-30 15:14:10 +010073buf_init_chartab(
74 buf_T *buf,
Bram Moolenaarc667da52019-11-30 20:52:27 +010075 int global) // FALSE: only set buf->b_chartab[]
Bram Moolenaar071d4272004-06-13 20:20:40 +000076{
77 int c;
78 int c2;
79 char_u *p;
80 int i;
81 int tilde;
82 int do_isalpha;
83
84 if (global)
85 {
86 /*
87 * Set the default size for printable characters:
88 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 *
91 * EBCDIC: all chars below ' ' are not printable, all others are
92 * printable.
93 */
94 c = 0;
95 while (c < ' ')
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +010096 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +000097#ifdef EBCDIC
98 while (c < 255)
99#else
100 while (c <= '~')
101#endif
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100102 g_chartab[c++] = 1 + CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000103 while (c < 256)
104 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106 if (enc_utf8 && c >= 0xa0)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100107 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100108 // euc-jp characters starting with 0x8e are single width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100110 g_chartab[c++] = CT_PRINT_CHAR + 1;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100111 // other double-byte chars can be printable AND double-width
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100113 g_chartab[c++] = CT_PRINT_CHAR + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000114 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100115 // the rest is unprintable by default
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 }
118
Bram Moolenaarc667da52019-11-30 20:52:27 +0100119 // Assume that every multi-byte char is a filename character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 for (c = 1; c < 256; ++c)
121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 || (enc_utf8 && c >= 0xa0))
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100124 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125 }
126
127 /*
128 * Init word char flags all to FALSE
129 */
Bram Moolenaara80faa82020-04-12 19:37:17 +0200130 CLEAR_FIELD(buf->b_chartab);
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000131 if (enc_dbcs != 0)
132 for (c = 0; c < 256; ++c)
133 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100134 // double-byte characters are probably word characters
Bram Moolenaar6bb68362005-03-22 23:03:44 +0000135 if (MB_BYTE2LEN(c) == 2)
136 SET_CHARTAB(buf, c);
137 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000138
139#ifdef FEAT_LISP
140 /*
141 * In lisp mode the '-' character is included in keywords.
142 */
143 if (buf->b_p_lisp)
144 SET_CHARTAB(buf, '-');
145#endif
146
Bram Moolenaarc667da52019-11-30 20:52:27 +0100147 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148 // options Each option is a list of characters, character numbers or
149 // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000150 for (i = global ? 0 : 3; i <= 3; ++i)
151 {
152 if (i == 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100153 p = p_isi; // first round: 'isident'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154 else if (i == 1)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100155 p = p_isp; // second round: 'isprint'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156 else if (i == 2)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100157 p = p_isf; // third round: 'isfname'
158 else // i == 3
159 p = buf->b_p_isk; // fourth round: 'iskeyword'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000160
161 while (*p)
162 {
163 tilde = FALSE;
164 do_isalpha = FALSE;
165 if (*p == '^' && p[1] != NUL)
166 {
167 tilde = TRUE;
168 ++p;
169 }
170 if (VIM_ISDIGIT(*p))
171 c = getdigits(&p);
172 else
Bram Moolenaar183bb3e2009-09-11 12:02:34 +0000173 if (has_mbyte)
174 c = mb_ptr2char_adv(&p);
175 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000176 c = *p++;
177 c2 = -1;
178 if (*p == '-' && p[1] != NUL)
179 {
180 ++p;
181 if (VIM_ISDIGIT(*p))
182 c2 = getdigits(&p);
183 else
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000184 if (has_mbyte)
185 c2 = mb_ptr2char_adv(&p);
186 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000187 c2 = *p++;
188 }
Bram Moolenaar2ac5e602009-11-03 15:04:20 +0000189 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190 || !(*p == NUL || *p == ','))
191 return FAIL;
192
Bram Moolenaarc667da52019-11-30 20:52:27 +0100193 if (c2 == -1) // not a range
Bram Moolenaar071d4272004-06-13 20:20:40 +0000194 {
195 /*
196 * A single '@' (not "@-@"):
197 * Decide on letters being ID/printable/keyword chars with
198 * standard function isalpha(). This takes care of locale for
199 * single-byte characters).
200 */
201 if (c == '@')
202 {
203 do_isalpha = TRUE;
204 c = 1;
205 c2 = 255;
206 }
207 else
208 c2 = c;
209 }
210 while (c <= c2)
211 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100212 // Use the MB_ functions here, because isalpha() doesn't
213 // work properly when 'encoding' is "latin1" and the locale is
214 // "C".
Bram Moolenaar14184a32019-02-16 15:10:30 +0100215 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100217 if (i == 0) // (re)set ID flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000218 {
219 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100220 g_chartab[c] &= ~CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100222 g_chartab[c] |= CT_ID_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000223 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100224 else if (i == 1) // (re)set printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000225 {
226 if ((c < ' '
227#ifndef EBCDIC
228 || c > '~'
229#endif
Bram Moolenaar13505972019-01-24 15:04:48 +0100230 // For double-byte we keep the cell width, so
231 // that we can detect it from the first byte.
232 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233 {
234 if (tilde)
235 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100236 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 + ((dy_flags & DY_UHEX) ? 4 : 2);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100238 g_chartab[c] &= ~CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239 }
240 else
241 {
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100242 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
243 g_chartab[c] |= CT_PRINT_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000244 }
245 }
246 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100247 else if (i == 2) // (re)set fname flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248 {
249 if (tilde)
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100250 g_chartab[c] &= ~CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000251 else
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100252 g_chartab[c] |= CT_FNAME_CHAR;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100254 else // i == 3 (re)set keyword flag
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 {
256 if (tilde)
257 RESET_CHARTAB(buf, c);
258 else
259 SET_CHARTAB(buf, c);
260 }
261 }
262 ++c;
263 }
Bram Moolenaar309379f2013-02-06 16:26:26 +0100264
265 c = *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000266 p = skip_to_option_part(p);
Bram Moolenaar309379f2013-02-06 16:26:26 +0100267 if (c == ',' && *p == NUL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100268 // Trailing comma is not allowed.
Bram Moolenaar309379f2013-02-06 16:26:26 +0100269 return FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000270 }
271 }
272 chartab_initialized = TRUE;
273 return OK;
274}
275
276/*
277 * Translate any special characters in buf[bufsize] in-place.
278 * The result is a string with only printable characters, but if there is not
279 * enough room, not all characters will be translated.
280 */
281 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100282trans_characters(
283 char_u *buf,
284 int bufsize)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000285{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100286 int len; // length of string needing translation
287 int room; // room in buffer after string
288 char_u *trs; // translated character
289 int trs_len; // length of trs[]
Bram Moolenaar071d4272004-06-13 20:20:40 +0000290
291 len = (int)STRLEN(buf);
292 room = bufsize - len;
293 while (*buf != 0)
294 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100295 // Assume a multi-byte character doesn't need translation.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000296 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 len -= trs_len;
298 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299 {
300 trs = transchar_byte(*buf);
301 trs_len = (int)STRLEN(trs);
302 if (trs_len > 1)
303 {
304 room -= trs_len - 1;
305 if (room <= 0)
306 return;
307 mch_memmove(buf + trs_len, buf + 1, (size_t)len);
308 }
309 mch_memmove(buf, trs, (size_t)trs_len);
310 --len;
311 }
312 buf += trs_len;
313 }
314}
315
Bram Moolenaar071d4272004-06-13 20:20:40 +0000316/*
317 * Translate a string into allocated memory, replacing special chars with
318 * printable chars. Returns NULL when out of memory.
319 */
320 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100321transstr(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322{
323 char_u *res;
324 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 int l, len, c;
326 char_u hexbuf[11];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000327
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 if (has_mbyte)
329 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100330 // Compute the length of the result, taking account of unprintable
331 // multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000332 len = 0;
333 p = s;
334 while (*p != NUL)
335 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000336 if ((l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000337 {
338 c = (*mb_ptr2char)(p);
339 p += l;
340 if (vim_isprintc(c))
341 len += l;
342 else
343 {
344 transchar_hex(hexbuf, c);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000345 len += (int)STRLEN(hexbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346 }
347 }
348 else
349 {
350 l = byte2cells(*p++);
351 if (l > 0)
352 len += l;
353 else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100354 len += 4; // illegal byte sequence
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355 }
356 }
Bram Moolenaar964b3742019-05-24 18:54:09 +0200357 res = alloc(len + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358 }
359 else
Bram Moolenaar964b3742019-05-24 18:54:09 +0200360 res = alloc(vim_strsize(s) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000361 if (res != NULL)
362 {
363 *res = NUL;
364 p = s;
365 while (*p != NUL)
366 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000367 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368 {
369 c = (*mb_ptr2char)(p);
370 if (vim_isprintc(c))
Bram Moolenaarc667da52019-11-30 20:52:27 +0100371 STRNCAT(res, p, l); // append printable multi-byte char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000372 else
373 transchar_hex(res + STRLEN(res), c);
374 p += l;
375 }
376 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000377 STRCAT(res, transchar_byte(*p++));
378 }
379 }
380 return res;
381}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000382
Bram Moolenaar071d4272004-06-13 20:20:40 +0000383/*
Bram Moolenaar217ad922005-03-20 22:37:15 +0000384 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the
385 * current locale.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000386 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
387 * Otherwise puts the result in "buf[buflen]".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388 */
389 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100390str_foldcase(
391 char_u *str,
392 int orglen,
393 char_u *buf,
394 int buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000395{
396 garray_T ga;
397 int i;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000398 int len = orglen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000399
400#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
401#define GA_PTR(i) ((char_u *)ga.ga_data + i)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000402#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
403#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
Bram Moolenaarc667da52019-11-30 20:52:27 +0100405 // Copy "str" into "buf" or allocated memory, unmodified.
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000406 if (buf == NULL)
407 {
408 ga_init2(&ga, 1, 10);
409 if (ga_grow(&ga, len + 1) == FAIL)
410 return NULL;
411 mch_memmove(ga.ga_data, str, (size_t)len);
412 ga.ga_len = len;
413 }
414 else
415 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100416 if (len >= buflen) // Ugly!
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000417 len = buflen - 1;
418 mch_memmove(buf, str, (size_t)len);
419 }
420 if (buf == NULL)
421 GA_CHAR(len) = NUL;
422 else
423 buf[len] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424
Bram Moolenaarc667da52019-11-30 20:52:27 +0100425 // Make each character lower case.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 i = 0;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000427 while (STR_CHAR(i) != NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000429 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000430 {
431 if (enc_utf8)
432 {
Bram Moolenaarb9839212008-06-28 11:03:50 +0000433 int c = utf_ptr2char(STR_PTR(i));
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100434 int olen = utf_ptr2len(STR_PTR(i));
Bram Moolenaarb9839212008-06-28 11:03:50 +0000435 int lc = utf_tolower(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000436
Bram Moolenaarc667da52019-11-30 20:52:27 +0100437 // Only replace the character when it is not an invalid
438 // sequence (ASCII character or more than one byte) and
439 // utf_tolower() doesn't return the original character.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100440 if ((c < 0x80 || olen > 1) && c != lc)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000441 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100442 int nlen = utf_char2len(lc);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000443
Bram Moolenaarc667da52019-11-30 20:52:27 +0100444 // If the byte length changes need to shift the following
445 // characters forward or backward.
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100446 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000447 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100448 if (nlen > olen)
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000449 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100450 if (buf == NULL
451 ? ga_grow(&ga, nlen - olen + 1) == FAIL
452 : len + nlen - olen >= buflen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100454 // out of memory, keep old char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000455 lc = c;
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100456 nlen = olen;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000457 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000458 }
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100459 if (olen != nlen)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000460 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000461 if (buf == NULL)
462 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100463 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
464 ga.ga_len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000465 }
466 else
467 {
Bram Moolenaar70b2a562012-01-10 22:26:17 +0100468 STRMOVE(buf + i + nlen, buf + i + olen);
469 len += nlen - olen;
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000470 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000471 }
472 }
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000473 (void)utf_char2bytes(lc, STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000474 }
475 }
Bram Moolenaarc667da52019-11-30 20:52:27 +0100476 // skip to next multi-byte char
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000477 i += (*mb_ptr2len)(STR_PTR(i));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000478 }
479 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000480 {
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000481 if (buf == NULL)
482 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
483 else
484 buf[i] = TOLOWER_LOC(buf[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000485 ++i;
486 }
487 }
488
Bram Moolenaar6ebb1142005-01-25 21:58:26 +0000489 if (buf == NULL)
490 return (char_u *)ga.ga_data;
491 return buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000492}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493
494/*
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100495 * Catch 22: g_chartab[] can't be initialized before the options are
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 * initialized, and initializing options may cause transchar() to be called!
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100497 * When chartab_initialized == FALSE don't use g_chartab[].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000498 * Does NOT work for multi-byte characters, c must be <= 255.
499 * Also doesn't work for the first byte of a multi-byte, "c" must be a
500 * character!
501 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200502static char_u transchar_charbuf[7];
Bram Moolenaar071d4272004-06-13 20:20:40 +0000503
504 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100505transchar(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506{
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200507 return transchar_buf(curbuf, c);
508}
509
510 char_u *
511transchar_buf(buf_T *buf, int c)
512{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000513 int i;
514
515 i = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100516 if (IS_SPECIAL(c)) // special key code, display as ~@ char
Bram Moolenaar071d4272004-06-13 20:20:40 +0000517 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200518 transchar_charbuf[0] = '~';
519 transchar_charbuf[1] = '@';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000520 i = 2;
521 c = K_SECOND(c);
522 }
523
524 if ((!chartab_initialized && (
525#ifdef EBCDIC
526 (c >= 64 && c < 255)
527#else
528 (c >= ' ' && c <= '~')
529#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000530 )) || (c < 256 && vim_isprintc_strict(c)))
531 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100532 // printable character
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200533 transchar_charbuf[i] = c;
534 transchar_charbuf[i + 1] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000535 }
536 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200537 transchar_nonprint(buf, transchar_charbuf + i, c);
538 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000539}
540
Bram Moolenaar071d4272004-06-13 20:20:40 +0000541/*
542 * Like transchar(), but called with a byte instead of a character. Checks
543 * for an illegal UTF-8 byte.
544 */
545 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +0100546transchar_byte(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547{
548 if (enc_utf8 && c >= 0x80)
549 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200550 transchar_nonprint(curbuf, transchar_charbuf, c);
551 return transchar_charbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000552 }
553 return transchar(c);
554}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000555
556/*
557 * Convert non-printable character to two or more printable characters in
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200558 * "buf[]". "charbuf" needs to be able to hold five bytes.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000559 * Does NOT work for multi-byte characters, c must be <= 255.
560 */
561 void
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200562transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563{
564 if (c == NL)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100565 c = NUL; // we use newline in place of a NUL
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200566 else if (c == CAR && get_fileformat(buf) == EOL_MAC)
Bram Moolenaarc667da52019-11-30 20:52:27 +0100567 c = NL; // we use CR in place of NL in this case
Bram Moolenaar071d4272004-06-13 20:20:40 +0000568
Bram Moolenaarc667da52019-11-30 20:52:27 +0100569 if (dy_flags & DY_UHEX) // 'display' has "uhex"
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200570 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572#ifdef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100573 // For EBCDIC only the characters 0-63 and 255 are not printable
Bram Moolenaar071d4272004-06-13 20:20:40 +0000574 else if (CtrlChar(c) != 0 || c == DEL)
575#else
Bram Moolenaarc667da52019-11-30 20:52:27 +0100576 else if (c <= 0x7f) // 0x00 - 0x1f and 0x7f
Bram Moolenaar071d4272004-06-13 20:20:40 +0000577#endif
578 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200579 charbuf[0] = '^';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580#ifdef EBCDIC
581 if (c == DEL)
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200582 charbuf[1] = '?'; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583 else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200584 charbuf[1] = CtrlChar(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000585#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200586 charbuf[1] = c ^ 0x40; // DEL displayed as ^?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000587#endif
588
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200589 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000591 else if (enc_utf8 && c >= 0x80)
592 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200593 transchar_hex(charbuf, c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595#ifndef EBCDIC
Bram Moolenaarc667da52019-11-30 20:52:27 +0100596 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) // 0xa0 - 0xfe
Bram Moolenaar071d4272004-06-13 20:20:40 +0000597 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200598 charbuf[0] = '|';
599 charbuf[1] = c - 0x80;
600 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000601 }
602#else
603 else if (c < 64)
604 {
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200605 charbuf[0] = '~';
606 charbuf[1] = MetaChar(c);
607 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608 }
609#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +0100610 else // 0x80 - 0x9f and 0xff
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611 {
612 /*
613 * TODO: EBCDIC I don't know what to do with this chars, so I display
614 * them as '~?' for now
615 */
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200616 charbuf[0] = '~';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617#ifdef EBCDIC
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200618 charbuf[1] = '?'; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619#else
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200620 charbuf[1] = (c - 0x80) ^ 0x40; // 0xff displayed as ~?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000621#endif
Bram Moolenaar32ee6272020-06-10 14:16:49 +0200622 charbuf[2] = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000623 }
624}
625
626 void
Bram Moolenaar7454a062016-01-30 15:14:10 +0100627transchar_hex(char_u *buf, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000628{
629 int i = 0;
630
631 buf[0] = '<';
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 if (c > 255)
633 {
634 buf[++i] = nr2hex((unsigned)c >> 12);
635 buf[++i] = nr2hex((unsigned)c >> 8);
636 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000637 buf[++i] = nr2hex((unsigned)c >> 4);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000638 buf[++i] = nr2hex((unsigned)c);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000639 buf[++i] = '>';
640 buf[++i] = NUL;
641}
642
/*
 * Convert the lower 4 bits of byte "c" to its hex character.
 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 * function key 1.
 */
    static unsigned
nr2hex(unsigned c)
{
    if ((c & 0xf) <= 9)
        return (c & 0xf) + '0';
    return (c & 0xf) - 10 + 'a';
}

656/*
657 * Return number of display cells occupied by byte "b".
658 * Caller must make sure 0 <= b <= 255.
659 * For multi-byte mode "b" must be the first byte of a character.
660 * A TAB is counted as two cells: "^I".
661 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
662 * cells depends on further bytes.
663 */
664 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100665byte2cells(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667 if (enc_utf8 && b >= 0x80)
668 return 0;
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100669 return (g_chartab[b] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670}
671
672/*
673 * Return number of display cells occupied by character "c".
674 * "c" can be a special key (negative number) in which case 3 or 4 is returned.
675 * A TAB is counted as two cells: "^I" or four: "<09>".
676 */
677 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100678char2cells(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000679{
680 if (IS_SPECIAL(c))
681 return char2cells(K_SECOND(c)) + 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682 if (c >= 0x80)
683 {
Bram Moolenaarc667da52019-11-30 20:52:27 +0100684 // UTF-8: above 0x80 need to check the value
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685 if (enc_utf8)
686 return utf_char2cells(c);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100687 // DBCS: double-byte means double-width, except for euc-jp with first
688 // byte 0x8e
Bram Moolenaar071d4272004-06-13 20:20:40 +0000689 if (enc_dbcs != 0 && c >= 0x100)
690 {
691 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
692 return 1;
693 return 2;
694 }
695 }
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100696 return (g_chartab[c & 0xff] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000697}
698
699/*
700 * Return number of display cells occupied by character at "*p".
701 * A TAB is counted as two cells: "^I" or four: "<09>".
702 */
703 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100704ptr2cells(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705{
Bram Moolenaarc667da52019-11-30 20:52:27 +0100706 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000707 if (enc_utf8 && *p >= 0x80)
708 return utf_ptr2cells(p);
Bram Moolenaarc667da52019-11-30 20:52:27 +0100709 // For DBCS we can tell the cell count from the first byte.
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100710 return (g_chartab[*p] & CT_CELL_MASK);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711}
712
713/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100714 * Return the number of character cells string "s" will take on the screen,
Bram Moolenaar071d4272004-06-13 20:20:40 +0000715 * counting TABs as two characters: "^I".
716 */
717 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100718vim_strsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000719{
720 return vim_strnsize(s, (int)MAXCOL);
721}
722
723/*
Bram Moolenaar06af6022012-01-26 13:40:08 +0100724 * Return the number of character cells string "s[len]" will take on the
725 * screen, counting TABs as two characters: "^I".
Bram Moolenaar071d4272004-06-13 20:20:40 +0000726 */
727 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100728vim_strnsize(char_u *s, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729{
730 int size = 0;
731
732 while (*s != NUL && --len >= 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733 if (has_mbyte)
734 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000735 int l = (*mb_ptr2len)(s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000736
737 size += ptr2cells(s);
738 s += l;
739 len -= l - 1;
740 }
741 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742 size += byte2cells(*s++);
Bram Moolenaar13505972019-01-24 15:04:48 +0100743
Bram Moolenaar071d4272004-06-13 20:20:40 +0000744 return size;
745}
746
/*
 * Return the number of screen cells the character at "p" will take on the
 * screen, taking into account the size of a tab at column "col".
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches "return",
 * so it must be the last statement of a function returning int and it is
 * used without a trailing semicolon.
 *   wp   window, its 'list' setting decides whether a TAB is expanded
 *   buf  buffer that supplies the tabstop settings
 *   p    pointer to the character; evaluated twice for a non-TAB, so it
 *        must not have side effects
 *   col  virtual column of the character; parenthesized where it is used in
 *        an expression, thus a compound expression is safe to pass
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
        return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at virtual column "col", using the 'list' setting of the current
 * window and the tab settings of the current buffer.
 */
    int
chartabsize(char_u *p, colnr_T col)
{
    // The macro contains the "return" statements, hence no trailing ';'.
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000779
#ifdef FEAT_LINEBREAK
/*
 * Return the number of screen cells the character at "p" takes when it is
 * displayed at virtual column "col", using the 'list' setting of window "wp"
 * and the tab settings of the buffer in that window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    // The macro contains the "return" statements, hence no trailing ';'.
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
787
788/*
Bram Moolenaardc536092010-07-18 15:45:49 +0200789 * Return the number of characters the string 's' will take on the screen,
790 * taking into account the size of a tab.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000791 */
792 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100793linetabsize(char_u *s)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000794{
Bram Moolenaardc536092010-07-18 15:45:49 +0200795 return linetabsize_col(0, s);
796}
797
798/*
799 * Like linetabsize(), but starting at column "startcol".
800 */
801 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100802linetabsize_col(int startcol, char_u *s)
Bram Moolenaardc536092010-07-18 15:45:49 +0200803{
804 colnr_T col = startcol;
Bram Moolenaarc667da52019-11-30 20:52:27 +0100805 char_u *line = s; // pointer to start of line, for breakindent
Bram Moolenaar071d4272004-06-13 20:20:40 +0000806
807 while (*s != NUL)
Bram Moolenaar597a4222014-06-25 14:39:50 +0200808 col += lbr_chartabsize_adv(line, &s, col);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000809 return (int)col;
810}
811
812/*
813 * Like linetabsize(), but for a given window instead of the current one.
814 */
815 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100816win_linetabsize(win_T *wp, char_u *line, colnr_T len)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000817{
818 colnr_T col = 0;
819 char_u *s;
820
Bram Moolenaar597a4222014-06-25 14:39:50 +0200821 for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100822 MB_PTR_ADV(s))
Bram Moolenaar597a4222014-06-25 14:39:50 +0200823 col += win_lbr_chartabsize(wp, line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000824 return (int)col;
825}
826
827/*
Bram Moolenaar81695252004-12-29 20:58:21 +0000828 * Return TRUE if 'c' is a normal identifier character:
829 * Letters and characters from the 'isident' option.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 */
831 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100832vim_isIDc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100834 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000835}
836
837/*
838 * return TRUE if 'c' is a keyword character: Letters and characters from
Bram Moolenaarcaa55b62017-01-10 13:51:09 +0100839 * 'iskeyword' option for the current buffer.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 * For multi-byte characters mb_get_class() is used (builtin rules).
841 */
842 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100843vim_iswordc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000844{
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100845 return vim_iswordc_buf(c, curbuf);
846}
847
848 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100849vim_iswordc_buf(int c, buf_T *buf)
Bram Moolenaar9d182dd2013-01-23 15:53:15 +0100850{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 if (c >= 0x100)
852 {
853 if (enc_dbcs != 0)
Bram Moolenaar0ab2a882009-05-13 10:51:08 +0000854 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 if (enc_utf8)
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100856 return utf_class_buf(c, buf) >= 2;
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100857 return FALSE;
858 }
859 return (c > 0 && GET_CHARTAB(buf, c) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860}
861
862/*
863 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
864 */
865 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100866vim_iswordp(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000867{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100868 return vim_iswordp_buf(p, curbuf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000869}
870
Bram Moolenaar071d4272004-06-13 20:20:40 +0000871 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100872vim_iswordp_buf(char_u *p, buf_T *buf)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000873{
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100874 int c = *p;
875
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100876 if (has_mbyte && MB_BYTE2LEN(c) > 1)
877 c = (*mb_ptr2char)(p);
Bram Moolenaar4019cf92017-01-28 16:39:34 +0100878 return vim_iswordc_buf(c, buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000879}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000880
881/*
882 * return TRUE if 'c' is a valid file-name character
883 * Assume characters above 0x100 are valid (multi-byte).
884 */
885 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100886vim_isfilec(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887{
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100888 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889}
890
891/*
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000892 * return TRUE if 'c' is a valid file-name character or a wildcard character
893 * Assume characters above 0x100 are valid (multi-byte).
894 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
895 * returns false.
896 */
897 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100898vim_isfilec_or_wc(int c)
Bram Moolenaardd87969c2007-08-21 13:07:12 +0000899{
900 char_u buf[2];
901
902 buf[0] = (char_u)c;
903 buf[1] = NUL;
904 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
905}
906
907/*
Bram Moolenaar3317d5e2017-04-08 19:12:06 +0200908 * Return TRUE if 'c' is a printable character.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 * Assume characters above 0x100 are printable (multi-byte), except for
910 * Unicode.
911 */
912 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100913vim_isprintc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000914{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915 if (enc_utf8 && c >= 0x100)
916 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100917 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000918}
919
920/*
921 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
922 * byte of a double-byte character.
923 */
924 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100925vim_isprintc_strict(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000926{
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
928 return FALSE;
929 if (enc_utf8 && c >= 0x100)
930 return utf_printable(c);
Bram Moolenaar88e8f9f2016-01-20 22:48:02 +0100931 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932}
933
934/*
935 * like chartabsize(), but also check for line breaks on the screen
936 */
937 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100938lbr_chartabsize(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100939 char_u *line UNUSED, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100940 unsigned char *s,
941 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000942{
943#ifdef FEAT_LINEBREAK
Bram Moolenaaree857022019-11-09 23:26:40 +0100944 if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
945 && !curwin->w_p_bri)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000946 {
947#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000948 if (curwin->w_p_wrap)
949 return win_nolbr_chartabsize(curwin, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000950 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
951#ifdef FEAT_LINEBREAK
952 }
Bram Moolenaar597a4222014-06-25 14:39:50 +0200953 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000954#endif
955}
956
957/*
958 * Call lbr_chartabsize() and advance the pointer.
959 */
960 int
Bram Moolenaar7454a062016-01-30 15:14:10 +0100961lbr_chartabsize_adv(
Bram Moolenaarc667da52019-11-30 20:52:27 +0100962 char_u *line, // start of the line
Bram Moolenaar7454a062016-01-30 15:14:10 +0100963 char_u **s,
964 colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965{
966 int retval;
967
Bram Moolenaar597a4222014-06-25 14:39:50 +0200968 retval = lbr_chartabsize(line, *s, col);
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100969 MB_PTR_ADV(*s);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000970 return retval;
971}
972
/*
 * Return the number of screen cells for the character at "s", displayed at
 * virtual column "col" in window "wp", including what 'linebreak',
 * 'showbreak' and 'breakindent' add to it.
 * "line" is the start of the line "s" points into, it is passed on to
 * get_breakindent_win().
 *
 * This function is used very often, keep it fast!!!!
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: on the quick path *headp is only set if
 * it's a non-zero value, init to 0 before calling.
 */
    int
win_lbr_chartabsize(
    win_T       *wp,
    char_u      *line UNUSED, // start of the line
    char_u      *s,
    colnr_T     col,
    int         *headp UNUSED)
{
#ifdef FEAT_LINEBREAK
    int         c;
    int         size;
    colnr_T     col2;
    colnr_T     col_adj = 0; // for a TAB: its screen size minus one
    colnr_T     colmax;
    int         added;
    int         mb_added = 0;
    int         numberextra;
    char_u      *ps;
    int         tab_corr = (*s == TAB);
    int         n;
    char_u      *sbr;

    /*
     * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
#endif
    {
        // Without FEAT_LINEBREAK this block is the whole function.
        if (wp->w_p_wrap)
            return win_nolbr_chartabsize(wp, s, col, headp);
        RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;
    if (tab_corr)
        col_adj = size - 1;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
            && VIM_ISBREAK(c)
            && !VIM_ISBREAK((int)s[1])
            && wp->w_p_wrap
            && wp->w_width != 0)
    {
        /*
         * Count all characters from first non-blank after a blank up to next
         * non-blank after a blank.
         */
        numberextra = win_col_off(wp);
        col2 = col;
        colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
        if (col >= colmax)
        {
            // "col" is past the first screen line: move "colmax" forward in
            // steps of "n" columns until it is beyond "col".
            colmax += col_adj;
            n = colmax + win_col_off2(wp);
            if (n > 0)
                colmax += (((col - colmax) / n) + 1) * n - col_adj;
        }

        for (;;)
        {
            ps = s;
            MB_PTR_ADV(s);
            c = *s;
            // Stop at the NUL, and at a non-break character that follows a
            // break character once "col2" has moved past "col".
            if (!(c != NUL
                    && (VIM_ISBREAK(c)
                        || (!VIM_ISBREAK(c)
                            && (col2 == col || !VIM_ISBREAK((int)*ps))))))
                break;

            col2 += win_chartabsize(wp, s, col2);
            if (col2 >= colmax) // doesn't fit
            {
                size = colmax - col + col_adj;
                break;
            }
        }
    }
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
                                    && wp->w_p_wrap && in_win_border(wp, col))
    {
        ++size; // Count the ">" in the last column.
        mb_added = 1;
    }

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    sbr = get_showbreak_value(wp);
    if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
        colnr_T sbrlen = 0;
        int     numberwidth = win_col_off(wp);

        // From here on "col" is turned into a column within the screen line.
        numberextra = numberwidth;
        col += numberextra + mb_added;
        if (col >= (colnr_T)wp->w_width)
        {
            col -= wp->w_width;
            numberextra = wp->w_width - (numberextra - win_col_off2(wp));
            if (col >= numberextra && numberextra > 0)
                col %= numberextra;
            if (*sbr != NUL)
            {
                sbrlen = (colnr_T)MB_CHARLEN(sbr);
                if (col >= sbrlen)
                    col -= sbrlen;
            }
            if (col >= numberextra && numberextra > 0)
                col = col % numberextra;
            else if (col > 0 && numberextra > 0)
                col += numberwidth - win_col_off2(wp);

            numberwidth -= win_col_off2(wp);
        }
        if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
        {
            added = 0;
            if (*sbr != NUL)
            {
                if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
                {
                    // calculate effective window width
                    int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
                    int prev_width = col
                                 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;

                    // avoid dividing by zero or a negative width below
                    if (width <= 0)
                        width = (colnr_T)1;
                    added += ((size - prev_width) / width) * vim_strsize(sbr);
                    if ((size - prev_width) % width)
                        // wrapped, add another length of 'sbr'
                        added += vim_strsize(sbr);
                }
                else
                    added += vim_strsize(sbr);
            }
            if (wp->w_p_bri)
                added += get_breakindent_win(wp, line);

            size += added;
            // Only report the added size through "headp" when the character
            // is at the start of the screen line.
            if (col != 0)
                added = 0;
        }
    }
    if (headp != NULL)
        *headp = added + mb_added;
    return size;
#endif
}
1141
Bram Moolenaar071d4272004-06-13 20:20:40 +00001142/*
1143 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1144 * 'wrap' is on. This means we need to check for a double-byte character that
1145 * doesn't fit at the end of the screen line.
1146 */
1147 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001148win_nolbr_chartabsize(
1149 win_T *wp,
1150 char_u *s,
1151 colnr_T col,
1152 int *headp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001153{
1154 int n;
1155
1156 if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1157 {
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001158# ifdef FEAT_VARTABS
1159 return tabstop_padding(col, wp->w_buffer->b_p_ts,
1160 wp->w_buffer->b_p_vts_array);
1161# else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162 n = wp->w_buffer->b_p_ts;
1163 return (int)(n - (col % n));
Bram Moolenaar04958cb2018-06-23 19:23:02 +02001164# endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001165 }
1166 n = ptr2cells(s);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001167 // Add one cell for a double-width character in the last column of the
1168 // window, displayed with a ">".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001169 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1170 {
1171 if (headp != NULL)
1172 *headp = 1;
1173 return 3;
1174 }
1175 return n;
1176}
1177
1178/*
1179 * Return TRUE if virtual column "vcol" is in the rightmost column of window
1180 * "wp".
1181 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02001182 static int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001183in_win_border(win_T *wp, colnr_T vcol)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184{
Bram Moolenaarc667da52019-11-30 20:52:27 +01001185 int width1; // width of first line (after line number)
1186 int width2; // width of further lines
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187
Bram Moolenaarc667da52019-11-30 20:52:27 +01001188 if (wp->w_width == 0) // there is no border
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189 return FALSE;
Bram Moolenaar02631462017-09-22 15:20:32 +02001190 width1 = wp->w_width - win_col_off(wp);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001191 if ((int)vcol < width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192 return FALSE;
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001193 if ((int)vcol == width1 - 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194 return TRUE;
1195 width2 = width1 + win_col_off2(wp);
Bram Moolenaar8701cd62009-10-07 14:20:30 +00001196 if (width2 <= 0)
1197 return FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001198 return ((vcol - width1) % width2 == width2 - 1);
1199}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001200
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * "start", "cursor" and "end" may each be NULL, only the non-NULL ones are
 * filled in.
 * When pos->col is MAXCOL the column of the NUL at the end of the line is
 * used.  When the line is empty pos->col is set to zero.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(
    win_T       *wp,
    pos_T       *pos,
    colnr_T     *start,
    colnr_T     *cursor,
    colnr_T     *end)
{
    colnr_T     vcol;
    char_u      *ptr; // points to current char
    char_u      *posptr; // points to char at pos->col
    char_u      *line; // start of the line
    int         incr;   // screen cells of the current character
    int         head;   // cells of "incr" that come before the character
#ifdef FEAT_VARTABS
    int         *vts = wp->w_buffer->b_p_vts_array;
#endif
    int         ts = wp->w_buffer->b_p_ts;
    int         c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
        posptr = NULL; // continue until the NUL
    else
    {
        // Special check for an empty line, which can happen on exit, when
        // ml_get_buf() always returns an empty string.
        if (*ptr == NUL)
            pos->col = 0;
        posptr = ptr + pos->col;
        if (has_mbyte)
            // always start on the first byte
            posptr -= (*mb_head_off)(line, posptr);
    }

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
#endif
       )
    {
        for (;;)
        {
            head = 0;
            c = *ptr;
            // make sure we don't go past the end of the line
            if (c == NUL)
            {
                incr = 1; // NUL at end of line only takes one column
                break;
            }
            // A tab gets expanded, depending on the current column
            if (c == TAB)
#ifdef FEAT_VARTABS
                incr = tabstop_padding(vcol, ts, vts);
#else
                incr = ts - (vcol % ts);
#endif
            else
            {
                if (has_mbyte)
                {
                    // For utf-8, if the byte is >= 0x80, need to look at
                    // further bytes to find the cell width.
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = g_chartab[c] & CT_CELL_MASK;

                    // If a double-cell char doesn't fit at the end of a line
                    // it wraps to the next line, it's like this char is three
                    // cells wide.
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
                    incr = g_chartab[c] & CT_CELL_MASK;
            }

            if (posptr != NULL && ptr >= posptr) // character at pos->col
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }
    else
    {
        for (;;)
        {
            // Let win_lbr_chartabsize() compute the size, it handles the
            // TAB, 'linebreak', 'showbreak' and 'breakindent'.
            head = 0;
            incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
            // make sure we don't go past the end of the line
            if (*ptr == NUL)
            {
                incr = 1; // NUL at end of line only takes one column
                break;
            }

            if (posptr != NULL && ptr >= posptr) // character at pos->col
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }

    // Here "vcol" is the column where the cells of the character start,
    // "incr" the number of cells and "head" what comes before the character.
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
                && !(VIsual_active
                                && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
                )
            *cursor = vcol + incr - 1; // cursor at end
        else
            *cursor = vcol + head; // cursor at start
    }
}
1345
1346/*
1347 * Get virtual cursor column in the current window, pretending 'list' is off.
1348 */
1349 colnr_T
Bram Moolenaar7454a062016-01-30 15:14:10 +01001350getvcol_nolist(pos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351{
1352 int list_save = curwin->w_p_list;
1353 colnr_T vcol;
1354
1355 curwin->w_p_list = FALSE;
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001356 if (posp->coladd)
1357 getvvcol(curwin, posp, NULL, &vcol, NULL);
1358 else
Bram Moolenaardb0eede2018-04-25 22:38:17 +02001359 getvcol(curwin, posp, NULL, &vcol, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001360 curwin->w_p_list = list_save;
1361 return vcol;
1362}
1363
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364/*
1365 * Get virtual column in virtual mode.
1366 */
1367 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001368getvvcol(
1369 win_T *wp,
1370 pos_T *pos,
1371 colnr_T *start,
1372 colnr_T *cursor,
1373 colnr_T *end)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374{
1375 colnr_T col;
1376 colnr_T coladd;
1377 colnr_T endadd;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001378 char_u *ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001379
1380 if (virtual_active())
1381 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001382 // For virtual mode, only want one value
Bram Moolenaar071d4272004-06-13 20:20:40 +00001383 getvcol(wp, pos, &col, NULL, NULL);
1384
1385 coladd = pos->coladd;
1386 endadd = 0;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001387 // Cannot put the cursor on part of a wide character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001388 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001389 if (pos->col < (colnr_T)STRLEN(ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 {
1391 int c = (*mb_ptr2char)(ptr + pos->col);
1392
1393 if (c != TAB && vim_isprintc(c))
1394 {
Bram Moolenaar0ab2a882009-05-13 10:51:08 +00001395 endadd = (colnr_T)(char2cells(c) - 1);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001396 if (coladd > endadd) // past end of line
Bram Moolenaara5792f52005-11-23 21:25:05 +00001397 endadd = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001398 else
1399 coladd = 0;
1400 }
1401 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001402 col += coladd;
1403 if (start != NULL)
1404 *start = col;
1405 if (cursor != NULL)
1406 *cursor = col;
1407 if (end != NULL)
1408 *end = col + endadd;
1409 }
1410 else
1411 getvcol(wp, pos, start, cursor, end);
1412}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001413
Bram Moolenaar071d4272004-06-13 20:20:40 +00001414/*
1415 * Get the leftmost and rightmost virtual column of pos1 and pos2.
1416 * Used for Visual block mode.
1417 */
1418 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001419getvcols(
1420 win_T *wp,
1421 pos_T *pos1,
1422 pos_T *pos2,
1423 colnr_T *left,
1424 colnr_T *right)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001425{
1426 colnr_T from1, from2, to1, to2;
1427
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001428 if (LT_POSP(pos1, pos2))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001429 {
1430 getvvcol(wp, pos1, &from1, NULL, &to1);
1431 getvvcol(wp, pos2, &from2, NULL, &to2);
1432 }
1433 else
1434 {
1435 getvvcol(wp, pos2, &from1, NULL, &to1);
1436 getvvcol(wp, pos1, &from2, NULL, &to2);
1437 }
1438 if (from2 < from1)
1439 *left = from2;
1440 else
1441 *left = from1;
1442 if (to2 > to1)
1443 {
1444 if (*p_sel == 'e' && from2 - 1 >= to1)
1445 *right = from2 - 1;
1446 else
1447 *right = to2;
1448 }
1449 else
1450 *right = to1;
1451}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001452
1453/*
1454 * skipwhite: skip over ' ' and '\t'.
1455 */
1456 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001457skipwhite(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001458{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001459 char_u *p = q;
1460
Bram Moolenaarc667da52019-11-30 20:52:27 +01001461 while (VIM_ISWHITE(*p)) // skip to next non-white
Bram Moolenaar071d4272004-06-13 20:20:40 +00001462 ++p;
1463 return p;
1464}
1465
1466/*
Bram Moolenaare2e69e42017-09-02 20:30:35 +02001467 * getwhitecols: return the number of whitespace
1468 * columns (bytes) at the start of a given line
1469 */
1470 int
1471getwhitecols_curline()
1472{
1473 return getwhitecols(ml_get_curline());
1474}
1475
1476 int
1477getwhitecols(char_u *p)
1478{
1479 return skipwhite(p) - p;
1480}
1481
1482/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001483 * skip over digits
Bram Moolenaar071d4272004-06-13 20:20:40 +00001484 */
1485 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001486skipdigits(char_u *q)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001487{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001488 char_u *p = q;
1489
Bram Moolenaarc667da52019-11-30 20:52:27 +01001490 while (VIM_ISDIGIT(*p)) // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001491 ++p;
1492 return p;
1493}
1494
Bram Moolenaarc4956c82006-03-12 21:58:43 +00001495#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001496/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001497 * skip over binary digits
1498 */
1499 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001500skipbin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001501{
1502 char_u *p = q;
1503
Bram Moolenaarc667da52019-11-30 20:52:27 +01001504 while (vim_isbdigit(*p)) // skip to next non-digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001505 ++p;
1506 return p;
1507}
1508
1509/*
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001510 * skip over digits and hex characters
1511 */
1512 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001513skiphex(char_u *q)
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001514{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001515 char_u *p = q;
1516
Bram Moolenaarc667da52019-11-30 20:52:27 +01001517 while (vim_isxdigit(*p)) // skip to next non-digit
Bram Moolenaar75c50c42005-06-04 22:06:24 +00001518 ++p;
1519 return p;
1520}
1521#endif
1522
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001523/*
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001524 * skip to bin digit (or NUL after the string)
1525 */
1526 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001527skiptobin(char_u *q)
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001528{
1529 char_u *p = q;
1530
Bram Moolenaarc667da52019-11-30 20:52:27 +01001531 while (*p != NUL && !vim_isbdigit(*p)) // skip to next digit
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001532 ++p;
1533 return p;
1534}
1535
1536/*
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001537 * skip to digit (or NUL after the string)
1538 */
1539 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001540skiptodigit(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001541{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001542 char_u *p = q;
1543
Bram Moolenaarc667da52019-11-30 20:52:27 +01001544 while (*p != NUL && !VIM_ISDIGIT(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001545 ++p;
1546 return p;
1547}
1548
1549/*
1550 * skip to hex character (or NUL after the string)
1551 */
1552 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001553skiptohex(char_u *q)
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001554{
Bram Moolenaar1387a602008-07-24 19:31:11 +00001555 char_u *p = q;
1556
Bram Moolenaarc667da52019-11-30 20:52:27 +01001557 while (*p != NUL && !vim_isxdigit(*p)) // skip to next digit
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001558 ++p;
1559 return p;
1560}
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001561
/*
 * Variant of isdigit() that accepts any int, including values > 0x100.
 * isdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' - '9', 0 otherwise.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1573
/*
 * Variant of isxdigit() that accepts any int, including values > 0x100.
 * isxdigit() is not used here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' - '9', 'a' - 'f' and 'A' - 'F', 0 otherwise.
 */
    int
vim_isxdigit(int c)
{
    int	    is_decimal = !(c < '0' || c > '9');
    int	    is_lower_hex = !(c < 'a' || c > 'f');
    int	    is_upper_hex = !(c < 'A' || c > 'F');

    return is_decimal || is_lower_hex || is_upper_hex;
}
1586
/*
 * Binary-digit counterpart of vim_isdigit() and vim_isxdigit(); accepts any
 * int, including values > 0x100.
 * Returns 1 for '0' and '1', 0 otherwise.
 */
    int
vim_isbdigit(int c)
{
    return !(c != '0' && c != '1');
}
1596
/*
 * Octal-digit test: returns 1 for '0' - '7', 0 otherwise.
 */
    static int
vim_isodigit(int c)
{
    return !(c < '0' || c > '7');
}
1602
Bram Moolenaar78622822005-08-23 21:00:13 +00001603/*
1604 * Vim's own character class functions. These exist because many library
1605 * islower()/toupper() etc. do not work properly: they crash when used with
1606 * invalid values or can't handle latin1 when the locale is C.
1607 * Speed is most important here.
1608 */
1609#define LATIN1LOWER 'l'
1610#define LATIN1UPPER 'U'
1611
Bram Moolenaar6e7c7f32005-08-24 22:16:11 +00001612static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
Bram Moolenaar936347b2012-05-25 11:56:22 +02001613static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1614static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
Bram Moolenaar78622822005-08-23 21:00:13 +00001615
1616 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001617vim_islower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001618{
1619 if (c <= '@')
1620 return FALSE;
1621 if (c >= 0x80)
1622 {
1623 if (enc_utf8)
1624 return utf_islower(c);
1625 if (c >= 0x100)
1626 {
1627#ifdef HAVE_ISWLOWER
1628 if (has_mbyte)
1629 return iswlower(c);
1630#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001631 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001632 return FALSE;
1633 }
1634 if (enc_latin1like)
1635 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1636 }
1637 return islower(c);
1638}
1639
1640 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001641vim_isupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001642{
1643 if (c <= '@')
1644 return FALSE;
1645 if (c >= 0x80)
1646 {
1647 if (enc_utf8)
1648 return utf_isupper(c);
1649 if (c >= 0x100)
1650 {
1651#ifdef HAVE_ISWUPPER
1652 if (has_mbyte)
1653 return iswupper(c);
1654#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001655 // islower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001656 return FALSE;
1657 }
1658 if (enc_latin1like)
1659 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1660 }
1661 return isupper(c);
1662}
1663
1664 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001665vim_toupper(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001666{
1667 if (c <= '@')
1668 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001669 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001670 {
1671 if (enc_utf8)
1672 return utf_toupper(c);
1673 if (c >= 0x100)
1674 {
1675#ifdef HAVE_TOWUPPER
1676 if (has_mbyte)
1677 return towupper(c);
1678#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001679 // toupper() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001680 return c;
1681 }
1682 if (enc_latin1like)
1683 return latin1upper[c];
1684 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001685 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1686 return TOUPPER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001687 return TOUPPER_LOC(c);
1688}
1689
1690 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001691vim_tolower(int c)
Bram Moolenaar78622822005-08-23 21:00:13 +00001692{
1693 if (c <= '@')
1694 return c;
Bram Moolenaar3317d5e2017-04-08 19:12:06 +02001695 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
Bram Moolenaar78622822005-08-23 21:00:13 +00001696 {
1697 if (enc_utf8)
1698 return utf_tolower(c);
1699 if (c >= 0x100)
1700 {
1701#ifdef HAVE_TOWLOWER
1702 if (has_mbyte)
1703 return towlower(c);
1704#endif
Bram Moolenaarc667da52019-11-30 20:52:27 +01001705 // tolower() can't handle these chars and may crash
Bram Moolenaar78622822005-08-23 21:00:13 +00001706 return c;
1707 }
1708 if (enc_latin1like)
1709 return latin1lower[c];
1710 }
Bram Moolenaar1cc48202017-04-09 13:41:59 +02001711 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1712 return TOLOWER_ASC(c);
Bram Moolenaar78622822005-08-23 21:00:13 +00001713 return TOLOWER_LOC(c);
1714}
Bram Moolenaar78622822005-08-23 21:00:13 +00001715
Bram Moolenaar071d4272004-06-13 20:20:40 +00001716/*
1717 * skiptowhite: skip over text until ' ' or '\t' or NUL.
1718 */
1719 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001720skiptowhite(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721{
1722 while (*p != ' ' && *p != '\t' && *p != NUL)
1723 ++p;
1724 return p;
1725}
1726
Bram Moolenaar071d4272004-06-13 20:20:40 +00001727/*
1728 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1729 */
1730 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01001731skiptowhite_esc(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732{
1733 while (*p != ' ' && *p != '\t' && *p != NUL)
1734 {
1735 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1736 ++p;
1737 ++p;
1738 }
1739 return p;
1740}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001741
1742/*
1743 * Getdigits: Get a number from a string and skip over it.
1744 * Note: the argument is a pointer to a char_u pointer!
1745 */
1746 long
Bram Moolenaar7454a062016-01-30 15:14:10 +01001747getdigits(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748{
1749 char_u *p;
1750 long retval;
1751
1752 p = *pp;
1753 retval = atol((char *)p);
Bram Moolenaarc667da52019-11-30 20:52:27 +01001754 if (*p == '-') // skip negative sign
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755 ++p;
Bram Moolenaarc667da52019-11-30 20:52:27 +01001756 p = skipdigits(p); // skip to next non-digit
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757 *pp = p;
1758 return retval;
1759}
1760
1761/*
1762 * Return TRUE if "lbuf" is empty or only contains blanks.
1763 */
1764 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01001765vim_isblankline(char_u *lbuf)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766{
1767 char_u *p;
1768
1769 p = skipwhite(lbuf);
1770 return (*p == NUL || *p == '\r' || *p == '\n');
1771}
1772
1773/*
1774 * Convert a string into a long and/or unsigned long, taking care of
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001775 * hexadecimal, octal, and binary numbers. Accepts a '-' sign.
1776 * If "prep" is not NULL, returns a flag to indicate the type of the number:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777 * 0 decimal
1778 * '0' octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001779 * 'O' octal
1780 * 'o' octal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001781 * 'B' bin
1782 * 'b' bin
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 * 'X' hex
1784 * 'x' hex
1785 * If "len" is not NULL, the length of the number in characters is returned.
1786 * If "nptr" is not NULL, the signed result is returned in it.
1787 * If "unptr" is not NULL, the unsigned result is returned in it.
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001788 * If "what" contains STR2NR_BIN recognize binary numbers
1789 * If "what" contains STR2NR_OCT recognize octal numbers
1790 * If "what" contains STR2NR_HEX recognize hex numbers
1791 * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001792 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
Bram Moolenaarce157752017-10-28 16:07:33 +02001793 * If maxlen > 0, check at a maximum maxlen chars.
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001794 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001795 */
1796 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01001797vim_str2nr(
1798 char_u *start,
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001799 int *prep, // return: type of number 0 = decimal, 'x'
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001800 // or 'X' is hex, '0', 'o' or 'O' is octal,
1801 // 'b' or 'B' is bin
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001802 int *len, // return: detected length of number
1803 int what, // what numbers to recognize
1804 varnumber_T *nptr, // return: signed result
1805 uvarnumber_T *unptr, // return: unsigned result
1806 int maxlen, // max length of string to check
1807 int strict) // check strictly
Bram Moolenaar071d4272004-06-13 20:20:40 +00001808{
1809 char_u *ptr = start;
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001810 int pre = 0; // default is decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 int negative = FALSE;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001812 uvarnumber_T un = 0;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001813 int n;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001814
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001815 if (len != NULL)
1816 *len = 0;
1817
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818 if (ptr[0] == '-')
1819 {
1820 negative = TRUE;
1821 ++ptr;
1822 }
1823
Bram Moolenaarc667da52019-11-30 20:52:27 +01001824 // Recognize hex, octal, and bin.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001825 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1826 && (maxlen == 0 || maxlen > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001828 pre = ptr[1];
1829 if ((what & STR2NR_HEX)
1830 && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1831 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001832 // hexadecimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001833 ptr += 2;
1834 else if ((what & STR2NR_BIN)
1835 && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1836 && (maxlen == 0 || maxlen > 2))
Bram Moolenaarc667da52019-11-30 20:52:27 +01001837 // binary
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001838 ptr += 2;
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001839 else if ((what & STR2NR_OOCT)
Bram Moolenaarc37b6552021-01-07 19:36:30 +01001840 && (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001841 && (maxlen == 0 || maxlen > 2))
1842 // octal with prefix "0o"
1843 ptr += 2;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001844 else
1845 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001846 // decimal or octal, default is decimal
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001847 pre = 0;
1848 if (what & STR2NR_OCT)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001849 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001850 // Don't interpret "0", "08" or "0129" as octal.
Bram Moolenaarce157752017-10-28 16:07:33 +02001851 for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001852 {
1853 if (ptr[n] > '7')
1854 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001855 pre = 0; // can't be octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001856 break;
1857 }
Bram Moolenaarc667da52019-11-30 20:52:27 +01001858 pre = '0'; // assume octal
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001859 }
1860 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001861 }
1862 }
1863
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001864 // Do the conversion manually to avoid sscanf() quirks.
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001865 n = 1;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001866 if (pre == 'B' || pre == 'b'
1867 || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001868 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001869 // bin
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001870 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001871 n += 2; // skip over "0b"
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001872 while ('0' <= *ptr && *ptr <= '1')
1873 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001874 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001875 if (un <= UVARNUM_MAX / 2)
1876 un = 2 * un + (uvarnumber_T)(*ptr - '0');
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001877 else
1878 un = UVARNUM_MAX;
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001879 ++ptr;
1880 if (n++ == maxlen)
1881 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001882 if ((what & STR2NR_QUOTE) && *ptr == '\''
1883 && '0' <= ptr[1] && ptr[1] <= '1')
1884 {
1885 ++ptr;
1886 if (n++ == maxlen)
1887 break;
1888 }
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001889 }
1890 }
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001891 else if (pre == 'O' || pre == 'o' ||
1892 pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001893 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001894 // octal
Bram Moolenaarc17e66c2020-06-02 21:38:22 +02001895 if (pre != 0 && pre != '0')
1896 n += 2; // skip over "0o"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001897 while ('0' <= *ptr && *ptr <= '7')
Bram Moolenaar071d4272004-06-13 20:20:40 +00001898 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001899 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001900 if (un <= UVARNUM_MAX / 8)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001901 un = 8 * un + (uvarnumber_T)(*ptr - '0');
1902 else
1903 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001904 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001905 if (n++ == maxlen)
1906 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001907 if ((what & STR2NR_QUOTE) && *ptr == '\''
1908 && '0' <= ptr[1] && ptr[1] <= '7')
1909 {
1910 ++ptr;
1911 if (n++ == maxlen)
1912 break;
1913 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914 }
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001915 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001916 else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001917 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001918 // hex
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001919 if (pre != 0)
Bram Moolenaarc667da52019-11-30 20:52:27 +01001920 n += 2; // skip over "0x"
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001921 while (vim_isxdigit(*ptr))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001923 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001924 if (un <= UVARNUM_MAX / 16)
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001925 un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1926 else
1927 un = UVARNUM_MAX;
Bram Moolenaar5c06f8b2005-05-31 22:14:58 +00001928 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001929 if (n++ == maxlen)
1930 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001931 if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1932 {
1933 ++ptr;
1934 if (n++ == maxlen)
1935 break;
1936 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937 }
1938 }
1939 else
1940 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001941 // decimal
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942 while (VIM_ISDIGIT(*ptr))
1943 {
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001944 uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');
1945
Bram Moolenaarc667da52019-11-30 20:52:27 +01001946 // avoid ubsan error for overflow
Bram Moolenaar07ccf7c2018-06-12 17:25:36 +02001947 if (un < UVARNUM_MAX / 10
1948 || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1949 un = 10 * un + digit;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001950 else
1951 un = UVARNUM_MAX;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001952 ++ptr;
Bram Moolenaar5d1bc782015-07-17 13:03:48 +02001953 if (n++ == maxlen)
1954 break;
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001955 if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1956 {
1957 ++ptr;
1958 if (n++ == maxlen)
1959 break;
1960 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001961 }
1962 }
Bram Moolenaar1ac90b42019-09-15 14:49:52 +02001963
Bram Moolenaar4b96df52020-01-26 22:00:26 +01001964 // Check for an alphanumeric character immediately following, that is
Bram Moolenaar16e9b852019-05-19 19:59:35 +02001965 // most likely a typo.
1966 if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1967 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001968
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01001969 if (prep != NULL)
1970 *prep = pre;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001971 if (len != NULL)
1972 *len = (int)(ptr - start);
1973 if (nptr != NULL)
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001974 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001975 if (negative) // account for leading '-' for decimal numbers
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001976 {
Bram Moolenaarc667da52019-11-30 20:52:27 +01001977 // avoid ubsan error for overflow
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001978 if (un > VARNUM_MAX)
1979 *nptr = VARNUM_MIN;
1980 else
1981 *nptr = -(varnumber_T)un;
1982 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001983 else
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001984 {
1985 if (un > VARNUM_MAX)
1986 un = VARNUM_MAX;
Bram Moolenaar22fcfad2016-07-01 18:17:26 +02001987 *nptr = (varnumber_T)un;
Bram Moolenaar7a40ea22017-01-22 18:34:57 +01001988 }
Bram Moolenaar2df6dcc2004-07-12 15:53:54 +00001989 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001990 if (unptr != NULL)
1991 *unptr = un;
1992}
1993
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'; any
 * other argument is treated like a decimal digit (c - '0').
 */
    int
hex2nr(int c)
{
    int	    zero_point;	    // character code that maps to value 0

    if (c >= 'a' && c <= 'f')
	zero_point = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
	zero_point = 'A' - 10;
    else
	zero_point = '0';
    return c - zero_point;
}
2007
Bram Moolenaar4033c552017-09-16 20:54:51 +02002008#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002009/*
2010 * Convert two hex characters to a byte.
2011 * Return -1 if one of the characters is not hex.
2012 */
2013 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002014hexhex2nr(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002015{
2016 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2017 return -1;
2018 return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2019}
2020#endif
2021
2022/*
2023 * Return TRUE if "str" starts with a backslash that should be removed.
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002024 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
Bram Moolenaar071d4272004-06-13 20:20:40 +00002025 * backslash is not a normal file name character.
2026 * '$' is a valid file name character, we don't remove the backslash before
2027 * it. This means it is not possible to use an environment variable after a
2028 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2029 * Although "\ name" is valid, the backslash in "Program\ files" must be
2030 * removed. Assume a file name doesn't start with a space.
2031 * For multi-byte names, never remove a backslash before a non-ascii
2032 * character, assume that all multi-byte characters are valid file name
2033 * characters.
2034 */
2035 int
Bram Moolenaar7454a062016-01-30 15:14:10 +01002036rem_backslash(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002037{
2038#ifdef BACKSLASH_IN_FILENAME
2039 return (str[0] == '\\'
Bram Moolenaar071d4272004-06-13 20:20:40 +00002040 && str[1] < 0x80
Bram Moolenaar071d4272004-06-13 20:20:40 +00002041 && (str[1] == ' '
2042 || (str[1] != NUL
2043 && str[1] != '*'
2044 && str[1] != '?'
2045 && !vim_isfilec(str[1]))));
2046#else
2047 return (str[0] == '\\' && str[1] != NUL);
2048#endif
2049}
2050
2051/*
2052 * Halve the number of backslashes in a file name argument.
2053 * For MS-DOS we only do this if the character after the backslash
2054 * is not a normal file character.
2055 */
2056 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002057backslash_halve(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002058{
2059 for ( ; *p; ++p)
2060 if (rem_backslash(p))
Bram Moolenaar446cb832008-06-24 21:56:24 +00002061 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002062}
2063
2064/*
2065 * backslash_halve() plus save the result in allocated memory.
Bram Moolenaare2c453d2019-08-21 14:37:09 +02002066 * However, returns "p" when out of memory.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067 */
2068 char_u *
Bram Moolenaar7454a062016-01-30 15:14:10 +01002069backslash_halve_save(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002070{
2071 char_u *res;
2072
2073 res = vim_strsave(p);
2074 if (res == NULL)
2075 return p;
2076 backslash_halve(res);
2077 return res;
2078}
2079
2080#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
2081/*
2082 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
2083 * The first 64 entries have been added to map control characters defined in
2084 * ascii.h
2085 */
2086static char_u ebcdic2ascii_tab[256] =
2087{
2088 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
2089 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
2090 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
2091 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
2092 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
2093 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
2094 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2095 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
2096 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
2097 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
2098 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2099 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
2100 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
2101 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
2102 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
2103 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
2104 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
2105 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
2106 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
2107 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
2108 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
2109 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
2110 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2111 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
2112 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2113 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
2114 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
2115 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
2116 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
2117 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
2118 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
2119 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
2120};
2121
2122/*
2123 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if
2124 * wanting 7-bit ASCII characters out the other end.
2125 */
2126 void
Bram Moolenaar7454a062016-01-30 15:14:10 +01002127ebcdic2ascii(char_u *buffer, int len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002128{
2129 int i;
2130
2131 for (i = 0; i < len; i++)
2132 buffer[i] = ebcdic2ascii_tab[buffer[i]];
2133}
2134#endif