blob: ff201d9ffee18aeef10b8746d4d0c0331f7490e7 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
#ifdef FEAT_RELTIME
// Stand-in flag, initialised to zero.  "timeout_flag" points here whenever it
// does not point at the flag returned by start_timeout().
static sig_atomic_t dummy_timeout_flag = 0;
// Points at the flag returned by start_timeout() while a timer started by
// init_regexp_timeout() is active, otherwise at "dummy_timeout_flag".
static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
#endif
27
Bram Moolenaar071d4272004-06-13 20:20:40 +000028/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000029 * Magic characters have a special meaning, they don't match literally.
30 * Magic characters are negative. This separates them from literal characters
31 * (possibly multi-byte). Only ASCII characters can be Magic.
32 */
33#define Magic(x) ((int)(x) - 256)
34#define un_Magic(x) ((x) + 256)
35#define is_Magic(x) ((x) < 0)
36
/*
 * Return the plain character for "x": a Magic value is converted back with
 * un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Flip the magicness of "x": a Magic value becomes the plain character and
 * a plain character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
#ifdef FEAT_RELTIME
// Number of init_regexp_timeout() calls that have not yet been matched by a
// disable_regexp_timeout() call.
static int timeout_nesting = 0;

/*
 * Start a timer that will cause the regexp to abort after "msec".
 * This doesn't work well recursively.  In case it happens anyway, the first
 * set timeout will prevail, nested ones are ignored.
 * The caller must make sure there is a matching disable_regexp_timeout() call!
 */
    void
init_regexp_timeout(long msec)
{
    if (timeout_nesting != 0)
    {
        // Nested call: keep the timer that is already running and only
        // remember that one more disable call is expected.
        ++timeout_nesting;
        return;
    }

    timeout_flag = start_timeout(msec);
    timeout_nesting = 1;
}

/*
 * Undo one init_regexp_timeout() call.  The timer is stopped, and
 * "timeout_flag" is pointed back at the dummy flag, only when the outermost
 * call is undone.  Calling this without an active timer is reported as an
 * internal error.
 */
    void
disable_regexp_timeout(void)
{
    if (timeout_nesting == 0)
    {
        iemsg("disable_regexp_timeout() called without active timer");
        return;
    }

    --timeout_nesting;
    if (timeout_nesting != 0)
        return;             // still inside an outer init/disable pair

    stop_timeout();
    timeout_flag = &dummy_timeout_flag;
}
#endif
82
#if defined(FEAT_EVAL) || defined(PROTO)
# ifdef FEAT_RELTIME
// Value of "timeout_flag" stashed by save_timeout_for_debugging().  It has
// the same volatile-qualified type as "timeout_flag", so that saving and
// restoring needs no cast and never drops the qualifier.
static volatile sig_atomic_t *saved_timeout_flag;
# endif

/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.
 * Must be followed by calling restore_timeout_for_debugging().
 */
    void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    saved_timeout_flag = timeout_flag;
    timeout_flag = &dummy_timeout_flag;
# endif
}

/*
 * Make "timeout_flag" point again at the flag that was stashed by the
 * preceding save_timeout_for_debugging() call.
 */
    void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
#endif
110
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200112 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 * number; the start node begins in the second byte. It's used to catch the
114 * most severe mutilation of the program by the caller.
115 */
116
117#define REGMAGIC 0234
118
119/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 * Utility definitions.
121 */
122#define UCHARAT(p) ((int)*(char_u *)(p))
123
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100124// Used for an error (down from) vim_regcomp(): give the error message, set
125// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100126#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
127#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
128#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
129#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +0100130#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100131#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +0000132#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000133
Bram Moolenaar95f09602016-11-10 20:01:45 +0100134
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135#define MAX_LIMIT (32767L << 16L)
136
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137#define NOT_MULTI 0
138#define MULTI_ONE 1
139#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200140
141// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100142#define RA_FAIL 1 // something failed, abort
143#define RA_CONT 2 // continue in inner loop
144#define RA_BREAK 3 // break inner loop
145#define RA_MATCH 4 // successful match
146#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200147
Bram Moolenaar071d4272004-06-13 20:20:40 +0000148/*
149 * Return NOT_MULTI if c is not a "multi" operator.
150 * Return MULTI_ONE if c is a single "multi" operator.
151 * Return MULTI_MULT if c is a multi "multi" operator.
152 */
153 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100154re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000155{
156 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
157 return MULTI_ONE;
158 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
159 return MULTI_MULT;
160 return NOT_MULTI;
161}
162
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000163static char_u *reg_prev_sub = NULL;
John Marriott82792db2024-05-12 00:07:17 +0200164static size_t reg_prev_sublen = 0;
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000165
Bram Moolenaar071d4272004-06-13 20:20:40 +0000166/*
167 * REGEXP_INRANGE contains all characters which are always special in a []
168 * range after '\'.
169 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
170 * These are:
171 * \n - New line (NL).
172 * \r - Carriage Return (CR).
173 * \t - Tab (TAB).
174 * \e - Escape (ESC).
175 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000176 * \d - Character code in decimal, eg \d123
 *	\o	- Character code in octal, eg \o40
178 * \x - Character code in hex, eg \x4a
179 * \u - Multibyte character code, eg \u20ac
180 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000181 */
182static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000183static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000184
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185/*
186 * Translate '\x' to its control character, except "\n", which is Magic.
187 */
188 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100189backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190{
191 switch (c)
192 {
193 case 'r': return CAR;
194 case 't': return TAB;
195 case 'e': return ESC;
196 case 'b': return BS;
197 }
198 return c;
199}
200
// Identifiers for the items that can be written as "[:name:]"; these are the
// values returned by get_char_class().  CLASS_NONE means that no item was
// recognized.
enum
{
    CLASS_ALNUM = 0,
    CLASS_ALPHA,
    CLASS_BLANK,
    CLASS_CNTRL,
    CLASS_DIGIT,
    CLASS_GRAPH,
    CLASS_LOWER,
    CLASS_PRINT,
    CLASS_PUNCT,
    CLASS_SPACE,
    CLASS_UPPER,
    CLASS_XDIGIT,
    CLASS_TAB,
    CLASS_RETURN,
    CLASS_BACKSPACE,
    CLASS_ESCAPE,
    CLASS_IDENT,
    CLASS_KEYWORD,
    CLASS_FNAME,
    CLASS_NONE = 99
};
224
Bram Moolenaar071d4272004-06-13 20:20:40 +0000225/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000226 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
228 * recognized. Otherwise "pp" is advanced to after the item.
229 */
230 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100231get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232{
John Marriott82792db2024-05-12 00:07:17 +0200233 // must be sorted by the 'value' field because it is used by bsearch()!
234 static keyvalue_T char_class_tab[] =
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 {
John Marriott82792db2024-05-12 00:07:17 +0200236 KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
237 KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
238 KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
239 KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
240 KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
241 KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
242 KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
243 KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
244 KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
245 KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
246 KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
247 KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
248 KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
249 KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
250 KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
251 KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
252 KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
253 KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
254 KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 };
Bram Moolenaar071d4272004-06-13 20:20:40 +0000256
John Marriott82792db2024-05-12 00:07:17 +0200257 // check that the value of "pp" has a chance of matching
258 if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
259 && ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000260 {
John Marriott82792db2024-05-12 00:07:17 +0200261 keyvalue_T target;
262 keyvalue_T *entry;
263 // this function can be called repeatedly with the same value for "pp"
264 // so we cache the last found entry.
265 static keyvalue_T *last_entry = NULL;
266
267 target.key = 0;
268 target.value = (char *)*pp + 2;
269 target.length = 0; // not used, see cmp_keyvalue_value_n()
270
271 if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
272 entry = last_entry;
273 else
274 entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
275 ARRAY_LENGTH(char_class_tab),
276 sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
277 if (entry != NULL)
278 {
279 last_entry = entry;
280 *pp += entry->length + 2;
281 return entry->key;
282 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283 }
284 return CLASS_NONE;
285}
286
287/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288 * Specific version of character class functions.
289 * Using a table to keep this fast.
290 */
291static short class_tab[256];
292
293#define RI_DIGIT 0x01
294#define RI_HEX 0x02
295#define RI_OCTAL 0x04
296#define RI_WORD 0x08
297#define RI_HEAD 0x10
298#define RI_ALPHA 0x20
299#define RI_LOWER 0x40
300#define RI_UPPER 0x80
301#define RI_WHITE 0x100
302
303 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100304init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305{
306 int i;
307 static int done = FALSE;
308
309 if (done)
310 return;
311
312 for (i = 0; i < 256; ++i)
313 {
314 if (i >= '0' && i <= '7')
315 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
316 else if (i >= '8' && i <= '9')
317 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
318 else if (i >= 'a' && i <= 'f')
319 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
322 else if (i >= 'A' && i <= 'F')
323 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
326 else if (i == '_')
327 class_tab[i] = RI_WORD + RI_HEAD;
328 else
329 class_tab[i] = 0;
330 }
331 class_tab[' '] |= RI_WHITE;
332 class_tab['\t'] |= RI_WHITE;
333 done = TRUE;
334}
335
// Character class tests using "class_tab".  "c" is evaluated twice, thus it
// must not have side effects.
// "((c) & ~0xFF) == 0" is true only for 0 <= c <= 255: it rejects values
// above 0xFF and also negative values (such as Magic() characters), which
// would otherwise index before the start of "class_tab".
#define ri_digit(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)   (((c) & ~0xFF) == 0 && (class_tab[c] & RI_HEX))
#define ri_octal(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)  (((c) & ~0xFF) == 0 && (class_tab[c] & RI_WORD))
#define ri_head(c)  (((c) & ~0xFF) == 0 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_LOWER))
#define ri_upper(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_UPPER))
#define ri_white(c) (((c) & ~0xFF) == 0 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100346// flags for regflags
347#define RF_ICASE 1 // ignore case
348#define RF_NOICASE 2 // don't ignore case
349#define RF_HASNL 4 // can match a NL
350#define RF_ICOMBINE 8 // ignore combining characters
351#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000352
353/*
354 * Global work variables for vim_regcomp().
355 */
356
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100357static char_u *regparse; // Input-scan pointer.
358static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100359static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000360#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100361static int regnzpar; // \z() count.
362static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100364static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100366static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000367#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100369static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100371static int reg_string; // matching with a string instead of a buffer
372 // line
373static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000374
/*
 * META contains all characters that may be magic, except '^' and '$'.
 */

// META[] is used often enough to justify turning it into a table.
// The table is indexed by character value.  It holds 127 entries, for the
// values 0 up to and including '~' (126); there is no entry for larger
// values.
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//                 %  &     (  )  *  +        .
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
//     1  2  3  4  5  6  7  8  9        <  =  >  ?
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
//  @  A     C  D     F     H  I     K  L  M     O
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
//  P        S     U  V  W  X     Z  [           _
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
//     a     c  d     f     h  i     k  l  m  n  o
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
//  p        s     u  v  w  x     z  {  |     ~
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000396
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100397static int curchr; // currently parsed character
398// Previous character. Note: prevchr is sometimes -1 when we are not at the
399// start, eg in /[ ^I]^ the pattern was never found even if it existed,
400// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200401static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100402static int prevprevchr; // previous-previous character
403static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100405// arguments for reg()
406#define REG_NOPAREN 0 // toplevel reg()
407#define REG_PAREN 1 // \(\)
408#define REG_ZPAREN 2 // \z(\)
409#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410
// Snapshot of the pattern parser state.  save_parse_state() copies the global
// variable with the same name into each member, restore_parse_state() copies
// the members back.
typedef struct
{
    char_u *regparse;
    int prevchr_len;
    int curchr;
    int prevchr;
    int prevprevchr;
    int nextchr;
    int at_start;
    int prev_at_start;
    int regnpar;
} parse_state_T;
423
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100424static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100425static int getchr(void);
426static void skipchr_keepstart(void);
427static int peekchr(void);
428static void skipchr(void);
429static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100430static long gethexchrs(int maxinputlen);
431static long getoctchrs(void);
432static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100433static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100434static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200435static int cstrncmp(char_u *s1, char_u *s2, int *n);
436static char_u *cstrchr(char_u *, int);
437static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100438static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100439#ifdef FEAT_EVAL
440static void report_re_switch(char_u *pat);
441#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200443static regengine_T bt_regengine;
444static regengine_T nfa_regengine;
445
/*
 * Return TRUE if compiled regular expression "prog" can match a line break.
 * The result is the RF_HASNL bit of "prog->regflags", thus a "true" result is
 * non-zero but not necessarily equal to 1.
 */
    int
re_multiline(regprog_T *prog)
{
    return (prog->regflags & RF_HASNL);
}
454
455/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000456 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
457 * Returns a character representing the class. Zero means that no item was
458 * recognized. Otherwise "pp" is advanced to after the item.
459 */
460 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100461get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000462{
463 int c;
464 int l = 1;
465 char_u *p = *pp;
466
Bram Moolenaar985079c2019-02-16 17:07:47 +0100467 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000468 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000470 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000471 if (p[l + 2] == '=' && p[l + 3] == ']')
472 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000473 if (has_mbyte)
474 c = mb_ptr2char(p + 2);
475 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000476 c = p[2];
477 *pp += l + 4;
478 return c;
479 }
480 }
481 return 0;
482}
483
484/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 * Check for a collating element "[.a.]". "pp" points to the '['.
486 * Returns a character. Zero means that no item was recognized. Otherwise
487 * "pp" is advanced to after the item.
488 * Currently only single characters are recognized!
489 */
490 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100491get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000492{
493 int c;
494 int l = 1;
495 char_u *p = *pp;
496
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100497 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000498 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000499 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000500 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000501 if (p[l + 2] == '.' && p[l + 3] == ']')
502 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000503 if (has_mbyte)
504 c = mb_ptr2char(p + 2);
505 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000506 c = p[2];
507 *pp += l + 4;
508 return c;
509 }
510 }
511 return 0;
512}
513
static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
static int reg_cpo_bsl; // 'cpoptions' contains '\' flag

/*
 * Set "reg_cpo_lit" and "reg_cpo_bsl" according to whether "p_cpo" contains
 * CPO_LITERAL and CPO_BACKSL respectively.
 */
    static void
get_cpo_flags(void)
{
    reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
    reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000523
524/*
525 * Skip over a "[]" range.
526 * "p" must point to the character after the '['.
527 * The returned pointer is on the matching ']', or the terminating NUL.
528 */
529 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100530skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000531{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000532 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000533
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100534 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000535 ++p;
536 if (*p == ']' || *p == '-')
537 ++p;
538 while (*p != NUL && *p != ']')
539 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000540 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000541 p += l;
542 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000543 if (*p == '-')
544 {
545 ++p;
546 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100547 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000548 }
549 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200550 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000551 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200552 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000553 p += 2;
554 else if (*p == '[')
555 {
556 if (get_char_class(&p) == CLASS_NONE
557 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200558 && get_coll_element(&p) == 0
559 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100560 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000561 }
562 else
563 ++p;
564 }
565
566 return p;
567}
568
569/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000570 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200571 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 * Take care of characters with a backslash in front of it.
573 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000574 */
575 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100576skip_regexp(
577 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200578 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200579 int magic)
580{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100581 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200582}
583
584/*
585 * Call skip_regexp() and when the delimiter does not match give an error and
586 * return NULL.
587 */
588 char_u *
589skip_regexp_err(
590 char_u *startp,
591 int delim,
592 int magic)
593{
594 char_u *p = skip_regexp(startp, delim, magic);
595
596 if (*p != delim)
597 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000598 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200599 return NULL;
600 }
601 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200602}
603
604/*
605 * skip_regexp() with extra arguments:
606 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
607 * expression and change "\?" to "?". If "*newp" is not NULL the expression
608 * is changed in-place.
609 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100610 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200611 */
612 char_u *
613skip_regexp_ex(
614 char_u *startp,
615 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100616 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200617 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100618 int *dropped,
619 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000620{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100621 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000622 char_u *p = startp;
zeertzjq30741372024-05-24 07:37:36 +0200623 size_t startplen = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000624
625 if (magic)
626 mymagic = MAGIC_ON;
627 else
628 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200629 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100631 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100633 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000634 break;
635 if ((p[0] == '[' && mymagic >= MAGIC_ON)
636 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
637 {
638 p = skip_anyof(p + 1);
639 if (p[0] == NUL)
640 break;
641 }
642 else if (p[0] == '\\' && p[1] != NUL)
643 {
644 if (dirc == '?' && newp != NULL && p[1] == '?')
645 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100646 // change "\?" to "?", make a copy first.
zeertzjq30741372024-05-24 07:37:36 +0200647 if (startplen == 0)
648 startplen = STRLEN(startp);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000649 if (*newp == NULL)
650 {
John Marriott82792db2024-05-12 00:07:17 +0200651 *newp = vim_strnsave(startp, startplen);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000652 if (*newp != NULL)
zeertzjq30741372024-05-24 07:37:36 +0200653 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000654 p = *newp + (p - startp);
zeertzjq30741372024-05-24 07:37:36 +0200655 startp = *newp;
656 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200658 if (dropped != NULL)
659 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000660 if (*newp != NULL)
zeertzjq30741372024-05-24 07:37:36 +0200661 mch_memmove(p, p + 1, startplen - ((p + 1) - startp) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 else
663 ++p;
664 }
665 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100666 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667 if (*p == 'v')
668 mymagic = MAGIC_ALL;
669 else if (*p == 'V')
670 mymagic = MAGIC_NONE;
671 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000672 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100673 if (magic_val != NULL)
674 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000675 return p;
676}
677
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200678/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200679 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200680 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100681static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200682static int at_start; // True when on the first character
683static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100684
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200686 * Start parsing at "str".
687 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000688 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100689initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000690{
691 regparse = str;
692 prevchr_len = 0;
693 curchr = prevprevchr = prevchr = nextchr = -1;
694 at_start = TRUE;
695 prev_at_start = FALSE;
696}
697
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200698/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200699 * Save the current parse state, so that it can be restored and parsing
700 * starts in the same state again.
701 */
702 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100703save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200704{
705 ps->regparse = regparse;
706 ps->prevchr_len = prevchr_len;
707 ps->curchr = curchr;
708 ps->prevchr = prevchr;
709 ps->prevprevchr = prevprevchr;
710 ps->nextchr = nextchr;
711 ps->at_start = at_start;
712 ps->prev_at_start = prev_at_start;
713 ps->regnpar = regnpar;
714}
715
716/*
717 * Restore a previously saved parse state.
718 */
719 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100720restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200721{
722 regparse = ps->regparse;
723 prevchr_len = ps->prevchr_len;
724 curchr = ps->curchr;
725 prevchr = ps->prevchr;
726 prevprevchr = ps->prevprevchr;
727 nextchr = ps->nextchr;
728 at_start = ps->at_start;
729 prev_at_start = ps->prev_at_start;
730 regnpar = ps->regnpar;
731}
732
733
734/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200735 * Get the next character without advancing.
736 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100738peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000739{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000740 static int after_slash = FALSE;
741
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000742 if (curchr != -1)
743 return curchr;
744
745 switch (curchr = regparse[0])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000746 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747 case '.':
748 case '[':
749 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100750 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000751 if (reg_magic >= MAGIC_ON)
752 curchr = Magic(curchr);
753 break;
754 case '(':
755 case ')':
756 case '{':
757 case '%':
758 case '+':
759 case '=':
760 case '?':
761 case '@':
762 case '!':
763 case '&':
764 case '|':
765 case '<':
766 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100767 case '#': // future ext.
768 case '"': // future ext.
769 case '\'': // future ext.
770 case ',': // future ext.
771 case '-': // future ext.
772 case ':': // future ext.
773 case ';': // future ext.
774 case '`': // future ext.
775 case '/': // Can't be used in / command
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000776 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777 if (reg_magic == MAGIC_ALL)
778 curchr = Magic(curchr);
779 break;
780 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100781 // * is not magic as the very first character, eg "?*ptr", when
782 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
783 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000784 if (reg_magic >= MAGIC_ON
785 && !at_start
786 && !(prev_at_start && prevchr == Magic('^'))
787 && (after_slash
788 || (prevchr != Magic('(')
789 && prevchr != Magic('&')
790 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000791 curchr = Magic('*');
792 break;
793 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100794 // '^' is only magic as the very first character and if it's after
795 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000796 if (reg_magic >= MAGIC_OFF
797 && (at_start
798 || reg_magic == MAGIC_ALL
799 || prevchr == Magic('(')
800 || prevchr == Magic('|')
801 || prevchr == Magic('&')
802 || prevchr == Magic('n')
803 || (no_Magic(prevchr) == '('
804 && prevprevchr == Magic('%'))))
805 {
806 curchr = Magic('^');
807 at_start = TRUE;
808 prev_at_start = FALSE;
809 }
810 break;
811 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100812 // '$' is only magic as the very last char and if it's in front of
813 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000814 if (reg_magic >= MAGIC_OFF)
815 {
816 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200817 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000818
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100819 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000820 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000821 || p[1] == 'm' || p[1] == 'M'
822 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200823 {
824 if (p[1] == 'v')
825 is_magic_all = TRUE;
826 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
827 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200829 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 if (p[0] == NUL
831 || (p[0] == '\\'
832 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
833 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200834 || (is_magic_all
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000835 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000836 || reg_magic == MAGIC_ALL)
837 curchr = Magic('$');
838 }
839 break;
840 case '\\':
841 {
842 int c = regparse[1];
843
844 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100845 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000846 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000847 {
848 /*
849 * META contains everything that may be magic sometimes,
850 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200851 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000852 * magicness. Therefore, \ is so meta-magic that it is
853 * not in META.
854 */
855 curchr = -1;
856 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100857 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000859 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860 peekchr();
861 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000862 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000863 curchr = toggle_Magic(curchr);
864 }
865 else if (vim_strchr(REGEXP_ABBR, c))
866 {
867 /*
868 * Handle abbreviations, like "\t" for TAB -- webb
869 */
870 curchr = backslash_trans(c);
871 }
872 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
873 curchr = toggle_Magic(c);
874 else
875 {
876 /*
877 * Next character can never be (made) magic?
878 * Then backslashing it won't do anything.
879 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000880 if (has_mbyte)
881 curchr = (*mb_ptr2char)(regparse + 1);
882 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000883 curchr = c;
884 }
885 break;
886 }
887
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888 default:
889 if (has_mbyte)
890 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000891 }
892
893 return curchr;
894}
895
896/*
897 * Eat one lexed character. Do this in a way that we can undo it.
898 */
899 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100900skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000901{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100902 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903 if (*regparse == '\\')
904 prevchr_len = 1;
905 else
906 prevchr_len = 0;
907 if (regparse[prevchr_len] != NUL)
908 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000909 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100910 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000911 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000912 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000913 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000914 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000915 ++prevchr_len;
916 }
917 regparse += prevchr_len;
918 prev_at_start = at_start;
919 at_start = FALSE;
920 prevprevchr = prevchr;
921 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100922 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000923 nextchr = -1;
924}
925
926/*
927 * Skip a character while keeping the value of prev_at_start for at_start.
928 * prevchr and prevprevchr are also kept.
929 */
930 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100931skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932{
933 int as = prev_at_start;
934 int pr = prevchr;
935 int prpr = prevprevchr;
936
937 skipchr();
938 at_start = as;
939 prevchr = pr;
940 prevprevchr = prpr;
941}
942
/*
 * Lexical analyzer entry point: return the next character of the pattern, as
 * classified by peekchr(), and advance past it with skipchr().
 */
    static int
getchr(void)
{
    int c;

    c = peekchr();
    skipchr();

    return c;
}
955
956/*
957 * put character back. Works only once!
958 */
959 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100960ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000961{
962 nextchr = curchr;
963 curchr = prevchr;
964 prevchr = prevprevchr;
965 at_start = prev_at_start;
966 prev_at_start = FALSE;
967
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100968 // Backup regparse, so that it's at the same position as before the
969 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000970 regparse -= prevchr_len;
971}
972
973/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000974 * Get and return the value of the hex string at the current position.
975 * Return -1 if there is no valid hex number.
976 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000977 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000978 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000979 * The parameter controls the maximum number of input characters. This will be
980 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
981 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100982 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100983gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000984{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100985 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000986 int c;
987 int i;
988
989 for (i = 0; i < maxinputlen; ++i)
990 {
991 c = regparse[0];
992 if (!vim_isxdigit(c))
993 break;
994 nr <<= 4;
995 nr |= hex2nr(c);
996 ++regparse;
997 }
998
999 if (i == 0)
1000 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001001 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001002}
1003
1004/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001005 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001006 * current position. Return -1 for invalid. Consumes all digits.
1007 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001008 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001009getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001010{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001011 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001012 int c;
1013 int i;
1014
1015 for (i = 0; ; ++i)
1016 {
1017 c = regparse[0];
1018 if (c < '0' || c > '9')
1019 break;
1020 nr *= 10;
1021 nr += c - '0';
1022 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001023 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001024 }
1025
1026 if (i == 0)
1027 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001028 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001029}
1030
1031/*
1032 * get and return the value of the octal string immediately after the current
1033 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
1034 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
1035 * treat 8 or 9 as recognised characters. Position is updated:
1036 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00001037 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001038 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001039 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001040getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001041{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001042 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001043 int c;
1044 int i;
1045
1046 for (i = 0; i < 3 && nr < 040; ++i)
1047 {
1048 c = regparse[0];
1049 if (c < '0' || c > '7')
1050 break;
1051 nr <<= 3;
1052 nr |= hex2nr(c);
1053 ++regparse;
1054 }
1055
1056 if (i == 0)
1057 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001058 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001059}
1060
1061/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062 * read_limits - Read two integers to be taken as a minimum and maximum.
1063 * If the first character is '-', then the range is reversed.
1064 * Should end with 'end'. If minval is missing, zero is default, if maxval is
1065 * missing, a very big number is the default.
1066 */
1067 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001068read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001069{
1070 int reverse = FALSE;
1071 char_u *first_char;
1072 long tmp;
1073
1074 if (*regparse == '-')
1075 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001076 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077 regparse++;
1078 reverse = TRUE;
1079 }
1080 first_char = regparse;
1081 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001082 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001083 {
1084 if (vim_isdigit(*++regparse))
1085 *maxval = getdigits(&regparse);
1086 else
1087 *maxval = MAX_LIMIT;
1088 }
1089 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001090 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001091 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001092 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001093 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001094 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001095 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001096 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001097 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001098
1099 /*
1100 * Reverse the range if there was a '-', or make sure it is in the right
1101 * order otherwise.
1102 */
1103 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1104 {
1105 tmp = *minval;
1106 *minval = *maxval;
1107 *maxval = tmp;
1108 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001109 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001110 return OK;
1111}
1112
1113/*
1114 * vim_regexec and friends
1115 */
1116
1117/*
1118 * Global work variables for vim_regexec().
1119 */
1120
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001121static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001122#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001123static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001124#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001125static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001126
1127/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001128 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1129 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001130 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001131 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001132static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001133static unsigned reg_tofreelen;
1134
1135/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001136 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001137 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001138 * done:
1139 * single-line multi-line
1140 * reg_match &regmatch_T NULL
1141 * reg_mmatch NULL &regmmatch_T
1142 * reg_startp reg_match->startp <invalid>
1143 * reg_endp reg_match->endp <invalid>
1144 * reg_startpos <invalid> reg_mmatch->startpos
1145 * reg_endpos <invalid> reg_mmatch->endpos
1146 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001147 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148 * reg_firstlnum <invalid> first line in which to search
1149 * reg_maxline 0 last line nr
1150 * reg_line_lbr FALSE or TRUE FALSE
1151 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001152typedef struct {
1153 regmatch_T *reg_match;
1154 regmmatch_T *reg_mmatch;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001155
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 char_u **reg_startp;
1157 char_u **reg_endp;
1158 lpos_T *reg_startpos;
1159 lpos_T *reg_endpos;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001160
Bram Moolenaar6100d022016-10-02 16:51:57 +02001161 win_T *reg_win;
1162 buf_T *reg_buf;
1163 linenr_T reg_firstlnum;
1164 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001165 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001166
Bram Moolenaar0270f382018-07-17 05:43:58 +02001167 // The current match-position is stord in these variables:
1168 linenr_T lnum; // line number, relative to first line
1169 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001170 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001171
1172 int need_clear_subexpr; // subexpressions still need to be cleared
1173#ifdef FEAT_SYN_HL
1174 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1175 // cleared
1176#endif
1177
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001178 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1179 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1180 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001181 int reg_ic;
1182
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001183 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1184 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001185 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001186
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001187 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1188 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001189 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001190
1191 // State for the NFA engine regexec.
1192 int nfa_has_zend; // NFA regexp \ze operator encountered.
1193 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1194 int nfa_nsubexpr; // Number of sub expressions actually being used
1195 // during execution. 1 if only the whole match
1196 // (subexpr 0) is used.
1197 // listid is global, so that it increases on recursive calls to
1198 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1199 // all the states.
1200 int nfa_listid;
1201 int nfa_alt_listid;
1202
1203#ifdef FEAT_SYN_HL
1204 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1205#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001206} regexec_T;
1207
1208static regexec_T rex;
1209static int rex_in_use = FALSE;
1210
Bram Moolenaar071d4272004-06-13 20:20:40 +00001211/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001212 * Return TRUE if character 'c' is included in 'iskeyword' option for
1213 * "reg_buf" buffer.
1214 */
1215 static int
1216reg_iswordc(int c)
1217{
1218 return vim_iswordc_buf(c, rex.reg_buf);
1219}
1220
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// State used by reg_submatch().  It is kept separately because the
// substitution string may be an expression that itself contains a call to
// substitute() and submatch().
typedef struct {
    regmatch_T	*sm_match;
    regmmatch_T	*sm_mmatch;
    linenr_T	sm_firstlnum;
    linenr_T	sm_maxline;
    int		sm_line_lbr;
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1237
// Bit flags for the "flags" argument of reg_getline_common().
typedef enum
{
    RGLF_LINE = 0x01,		// return the text of the line
    RGLF_LENGTH = 0x02		// return the length of the line
#ifdef FEAT_EVAL
    ,
    RGLF_SUBMATCH = 0x04	// use the submatch state "rsm", not "rex"
#endif
} reg_getline_flags_T;
1247
1248//
1249// common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and
1250// reg_getline_submatch_len().
1251// the flags argument (which is a bitmask) controls what info is to be returned and whether
1252// or not submatch is in effect.
1253// note:
1254// submatch is available only if FEAT_EVAL is defined.
1255 static void
1256reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
1257{
1258 int get_line = flags & RGLF_LINE;
1259 int get_length = flags & RGLF_LENGTH;
1260 linenr_T firstlnum;
1261 linenr_T maxline;
1262
1263#ifdef FEAT_EVAL
1264 if (flags & RGLF_SUBMATCH)
1265 {
1266 firstlnum = rsm.sm_firstlnum + lnum;
1267 maxline = rsm.sm_maxline;
1268 }
1269 else
1270#endif
1271 {
1272 firstlnum = rex.reg_firstlnum + lnum;
1273 maxline = rex.reg_maxline;
1274 }
1275
1276 // when looking behind for a match/no-match lnum is negative. but we
1277 // can't go before line 1.
1278 if (firstlnum < 1)
1279 {
1280 if (get_line)
1281 *line = NULL;
1282 if (get_length)
1283 *length = 0;
1284
1285 return;
1286 }
1287
1288 if (lnum > maxline)
1289 {
1290 // must have matched the "\n" in the last line.
1291 if (get_line)
1292 *line = (char_u *)"";
1293 if (get_length)
1294 *length = 0;
1295
1296 return;
1297 }
1298
1299 if (get_line)
1300 *line = ml_get_buf(rex.reg_buf, firstlnum, FALSE);
1301 if (get_length)
1302 *length = ml_get_buf_len(rex.reg_buf, firstlnum);
1303}
1304
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001305/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001306 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1307 */
1308 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001309reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310{
John Marriott82792db2024-05-12 00:07:17 +02001311 char_u *line;
1312
1313 reg_getline_common(lnum, RGLF_LINE, &line, NULL);
1314
1315 return line;
1316}
1317
1318/*
1319 * Get length of line "lnum", which is relative to "reg_firstlnum".
1320 */
1321 static colnr_T
1322reg_getline_len(linenr_T lnum)
1323{
1324 colnr_T length;
1325
1326 reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
1327
1328 return length;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329}
1330
#ifdef FEAT_SYN_HL
// Workspace for \z(...\) matches, one entry per sub-expression.
static char_u	*reg_startzp[NSUBEXP];	    // beginning of the match
static char_u	*reg_endzp[NSUBEXP];	    // end of the match
static lpos_T	reg_startzpos[NSUBEXP];	    // idem, beginning pos
static lpos_T	reg_endzpos[NSUBEXP];	    // idem, end pos
#endif

// TRUE if using multi-line regexp.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001340
Bram Moolenaar071d4272004-06-13 20:20:40 +00001341#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342/*
1343 * Create a new extmatch and mark it as referenced once.
1344 */
1345 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001346make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001347{
1348 reg_extmatch_T *em;
1349
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001350 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001351 if (em != NULL)
1352 em->refcnt = 1;
1353 return em;
1354}
1355
1356/*
1357 * Add a reference to an extmatch.
1358 */
1359 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001360ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001361{
1362 if (em != NULL)
1363 em->refcnt++;
1364 return em;
1365}
1366
1367/*
1368 * Remove a reference to an extmatch. If there are no references left, free
1369 * the info.
1370 */
1371 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001372unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373{
1374 int i;
1375
1376 if (em != NULL && --em->refcnt <= 0)
1377 {
1378 for (i = 0; i < NSUBEXP; ++i)
1379 vim_free(em->matches[i]);
1380 vim_free(em);
1381 }
1382}
1383#endif
1384
1385/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386 * Get class of previous character.
1387 */
1388 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001389reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001391 if (rex.input > rex.line)
1392 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001393 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001394 return -1;
1395}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001396
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001397/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001398 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001399 */
1400 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001401reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001402{
1403 pos_T top, bot;
1404 linenr_T lnum;
1405 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001406 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001407 int mode;
1408 colnr_T start, end;
1409 colnr_T start2, end2;
1410 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001411 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001412
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001413 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001414 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001415 return FALSE;
1416
1417 if (VIsual_active)
1418 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001419 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001420 {
1421 top = VIsual;
1422 bot = wp->w_cursor;
1423 }
1424 else
1425 {
1426 top = wp->w_cursor;
1427 bot = VIsual;
1428 }
1429 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001430 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001431 }
1432 else
1433 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001434 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001435 {
1436 top = curbuf->b_visual.vi_start;
1437 bot = curbuf->b_visual.vi_end;
1438 }
1439 else
1440 {
1441 top = curbuf->b_visual.vi_end;
1442 bot = curbuf->b_visual.vi_start;
1443 }
zeertzjqe7102202024-02-13 20:32:04 +01001444 // a substitute command may have removed some lines
Christian Brabandt7c71db32024-01-22 20:12:34 +01001445 if (bot.lnum > curbuf->b_ml.ml_line_count)
1446 bot.lnum = curbuf->b_ml.ml_line_count;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001447 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001448 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001449 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001450 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001451 if (lnum < top.lnum || lnum > bot.lnum)
1452 return FALSE;
1453
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001454 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001455 if (mode == 'v')
1456 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001457 if ((lnum == top.lnum && col < top.col)
1458 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1459 return FALSE;
1460 }
1461 else if (mode == Ctrl_V)
1462 {
1463 getvvcol(wp, &top, &start, NULL, &end);
1464 getvvcol(wp, &bot, &start2, NULL, &end2);
1465 if (start2 < start)
1466 start = start2;
1467 if (end2 > end)
1468 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001469 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001470 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001471
1472 // getvvcol() flushes rex.line, need to get it again
1473 rex.line = reg_getline(rex.lnum);
1474 rex.input = rex.line + col;
1475
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001476 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001477 if (cols < start || cols > end - (*p_sel == 'e'))
1478 return FALSE;
1479 }
1480 return TRUE;
1481}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001482
Bram Moolenaar071d4272004-06-13 20:20:40 +00001483/*
1484 * Check the regexp program for its magic number.
1485 * Return TRUE if it's wrong.
1486 */
1487 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001488prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001489{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001490 regprog_T *prog;
1491
Bram Moolenaar6100d022016-10-02 16:51:57 +02001492 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001493 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001494 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001495 return FALSE;
1496
1497 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001498 {
RestorerZ68ebcee2023-05-31 17:12:14 +01001499 iemsg(e_corrupted_regexp_program);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001500 return TRUE;
1501 }
1502 return FALSE;
1503}
1504
1505/*
1506 * Cleanup the subexpressions, if this wasn't done yet.
1507 * This construction is used to clear the subexpressions only when they are
1508 * used (to increase speed).
1509 */
1510 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001511cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001513 if (!rex.need_clear_subexpr)
1514 return;
1515
1516 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001517 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001518 // Use 0xff to set lnum to -1
1519 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1520 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001521 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001522 else
1523 {
1524 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1525 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
1526 }
1527 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001528}
1529
#ifdef FEAT_SYN_HL
/*
 * Clear the \z(...\) subexpression matches, if this wasn't done yet.
 * "rex.need_clear_zsubexpr" tells whether the clearing is still pending.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
	return;

    if (!REG_MULTI)
    {
	// Single-line match: reset the start/end pointers to NULL.
	vim_memset(reg_startzp, 0, NSUBEXP * sizeof(char_u *));
	vim_memset(reg_endzp, 0, NSUBEXP * sizeof(char_u *));
    }
    else
    {
	// Multi-line match: use 0xff to set lnum to -1
	vim_memset(reg_startzpos, 0xff, NSUBEXP * sizeof(lpos_T));
	vim_memset(reg_endzpos, 0xff, NSUBEXP * sizeof(lpos_T));
    }
    rex.need_clear_zsubexpr = FALSE;
}
#endif
1551
1552/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001553 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001554 */
1555 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001556reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001557{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001558 rex.line = reg_getline(++rex.lnum);
1559 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560 fast_breakcheck();
1561}
1562
1563/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001564 * Check whether a backreference matches.
1565 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001566 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1567 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001568 */
1569 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001570match_with_backref(
1571 linenr_T start_lnum,
1572 colnr_T start_col,
1573 linenr_T end_lnum,
1574 colnr_T end_col,
1575 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001576{
1577 linenr_T clnum = start_lnum;
1578 colnr_T ccol = start_col;
1579 int len;
1580 char_u *p;
1581
1582 if (bytelen != NULL)
1583 *bytelen = 0;
1584 for (;;)
1585 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001586 // Since getting one line may invalidate the other, need to make copy.
1587 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001588 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001589 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001590 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001591 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1592 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001593 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001594 vim_free(reg_tofree);
1595 reg_tofree = alloc(len);
1596 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001597 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001598 reg_tofreelen = len;
1599 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001600 STRCPY(reg_tofree, rex.line);
1601 rex.input = reg_tofree + (rex.input - rex.line);
1602 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001603 }
1604
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001605 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001606 p = reg_getline(clnum);
1607 if (clnum == end_lnum)
1608 len = end_col - ccol;
1609 else
John Marriott82792db2024-05-12 00:07:17 +02001610 len = (int)reg_getline_len(clnum) - ccol;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001611
Bram Moolenaar0270f382018-07-17 05:43:58 +02001612 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001613 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001614 if (bytelen != NULL)
1615 *bytelen += len;
1616 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001617 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001618 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001619 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001620
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001621 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001622 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001623 if (bytelen != NULL)
1624 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001625 ++clnum;
1626 ccol = 0;
1627 if (got_int)
1628 return RA_FAIL;
1629 }
1630
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001631 // found a match! Note that rex.line may now point to a copy of the line,
1632 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001633 return RA_MATCH;
1634}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635
Bram Moolenaarfb031402014-09-09 17:18:49 +02001636/*
1637 * Used in a place where no * or \+ can follow.
1638 */
1639 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001640re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001641{
1642 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001643 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001644 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001645 rc_did_emsg = TRUE;
1646 return FAIL;
1647 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001648 return OK;
1649}
1650
// Decomposition of one character into up to three code points; unused
// trailing slots are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for the Hebrew presentation forms 0xfb20 - 0xfb4f, indexed
// by (character - 0xfb20).  Unassigned code points map to themselves.
// The table is only ever read, hence "const".
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                // 0xfb20       alt ayin
    {0x5d0,0,0},                // 0xfb21       alt alef
    {0x5d3,0,0},                // 0xfb22       alt dalet
    {0x5d4,0,0},                // 0xfb23       alt he
    {0x5db,0,0},                // 0xfb24       alt kaf
    {0x5dc,0,0},                // 0xfb25       alt lamed
    {0x5dd,0,0},                // 0xfb26       alt mem-sofit
    {0x5e8,0,0},                // 0xfb27       alt resh
    {0x5ea,0,0},                // 0xfb28       alt tav
    {'+', 0, 0},                // 0xfb29       alt plus
    {0x5e9, 0x5c1, 0},          // 0xfb2a       shin+shin-dot
    {0x5e9, 0x5c2, 0},          // 0xfb2b       shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},      // 0xfb2c       shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},      // 0xfb2d       shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},          // 0xfb2e       alef+patah
    {0x5d0, 0x5b8, 0},          // 0xfb2f       alef+qamats
    // U+FB30 is ALEF WITH MAPIQ: the mark is 0x5bc (dagesh/mapiq), not
    // hiriq (0x5b4).
    {0x5d0, 0x5bc, 0},          // 0xfb30       alef+mapiq
    {0x5d1, 0x5bc, 0},          // 0xfb31       bet+dagesh
    {0x5d2, 0x5bc, 0},          // 0xfb32       gimel+dagesh
    {0x5d3, 0x5bc, 0},          // 0xfb33       dalet+dagesh
    {0x5d4, 0x5bc, 0},          // 0xfb34       he+dagesh
    {0x5d5, 0x5bc, 0},          // 0xfb35       vav+dagesh
    {0x5d6, 0x5bc, 0},          // 0xfb36       zayin+dagesh
    {0xfb37, 0, 0},             // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},          // 0xfb38       tet+dagesh
    {0x5d9, 0x5bc, 0},          // 0xfb39       yud+dagesh
    {0x5da, 0x5bc, 0},          // 0xfb3a       kaf sofit+dagesh
    {0x5db, 0x5bc, 0},          // 0xfb3b       kaf+dagesh
    {0x5dc, 0x5bc, 0},          // 0xfb3c       lamed+dagesh
    {0xfb3d, 0, 0},             // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},          // 0xfb3e       mem+dagesh
    {0xfb3f, 0, 0},             // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},          // 0xfb40       nun+dagesh
    {0x5e1, 0x5bc, 0},          // 0xfb41       samech+dagesh
    {0xfb42, 0, 0},             // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},          // 0xfb43       pe sofit+dagesh
    {0x5e4, 0x5bc, 0},          // 0xfb44       pe+dagesh
    {0xfb45, 0, 0},             // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},          // 0xfb46       tsadi+dagesh
    {0x5e7, 0x5bc, 0},          // 0xfb47       qof+dagesh
    {0x5e8, 0x5bc, 0},          // 0xfb48       resh+dagesh
    {0x5e9, 0x5bc, 0},          // 0xfb49       shin+dagesh
    {0x5ea, 0x5bc, 0},          // 0xfb4a       tav+dagesh
    {0x5d5, 0x5b9, 0},          // 0xfb4b       vav+holam
    {0x5d1, 0x5bf, 0},          // 0xfb4c       bet+rafe
    {0x5db, 0x5bf, 0},          // 0xfb4d       kaf+rafe
    {0x5e4, 0x5bf, 0},          // 0xfb4e       pe+rafe
    {0x5d0, 0x5dc, 0}           // 0xfb4f       alef-lamed
};

/*
 * Decompose character "c" into up to three code points, stored in "*c1",
 * "*c2" and "*c3"; unused ones are set to zero.
 * Only the range 0xfb20 - 0xfb4f is looked up in decomp_table, any other
 * character is returned unchanged in "*c1".
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        // Read through a pointer, no need to copy the table entry.
        const decomp_T *d = &decomp_table[c - 0xfb20];

        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001728
1729/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001730 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731 * Return 0 if strings match, non-zero otherwise.
Christian Brabandtc97f4d62024-04-10 16:18:15 +02001732 * Correct the length "*n" when composing characters are ignored.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001733 */
1734 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001735cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001736{
1737 int result;
1738
Bram Moolenaar6100d022016-10-02 16:51:57 +02001739 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001740 result = STRNCMP(s1, s2, *n);
1741 else
1742 result = MB_STRNICMP(s1, s2, *n);
1743
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001744 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001745 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 {
1747 char_u *str1, *str2;
1748 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749 int junk;
1750
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001751 // we have to handle the strcmp ourselves, since it is necessary to
1752 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753 str1 = s1;
1754 str2 = s2;
1755 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001756 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757 {
1758 c1 = mb_ptr2char_adv(&str1);
1759 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001760
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001761 // Decompose the character if necessary, into 'base' characters.
1762 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001763 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001764 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001765 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 mb_decompose(c1, &c11, &junk, &junk);
1767 mb_decompose(c2, &c12, &junk, &junk);
1768 c1 = c11;
1769 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001770 if (c11 != c12
1771 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772 break;
1773 }
1774 }
1775 result = c2 - c1;
1776 if (result == 0)
1777 *n = (int)(str2 - s2);
1778 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001779
1780 return result;
1781}
1782
1783/*
1784 * cstrchr: This function is used a lot for simple searches, keep it fast!
1785 */
1786 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001787cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788{
1789 char_u *p;
1790 int cc;
1791
Bram Moolenaara12a1612019-01-24 16:39:02 +01001792 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793 return vim_strchr(s, c);
1794
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001795 // tolower() and toupper() can be slow, comparing twice should be a lot
1796 // faster (esp. when using MS Visual C++!).
1797 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001798 if (enc_utf8 && c > 0x80)
1799 cc = utf_fold(c);
1800 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001801 if (MB_ISUPPER(c))
1802 cc = MB_TOLOWER(c);
1803 else if (MB_ISLOWER(c))
1804 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 else
1806 return vim_strchr(s, c);
1807
Bram Moolenaar071d4272004-06-13 20:20:40 +00001808 if (has_mbyte)
1809 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001810 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 {
1812 if (enc_utf8 && c > 0x80)
1813 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001814 int uc = utf_ptr2char(p);
1815
1816 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1817 // not 0xff.
1818 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819 return p;
1820 }
1821 else if (*p == c || *p == cc)
1822 return p;
1823 }
1824 }
1825 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001826 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 for (p = s; *p != NUL; ++p)
1828 if (*p == c || *p == cc)
1829 return p;
1830
1831 return NULL;
1832}
1833
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001834////////////////////////////////////////////////////////////////
1835// regsub stuff //
1836////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001837
Yee Cheng Chind25021c2023-09-18 19:51:56 +02001838typedef void (*fptr_T)(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001839
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001840static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841
Yee Cheng Chind25021c2023-09-18 19:51:56 +02001842 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001843do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001844{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001845 *d = MB_TOUPPER(c);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001846}
1847
Yee Cheng Chind25021c2023-09-18 19:51:56 +02001848 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001849do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001850{
1851 *d = MB_TOLOWER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001852}
1853
1854/*
1855 * regtilde(): Replace tildes in the pattern by the old pattern.
1856 *
1857 * Short explanation of the tilde: It stands for the previous replacement
1858 * pattern. If that previous pattern also contains a ~ we should go back a
1859 * step further... But we insert the previous pattern into the current one
1860 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001861 * This still does not handle the case where "magic" changes. So require the
1862 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001863 *
1864 * The tildes are parsed once before the first call to vim_regsub().
1865 */
1866 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001867regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001868{
1869 char_u *newsub = source;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001870 char_u *p;
John Marriott82792db2024-05-12 00:07:17 +02001871 size_t newsublen = 0;
1872 char_u tilde[3] = {'~', NUL, NUL};
1873 size_t tildelen = 1;
1874 int error = FALSE;
1875
1876 if (!magic)
1877 {
1878 tilde[0] = '\\';
1879 tilde[1] = '~';
1880 tilde[2] = NUL;
1881 tildelen = 2;
1882 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001883
1884 for (p = newsub; *p; ++p)
1885 {
John Marriott82792db2024-05-12 00:07:17 +02001886 if (STRNCMP(p, tilde, tildelen) == 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887 {
John Marriott82792db2024-05-12 00:07:17 +02001888 size_t prefixlen = p - newsub; // not including the tilde
1889 char_u *postfix = p + tildelen;
1890 size_t postfixlen;
1891 size_t tmpsublen;
1892
1893 if (newsublen == 0)
1894 newsublen = STRLEN(newsub);
1895 newsublen -= tildelen;
1896 postfixlen = newsublen - prefixlen;
1897 tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
1898
1899 if (tmpsublen > 0 && reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001900 {
John Marriott82792db2024-05-12 00:07:17 +02001901 char_u *tmpsub;
1902
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001903 // Avoid making the text longer than MAXCOL, it will cause
1904 // trouble at some point.
John Marriott82792db2024-05-12 00:07:17 +02001905 if (tmpsublen > MAXCOL)
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001906 {
1907 emsg(_(e_resulting_text_too_long));
John Marriott82792db2024-05-12 00:07:17 +02001908 error = TRUE;
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001909 break;
1910 }
1911
John Marriott82792db2024-05-12 00:07:17 +02001912 tmpsub = alloc(tmpsublen + 1);
1913 if (tmpsub == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914 {
John Marriott82792db2024-05-12 00:07:17 +02001915 emsg(_(e_out_of_memory));
1916 error = TRUE;
1917 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001918 }
John Marriott82792db2024-05-12 00:07:17 +02001919
1920 // copy prefix
1921 mch_memmove(tmpsub, newsub, prefixlen);
1922 // interpret tilde
1923 mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
1924 // copy postfix
1925 STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
1926
1927 if (newsub != source) // allocated newsub before
1928 vim_free(newsub);
1929 newsub = tmpsub;
1930 newsublen = tmpsublen;
1931 p = newsub + prefixlen + reg_prev_sublen;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001933 else
John Marriott82792db2024-05-12 00:07:17 +02001934 mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL)
1935
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936 --p;
1937 }
1938 else
1939 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001940 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001941 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001943 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001944 }
1945 }
1946
John Marriott82792db2024-05-12 00:07:17 +02001947 if (error)
1948 {
1949 if (newsub != source)
1950 vim_free(newsub);
1951 return source;
1952 }
1953
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001954 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1955 // because recursive calls may make the returned string invalid.
John Marriott82792db2024-05-12 00:07:17 +02001956 // Only store it if there something to store.
1957 newsublen = p - newsub;
1958 if (newsublen == 0)
1959 VIM_CLEAR(reg_prev_sub);
1960 else
1961 {
1962 vim_free(reg_prev_sub);
1963 reg_prev_sub = vim_strnsave(newsub, newsublen);
1964 }
1965
1966 if (reg_prev_sub == NULL)
1967 reg_prev_sublen = 0;
1968 else
1969 reg_prev_sublen = newsublen;
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001970
Bram Moolenaar071d4272004-06-13 20:20:40 +00001971 return newsub;
1972}
1973
1974#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001975
1976/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001977 * Put the submatches in "argv[argskip]" which is a list passed into
1978 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001979 */
1980 static int
zeertzjq48db5da2022-09-16 12:10:03 +01001981fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001982{
1983 listitem_T *li;
1984 int i;
1985 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001986 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001987
zeertzjqabd58d82022-09-16 16:06:32 +01001988 if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001989 // called function doesn't take a submatches argument
1990 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001991
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001992 // Relies on sl_list to be the first item in staticList10_T.
1993 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001994
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001995 // There are always 10 list items in staticList10_T.
1996 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001997 for (i = 0; i < 10; ++i)
1998 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001999 s = rsm.sm_match->startp[i];
2000 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002001 s = NULL;
2002 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002003 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002004 li->li_tv.v_type = VAR_STRING;
2005 li->li_tv.vval.v_string = s;
2006 li = li->li_next;
2007 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01002008 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002009}
2010
2011 static void
2012clear_submatch_list(staticList10_T *sl)
2013{
2014 int i;
2015
2016 for (i = 0; i < 10; ++i)
2017 vim_free(sl->sl_items[i].li_tv.vval.v_string);
2018}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02002019#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002020
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021/*
2022 * vim_regsub() - perform substitutions after a vim_regexec() or
2023 * vim_regexec_multi() match.
2024 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002025 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
dundargocc57b5bc2022-11-02 13:30:51 +00002026 * Otherwise nothing is copied, only compute the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002027 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002028 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
2029 *
2030 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
2031 * double them to keep them, and insert a backslash before a CR to avoid it
2032 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002033 *
2034 * Note: The matched text must not change between the call of
2035 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
2036 * references invalid!
2037 *
2038 * Returns the size of the replacement, including terminating NUL.
2039 */
2040 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002041vim_regsub(
2042 regmatch_T *rmp,
2043 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002044 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002045 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002046 int destlen,
2047 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002048{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002049 int result;
2050 regexec_T rex_save;
2051 int rex_in_use_save = rex_in_use;
2052
2053 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002054 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002055 rex_save = rex;
2056 rex_in_use = TRUE;
2057
2058 rex.reg_match = rmp;
2059 rex.reg_mmatch = NULL;
2060 rex.reg_maxline = 0;
2061 rex.reg_buf = curbuf;
2062 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002063 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002064
2065 rex_in_use = rex_in_use_save;
2066 if (rex_in_use)
2067 rex = rex_save;
2068
2069 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002070}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002071
2072 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002073vim_regsub_multi(
2074 regmmatch_T *rmp,
2075 linenr_T lnum,
2076 char_u *source,
2077 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002078 int destlen,
2079 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002080{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002081 int result;
2082 regexec_T rex_save;
2083 int rex_in_use_save = rex_in_use;
2084
2085 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002086 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002087 rex_save = rex;
2088 rex_in_use = TRUE;
2089
2090 rex.reg_match = NULL;
2091 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002092 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02002093 rex.reg_firstlnum = lnum;
2094 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
2095 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002096 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002097
2098 rex_in_use = rex_in_use_save;
2099 if (rex_in_use)
2100 rex = rex_save;
2101
2102 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002103}
2104
#if defined(FEAT_EVAL) || defined(PROTO)
// Maximum depth of nested substitutes.  When nesting more than a couple
// levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4
// One saved expression result per nesting level.
static char_u *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Free and reset the saved expression results of all nesting levels.
 */
    void
free_resub_eval_result(void)
{
    int         level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
        VIM_CLEAR(eval_result[level]);
        ++level;
    }
}
# endif
#endif
2121
Bram Moolenaar071d4272004-06-13 20:20:40 +00002122 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002123vim_regsub_both(
2124 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002125 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002126 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002127 int destlen,
2128 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002129{
2130 char_u *src;
2131 char_u *dst;
2132 char_u *s;
2133 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002134 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002135 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002136 fptr_T func_all = (fptr_T)NULL;
2137 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002138 linenr_T clnum = 0; // init for GCC
2139 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00002140#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002141 static int nesting = 0;
2142 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002143#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002144 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002146 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002147 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002148 {
RestorerZ68ebcee2023-05-31 17:12:14 +01002149 iemsg(e_null_argument);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002150 return 0;
2151 }
2152 if (prog_magic_wrong())
2153 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002154#ifdef FEAT_EVAL
2155 if (nesting == MAX_REGSUB_NESTING)
2156 {
2157 emsg(_(e_substitute_nesting_too_deep));
2158 return 0;
2159 }
2160 nested = nesting;
2161#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002162 src = source;
2163 dst = dest;
2164
2165 /*
2166 * When the substitute part starts with "\=" evaluate it as an expression.
2167 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002168 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002169 {
2170#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002171 // To make sure that the length doesn't change between checking the
2172 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01002173 // resulting string is saved from the call with
2174 // "flags & REGSUB_COPY" == 0 to the call with
2175 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002176 if (copy)
2177 {
John Marriott82792db2024-05-12 00:07:17 +02002178 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002179 {
John Marriott82792db2024-05-12 00:07:17 +02002180 int eval_len = (int)STRLEN(eval_result[nested]);
2181
2182 if (eval_len < destlen)
2183 {
2184 STRCPY(dest, eval_result[nested]);
2185 dst += eval_len;
2186 VIM_CLEAR(eval_result[nested]);
2187 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002188 }
2189 }
2190 else
2191 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002192 int prev_can_f_submatch = can_f_submatch;
2193 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002194
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002195 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002196
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002197 // The expression may contain substitute(), which calls us
2198 // recursively. Make sure submatch() gets the text from the first
2199 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002200 if (can_f_submatch)
2201 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002202 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002203 rsm.sm_match = rex.reg_match;
2204 rsm.sm_mmatch = rex.reg_mmatch;
2205 rsm.sm_firstlnum = rex.reg_firstlnum;
2206 rsm.sm_maxline = rex.reg_maxline;
2207 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002208
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002209 // Although unlikely, it is possible that the expression invokes a
2210 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002211 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002212 ++nesting;
2213
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002214 if (expr != NULL)
2215 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002216 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002217 char_u buf[NUMBUFLEN];
2218 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002219 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002220 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002221
2222 rettv.v_type = VAR_STRING;
2223 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002224 argv[0].v_type = VAR_LIST;
2225 argv[0].vval.v_list = &matchList.sl_list;
2226 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002227 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002228 funcexe.fe_argv_func = fill_submatch_list;
2229 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002230 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002231 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002232 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002233 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002234 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002235 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002236 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002237 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002238
Bram Moolenaar6100d022016-10-02 16:51:57 +02002239 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002240 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002241 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002242 }
LemonBoyf3b48952022-05-05 13:53:03 +01002243 else if (expr->v_type == VAR_INSTR)
2244 {
2245 exe_typval_instr(expr, &rettv);
2246 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002247 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002248 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002249 clear_submatch_list(&matchList);
2250
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002251 if (rettv.v_type == VAR_UNKNOWN)
2252 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002253 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002254 else
2255 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002256 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2257 if (eval_result[nested] != NULL)
2258 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002259 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002260 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002261 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002262 else if (substitute_instr != NULL)
2263 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002264 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002265 else
Bram Moolenaara4e0b972022-10-01 19:43:52 +01002266 eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002267 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002268
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002269 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002270 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002271 int had_backslash = FALSE;
2272
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002273 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002274 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002275 // Change NL to CR, so that it becomes a line break,
2276 // unless called from vim_regexec_nl().
2277 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002278 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002279 *s = CAR;
2280 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002281 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002282 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002283 /* Change NL to CR here too, so that this works:
2284 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2285 * abc\
2286 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002287 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002288 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002289 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002290 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002291 had_backslash = TRUE;
2292 }
2293 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002294 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002295 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002296 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002297 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002298 if (s != NULL)
2299 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002300 vim_free(eval_result[nested]);
2301 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002302 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 }
2304
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002305 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002306 }
2307
Bram Moolenaar6100d022016-10-02 16:51:57 +02002308 can_f_submatch = prev_can_f_submatch;
2309 if (can_f_submatch)
2310 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002311 }
2312#endif
2313 }
2314 else
2315 while ((c = *src++) != NUL)
2316 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002317 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002318 no = 0;
2319 else if (c == '\\' && *src != NUL)
2320 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002321 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002322 {
2323 ++src;
2324 no = 0;
2325 }
2326 else if ('0' <= *src && *src <= '9')
2327 {
2328 no = *src++ - '0';
2329 }
2330 else if (vim_strchr((char_u *)"uUlLeE", *src))
2331 {
2332 switch (*src++)
2333 {
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002334 case 'u': func_one = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002335 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002336 case 'U': func_all = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002338 case 'l': func_one = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002339 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002340 case 'L': func_all = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002341 continue;
2342 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002343 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002344 continue;
2345 }
2346 }
2347 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002348 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002349 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002350 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2351 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002352 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002353 if (copy)
2354 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002355 if (dst + 3 > dest + destlen)
2356 {
2357 iemsg("vim_regsub_both(): not enough space");
2358 return 0;
2359 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002360 *dst++ = c;
2361 *dst++ = *src++;
2362 *dst++ = *src++;
2363 }
2364 else
2365 {
2366 dst += 3;
2367 src += 2;
2368 }
2369 continue;
2370 }
2371
Bram Moolenaar071d4272004-06-13 20:20:40 +00002372 if (c == '\\' && *src != NUL)
2373 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002374 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002375 switch (*src)
2376 {
2377 case 'r': c = CAR; ++src; break;
2378 case 'n': c = NL; ++src; break;
2379 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002380 // Oh no! \e already has meaning in subst pat :-(
2381 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002382 case 'b': c = Ctrl_H; ++src; break;
2383
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002384 // If "backslash" is TRUE the backslash will be removed
2385 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002386 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002387 {
2388 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002389 {
2390 if (dst + 1 > dest + destlen)
2391 {
2392 iemsg("vim_regsub_both(): not enough space");
2393 return 0;
2394 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002395 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002396 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002397 ++dst;
2398 }
2399 c = *src++;
2400 }
2401 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002402 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002403 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002404
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002405 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002406 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002407 {
2408 func_one(&cc, c);
2409 func_one = NULL;
2410 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002411 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002412 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002413 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002414 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002415
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002416 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002417 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002418 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002419 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002420
Bram Moolenaar071d4272004-06-13 20:20:40 +00002421 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002422 {
2423 if (dst + charlen > dest + destlen)
2424 {
2425 iemsg("vim_regsub_both(): not enough space");
2426 return 0;
2427 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002428 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002429 }
2430 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002431 if (enc_utf8)
2432 {
2433 int clen = utf_ptr2len(src - 1);
2434
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002435 // If the character length is shorter than "totlen", there
2436 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002437 if (clen < totlen)
2438 {
2439 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002440 {
2441 if (dst + totlen - clen > dest + destlen)
2442 {
2443 iemsg("vim_regsub_both(): not enough space");
2444 return 0;
2445 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002446 mch_memmove(dst + 1, src - 1 + clen,
2447 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002448 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002449 dst += totlen - clen;
2450 }
2451 }
2452 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002453 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002454 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002455 {
2456 if (dst + 1 > dest + destlen)
2457 {
2458 iemsg("vim_regsub_both(): not enough space");
2459 return 0;
2460 }
2461 *dst = cc;
2462 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002463 dst++;
2464 }
2465 else
2466 {
2467 if (REG_MULTI)
2468 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002469 clnum = rex.reg_mmatch->startpos[no].lnum;
2470 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002471 s = NULL;
2472 else
2473 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002474 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2475 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2476 len = rex.reg_mmatch->endpos[no].col
2477 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 else
John Marriott82792db2024-05-12 00:07:17 +02002479 len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002480 }
2481 }
2482 else
2483 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002484 s = rex.reg_match->startp[no];
2485 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002486 s = NULL;
2487 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002488 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002489 }
2490 if (s != NULL)
2491 {
2492 for (;;)
2493 {
2494 if (len == 0)
2495 {
2496 if (REG_MULTI)
2497 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002498 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 break;
2500 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002501 {
2502 if (dst + 1 > dest + destlen)
2503 {
2504 iemsg("vim_regsub_both(): not enough space");
2505 return 0;
2506 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002508 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002509 ++dst;
2510 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002511 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2512 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002513 else
John Marriott82792db2024-05-12 00:07:17 +02002514 len = (int)reg_getline_len(clnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 }
2516 else
2517 break;
2518 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002519 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002520 {
2521 if (copy)
RestorerZ68ebcee2023-05-31 17:12:14 +01002522 iemsg(e_damaged_match_string);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002523 goto exit;
2524 }
2525 else
2526 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002527 if ((flags & REGSUB_BACKSLASH)
2528 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002529 {
2530 /*
2531 * Insert a backslash in front of a CR, otherwise
2532 * it will be replaced by a line break.
2533 * Number of backslashes will be halved later,
2534 * double them here.
2535 */
2536 if (copy)
2537 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002538 if (dst + 2 > dest + destlen)
2539 {
2540 iemsg("vim_regsub_both(): not enough space");
2541 return 0;
2542 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002543 dst[0] = '\\';
2544 dst[1] = *s;
2545 }
2546 dst += 2;
2547 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002548 else
2549 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002550 if (has_mbyte)
2551 c = mb_ptr2char(s);
2552 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002553 c = *s;
2554
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002555 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002556 {
2557 func_one(&cc, c);
2558 func_one = NULL;
2559 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002560 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002561 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002562 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002563 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002564
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002565 if (has_mbyte)
2566 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002567 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002568 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002569
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002570 // Copy composing characters separately, one
2571 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002572 if (enc_utf8)
2573 l = utf_ptr2len(s) - 1;
2574 else
2575 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002576
2577 s += l;
2578 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002579 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002580 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002581 {
2582 if (dst + charlen > dest + destlen)
2583 {
2584 iemsg("vim_regsub_both(): not enough space");
2585 return 0;
2586 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002587 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002588 }
2589 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002590 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002591 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002592 {
2593 if (dst + 1 > dest + destlen)
2594 {
2595 iemsg("vim_regsub_both(): not enough space");
2596 return 0;
2597 }
2598 *dst = cc;
2599 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002600 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002601 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002602
Bram Moolenaar071d4272004-06-13 20:20:40 +00002603 ++s;
2604 --len;
2605 }
2606 }
2607 }
2608 no = -1;
2609 }
2610 }
2611 if (copy)
2612 *dst = NUL;
2613
2614exit:
2615 return (int)((dst - dest) + 1);
2616}
2617
2618#ifdef FEAT_EVAL
John Marriott82792db2024-05-12 00:07:17 +02002619
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002620 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002621reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002622{
John Marriott82792db2024-05-12 00:07:17 +02002623 char_u *line;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002624
John Marriott82792db2024-05-12 00:07:17 +02002625 reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002626
John Marriott82792db2024-05-12 00:07:17 +02002627 return line;
2628}
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002629
John Marriott82792db2024-05-12 00:07:17 +02002630 static colnr_T
2631reg_getline_submatch_len(linenr_T lnum)
2632{
2633 colnr_T length;
2634
2635 reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
2636
2637 return length;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002638}
2639
2640/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002641 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002642 * allocated memory.
2643 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2644 */
2645 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002646reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002647{
2648 char_u *retval = NULL;
2649 char_u *s;
2650 int len;
2651 int round;
2652 linenr_T lnum;
2653
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002654 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002655 return NULL;
2656
Bram Moolenaar6100d022016-10-02 16:51:57 +02002657 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002658 {
2659 /*
2660 * First round: compute the length and allocate memory.
2661 * Second round: copy the text.
2662 */
2663 for (round = 1; round <= 2; ++round)
2664 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002665 lnum = rsm.sm_mmatch->startpos[no].lnum;
2666 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002667 return NULL;
2668
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002669 s = reg_getline_submatch(lnum);
2670 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002671 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002672 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002673 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002674 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002675 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002676 len = rsm.sm_mmatch->endpos[no].col
2677 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002678 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002679 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002680 ++len;
2681 }
2682 else
2683 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002684 // Multiple lines: take start line from start col, middle
2685 // lines completely and end line up to end col.
John Marriott82792db2024-05-12 00:07:17 +02002686 len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002687 if (round == 2)
2688 {
2689 STRCPY(retval, s);
2690 retval[len] = '\n';
2691 }
2692 ++len;
2693 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002694 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002695 {
John Marriott82792db2024-05-12 00:07:17 +02002696 s = reg_getline_submatch(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002697 if (round == 2)
2698 STRCPY(retval + len, s);
John Marriott82792db2024-05-12 00:07:17 +02002699 len += (int)reg_getline_submatch_len(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002700 if (round == 2)
2701 retval[len] = '\n';
2702 ++len;
John Marriott82792db2024-05-12 00:07:17 +02002703 ++lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002704 }
2705 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002706 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002707 rsm.sm_mmatch->endpos[no].col);
2708 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002709 if (round == 2)
2710 retval[len] = NUL;
2711 ++len;
2712 }
2713
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002714 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002715 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002716 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002717 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002718 return NULL;
2719 }
2720 }
2721 }
2722 else
2723 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002724 s = rsm.sm_match->startp[no];
2725 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002726 retval = NULL;
2727 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002728 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002729 }
2730
2731 return retval;
2732}
Bram Moolenaar41571762014-04-02 19:00:58 +02002733
2734/*
2735 * Used for the submatch() function with the optional non-zero argument: get
2736 * the list of strings from the n'th submatch in allocated memory with NULs
2737 * represented in NLs.
2738 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2739 * command, for a non-existing submatch and for any error.
2740 */
2741 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002742reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002743{
2744 char_u *s;
2745 linenr_T slnum;
2746 linenr_T elnum;
2747 colnr_T scol;
2748 colnr_T ecol;
2749 int i;
2750 list_T *list;
2751 int error = FALSE;
2752
2753 if (!can_f_submatch || no < 0)
2754 return NULL;
2755
Bram Moolenaar6100d022016-10-02 16:51:57 +02002756 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002757 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002758 slnum = rsm.sm_mmatch->startpos[no].lnum;
2759 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002760 if (slnum < 0 || elnum < 0)
2761 return NULL;
2762
Bram Moolenaar6100d022016-10-02 16:51:57 +02002763 scol = rsm.sm_mmatch->startpos[no].col;
2764 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002765
2766 list = list_alloc();
2767 if (list == NULL)
2768 return NULL;
2769
2770 s = reg_getline_submatch(slnum) + scol;
2771 if (slnum == elnum)
2772 {
2773 if (list_append_string(list, s, ecol - scol) == FAIL)
2774 error = TRUE;
2775 }
2776 else
2777 {
John Marriott82792db2024-05-12 00:07:17 +02002778 int max_lnum = elnum - slnum;
2779
Bram Moolenaar41571762014-04-02 19:00:58 +02002780 if (list_append_string(list, s, -1) == FAIL)
2781 error = TRUE;
John Marriott82792db2024-05-12 00:07:17 +02002782 for (i = 1; i < max_lnum; i++)
Bram Moolenaar41571762014-04-02 19:00:58 +02002783 {
2784 s = reg_getline_submatch(slnum + i);
2785 if (list_append_string(list, s, -1) == FAIL)
2786 error = TRUE;
2787 }
2788 s = reg_getline_submatch(elnum);
2789 if (list_append_string(list, s, ecol) == FAIL)
2790 error = TRUE;
2791 }
2792 }
2793 else
2794 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002795 s = rsm.sm_match->startp[no];
2796 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002797 return NULL;
2798 list = list_alloc();
2799 if (list == NULL)
2800 return NULL;
2801 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002802 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002803 error = TRUE;
2804 }
2805
2806 if (error)
2807 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002808 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002809 return NULL;
2810 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002811 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002812 return list;
2813}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002814#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002815
Bram Moolenaarf4140482020-02-15 23:06:45 +01002816/*
2817 * Initialize the values used for matching against multiple lines
2818 */
2819 static void
2820init_regexec_multi(
2821 regmmatch_T *rmp,
2822 win_T *win, // window in which to search or NULL
2823 buf_T *buf, // buffer in which to search
2824 linenr_T lnum) // nr of line to start looking for match
2825{
2826 rex.reg_match = NULL;
2827 rex.reg_mmatch = rmp;
2828 rex.reg_buf = buf;
2829 rex.reg_win = win;
2830 rex.reg_firstlnum = lnum;
2831 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2832 rex.reg_line_lbr = FALSE;
2833 rex.reg_ic = rmp->rmm_ic;
2834 rex.reg_icombine = FALSE;
2835 rex.reg_maxcol = rmp->rmm_maxcol;
2836}
2837
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002838#include "regexp_bt.c"
2839
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002840static regengine_T bt_regengine =
2841{
2842 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002843 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002844 bt_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002845 bt_regexec_multi
2846#ifdef DEBUG
2847 ,(char_u *)""
2848#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002849};
2850
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002851#include "regexp_nfa.c"
2852
2853static regengine_T nfa_regengine =
2854{
2855 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002856 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002857 nfa_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002858 nfa_regexec_multi
2859#ifdef DEBUG
2860 ,(char_u *)""
2861#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002862};
2863
// The regexp engine to use; vim_regcomp() sets it from p_re or from a "\%#="
// prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number.
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2875
2876/*
2877 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002878 * Returns the program in allocated memory.
2879 * Use vim_regfree() to free the memory.
2880 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002881 */
2882 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002883vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002884{
2885 regprog_T *prog = NULL;
2886 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002887 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002888
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002889 regexp_engine = p_re;
2890
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002891 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002892 if (STRNCMP(expr, "\\%#=", 4) == 0)
2893 {
2894 int newengine = expr[4] - '0';
2895
2896 if (newengine == AUTOMATIC_ENGINE
2897 || newengine == BACKTRACKING_ENGINE
2898 || newengine == NFA_ENGINE)
2899 {
2900 regexp_engine = expr[4] - '0';
2901 expr += 5;
2902#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002903 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002904 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002905#endif
2906 }
2907 else
2908 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002909 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002910 regexp_engine = AUTOMATIC_ENGINE;
2911 }
2912 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002913#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002914 bt_regengine.expr = expr;
2915 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002916#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002917 // reg_iswordc() uses rex.reg_buf
2918 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002919
2920 /*
2921 * First try the NFA engine, unless backtracking was requested.
2922 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002923 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002924 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002925 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002926 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002927 else
2928 prog = bt_regengine.regcomp(expr, re_flags);
2929
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002930 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002931 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002932 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002933#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002934 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002935 {
2936 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002937 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002938 if (f)
2939 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002940 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002941 fclose(f);
2942 }
2943 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002944 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002945 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002946 }
2947#endif
2948 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002949 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002950 * The NFA engine also fails for patterns that it can't handle well
2951 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002952 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002953 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002954 if (regexp_engine == AUTOMATIC_ENGINE
2955 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002956 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002957 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002958#ifdef FEAT_EVAL
2959 report_re_switch(expr);
2960#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002961 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002962 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002963 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002964
Bram Moolenaarfda37292014-11-05 14:27:36 +01002965 if (prog != NULL)
2966 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002967 // Store the info needed to call regcomp() again when the engine turns
2968 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002969 prog->re_engine = regexp_engine;
2970 prog->re_flags = re_flags;
2971 }
2972
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002973 return prog;
2974}
2975
2976/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002977 * Free a compiled regexp program, returned by vim_regcomp().
2978 */
2979 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002980vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002981{
2982 if (prog != NULL)
2983 prog->engine->regfree(prog);
2984}
2985
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory kept in file-level variables: clear "regstack" and
 * "backpos" with ga_clear() and free "reg_tofree" and "reg_prev_sub".
 */
    void
free_regexp_stuff(void)
{
    // cleared with ga_clear()
    ga_clear(&regstack);
    ga_clear(&backpos);

    // allocated pointers
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
2996
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is positive, give a message that the backtracking engine is
 * now used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
3010
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return whether "prog" is currently being executed, which is the value of its
 * "re_in_use" flag.
 * "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int	    busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01003021
3022/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003023 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01003024 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003025 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003026 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003027 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003028 *
3029 * Return TRUE if there is a match, FALSE if not.
3030 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01003031 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003032vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01003033 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003034 char_u *line, // string to match against
3035 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01003036 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01003037{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003038 int result;
3039 regexec_T rex_save;
3040 int rex_in_use_save = rex_in_use;
3041
Bram Moolenaar0270f382018-07-17 05:43:58 +02003042 // Cannot use the same prog recursively, it contains state.
3043 if (rmp->regprog->re_in_use)
3044 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003045 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003046 return FALSE;
3047 }
3048 rmp->regprog->re_in_use = TRUE;
3049
Bram Moolenaar6100d022016-10-02 16:51:57 +02003050 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003051 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003052 rex_save = rex;
3053 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003054
Bram Moolenaar6100d022016-10-02 16:51:57 +02003055 rex.reg_startp = NULL;
3056 rex.reg_endp = NULL;
3057 rex.reg_startpos = NULL;
3058 rex.reg_endpos = NULL;
3059
3060 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003061 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003062
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003063 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003064 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3065 && result == NFA_TOO_EXPENSIVE)
3066 {
3067 int save_p_re = p_re;
3068 int re_flags = rmp->regprog->re_flags;
3069 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3070
3071 p_re = BACKTRACKING_ENGINE;
3072 vim_regfree(rmp->regprog);
3073 if (pat != NULL)
3074 {
3075#ifdef FEAT_EVAL
3076 report_re_switch(pat);
3077#endif
3078 rmp->regprog = vim_regcomp(pat, re_flags);
3079 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003080 {
3081 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003082 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003083 rmp->regprog->re_in_use = FALSE;
3084 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003085 vim_free(pat);
3086 }
3087
3088 p_re = save_p_re;
3089 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02003090
3091 rex_in_use = rex_in_use_save;
3092 if (rex_in_use)
3093 rex = rex_save;
3094
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003095 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003096}
3097
#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
/*
 * Match the compiled program "*prog" against "line", starting at column
 * "col".  "ignore_case" is stored in the "rm_ic" field of the match.
 * A "\n" in "line" is not handled as a line break.
 * Note: "*prog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_prog(
    regprog_T	**prog,
    int		ignore_case,
    char_u	*line,
    colnr_T	col)
{
    regmatch_T	rm;
    int		matched;

    rm.regprog = *prog;
    rm.rm_ic = ignore_case;
    matched = vim_regexec_string(&rm, line, col, FALSE);

    // Matching may have replaced the program, pass the current one back.
    *prog = rm.regprog;

    return matched;
}
#endif
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003120
3121/*
3122 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003123 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003124 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003125 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003126vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003127{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003128 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003129}
3130
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003131/*
3132 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003133 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003134 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003135 */
3136 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003137vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003138{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003139 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003140}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003141
3142/*
3143 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003144 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
3145 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003146 * Uses curbuf for line count and 'iskeyword'.
3147 *
3148 * Return zero if there is no match. Return number of lines contained in the
3149 * match otherwise.
3150 */
3151 long
Bram Moolenaar05540972016-01-30 20:31:25 +01003152vim_regexec_multi(
3153 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003154 win_T *win, // window in which to search or NULL
3155 buf_T *buf, // buffer in which to search
3156 linenr_T lnum, // nr of line to start looking for match
3157 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003158 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003159{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003160 int result;
3161 regexec_T rex_save;
3162 int rex_in_use_save = rex_in_use;
3163
Bram Moolenaar0270f382018-07-17 05:43:58 +02003164 // Cannot use the same prog recursively, it contains state.
3165 if (rmp->regprog->re_in_use)
3166 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003167 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003168 return FALSE;
3169 }
3170 rmp->regprog->re_in_use = TRUE;
3171
Bram Moolenaar6100d022016-10-02 16:51:57 +02003172 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003173 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003174 rex_save = rex;
3175 rex_in_use = TRUE;
3176
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003177 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003178 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003179 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003180
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003181 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003182 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3183 && result == NFA_TOO_EXPENSIVE)
3184 {
3185 int save_p_re = p_re;
3186 int re_flags = rmp->regprog->re_flags;
3187 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3188
3189 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003190 if (pat != NULL)
3191 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003192 regprog_T *prev_prog = rmp->regprog;
3193
Bram Moolenaarfda37292014-11-05 14:27:36 +01003194#ifdef FEAT_EVAL
3195 report_re_switch(pat);
3196#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003197#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003198 // checking for \z misuse was already done when compiling for NFA,
3199 // allow all here
3200 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003201#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003202 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003203#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003204 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003205#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003206 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003207 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003208 // Somehow compiling the pattern failed now, put back the
3209 // previous one to avoid "regprog" becoming NULL.
3210 rmp->regprog = prev_prog;
3211 }
3212 else
3213 {
3214 vim_regfree(prev_prog);
3215
Bram Moolenaar41499802018-07-18 06:02:09 +02003216 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003217 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003218 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003219 rmp->regprog->re_in_use = FALSE;
3220 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003221 vim_free(pat);
3222 }
3223 p_re = save_p_re;
3224 }
3225
Bram Moolenaar6100d022016-10-02 16:51:57 +02003226 rex_in_use = rex_in_use_save;
3227 if (rex_in_use)
3228 rex = rex_save;
3229
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003230 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003231}