blob: 147452aae27bea40c2cd1a2eae080305dd7831b7 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
Bram Moolenaar155f2d12022-06-20 13:38:33 +010024static sig_atomic_t dummy_timeout_flag = 0;
25static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
Bram Moolenaar071d4272004-06-13 20:20:40 +000028/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000029 * Magic characters have a special meaning, they don't match literally.
30 * Magic characters are negative. This separates them from literal characters
31 * (possibly multi-byte). Only ASCII characters can be Magic.
32 */
33#define Magic(x) ((int)(x) - 256)
34#define un_Magic(x) ((x) + 256)
35#define is_Magic(x) ((x) < 0)
36
/*
 * Return the non-magic form of "x": a Magic (negative) value is converted
 * back with un_Magic(), any other value is returned as-is.
 */
static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Flip the magicness of "x": a Magic value becomes the plain character, a
 * plain character becomes its Magic counterpart.
 */
static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
Bram Moolenaar0f618382022-08-26 21:33:04 +010054static int timeout_nesting = 0;
55
56/*
57 * Start a timer that will cause the regexp to abort after "msec".
58 * This doesn't work well recursively. In case it happens anyway, the first
59 * set timeout will prevail, nested ones are ignored.
60 * The caller must make sure there is a matching disable_regexp_timeout() call!
61 */
Paul Ollis65745772022-06-05 16:55:54 +010062 void
63init_regexp_timeout(long msec)
64{
Bram Moolenaar0f618382022-08-26 21:33:04 +010065 if (timeout_nesting == 0)
66 timeout_flag = start_timeout(msec);
67 ++timeout_nesting;
Paul Ollis65745772022-06-05 16:55:54 +010068}
69
70 void
71disable_regexp_timeout(void)
72{
Bram Moolenaar0f618382022-08-26 21:33:04 +010073 if (timeout_nesting == 0)
74 iemsg("disable_regexp_timeout() called without active timer");
75 else if (--timeout_nesting == 0)
76 {
77 stop_timeout();
78 timeout_flag = &dummy_timeout_flag;
79 }
Paul Ollis65745772022-06-05 16:55:54 +010080}
81#endif
82
Bram Moolenaar9781d9c2022-09-20 13:51:25 +010083#if defined(FEAT_EVAL) || defined(PROTO)
84# ifdef FEAT_RELTIME
85static sig_atomic_t *saved_timeout_flag;
86# endif
87
/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.
 * Must be followed by a call to restore_timeout_for_debugging().
 */
void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    // Remember which flag was being watched, then watch the dummy flag.
    saved_timeout_flag = (sig_atomic_t *)timeout_flag;
    timeout_flag = &dummy_timeout_flag;
# endif
}
101
/*
 * Counterpart of save_timeout_for_debugging(): watch the timeout flag that
 * was remembered there again.
 */
void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
109#endif
110
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200112 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 * number; the start node begins in the second byte. It's used to catch the
114 * most severe mutilation of the program by the caller.
115 */
116
117#define REGMAGIC 0234
118
119/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 * Utility definitions.
121 */
122#define UCHARAT(p) ((int)*(char_u *)(p))
123
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100124// Used for an error (down from) vim_regcomp(): give the error message, set
125// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100126#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
127#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
128#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
129#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +0100130#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100131#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +0000132#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000133
Bram Moolenaar95f09602016-11-10 20:01:45 +0100134
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135#define MAX_LIMIT (32767L << 16L)
136
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137#define NOT_MULTI 0
138#define MULTI_ONE 1
139#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200140
141// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100142#define RA_FAIL 1 // something failed, abort
143#define RA_CONT 2 // continue in inner loop
144#define RA_BREAK 3 // break inner loop
145#define RA_MATCH 4 // successful match
146#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200147
Bram Moolenaar071d4272004-06-13 20:20:40 +0000148/*
149 * Return NOT_MULTI if c is not a "multi" operator.
150 * Return MULTI_ONE if c is a single "multi" operator.
151 * Return MULTI_MULT if c is a multi "multi" operator.
152 */
153 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100154re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000155{
156 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
157 return MULTI_ONE;
158 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
159 return MULTI_MULT;
160 return NOT_MULTI;
161}
162
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000163static char_u *reg_prev_sub = NULL;
John Marriott82792db2024-05-12 00:07:17 +0200164static size_t reg_prev_sublen = 0;
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000165
Bram Moolenaar071d4272004-06-13 20:20:40 +0000166/*
167 * REGEXP_INRANGE contains all characters which are always special in a []
168 * range after '\'.
169 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
170 * These are:
171 * \n - New line (NL).
172 * \r - Carriage Return (CR).
173 * \t - Tab (TAB).
174 * \e - Escape (ESC).
175 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000176 * \d - Character code in decimal, eg \d123
177 * \o - Character code in octal, eg \o80
178 * \x - Character code in hex, eg \x4a
179 * \u - Multibyte character code, eg \u20ac
180 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000181 */
182static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000183static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000184
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185/*
186 * Translate '\x' to its control character, except "\n", which is Magic.
187 */
188 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100189backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190{
191 switch (c)
192 {
193 case 'r': return CAR;
194 case 't': return TAB;
195 case 'e': return ESC;
196 case 'b': return BS;
197 }
198 return c;
199}
200
// Identifiers for the "[:name:]" items that get_char_class() can return.
// The values are consecutive from zero; CLASS_NONE stands apart and means
// that no item was recognized.
enum
{
    CLASS_ALNUM	    = 0,
    CLASS_ALPHA	    = 1,
    CLASS_BLANK	    = 2,
    CLASS_CNTRL	    = 3,
    CLASS_DIGIT	    = 4,
    CLASS_GRAPH	    = 5,
    CLASS_LOWER	    = 6,
    CLASS_PRINT	    = 7,
    CLASS_PUNCT	    = 8,
    CLASS_SPACE	    = 9,
    CLASS_UPPER	    = 10,
    CLASS_XDIGIT    = 11,
    CLASS_TAB	    = 12,
    CLASS_RETURN    = 13,
    CLASS_BACKSPACE = 14,
    CLASS_ESCAPE    = 15,
    CLASS_IDENT	    = 16,
    CLASS_KEYWORD   = 17,
    CLASS_FNAME	    = 18,
    CLASS_NONE	    = 99
};
224
Bram Moolenaar071d4272004-06-13 20:20:40 +0000225/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000226 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
228 * recognized. Otherwise "pp" is advanced to after the item.
229 */
230 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100231get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232{
John Marriott82792db2024-05-12 00:07:17 +0200233 // must be sorted by the 'value' field because it is used by bsearch()!
234 static keyvalue_T char_class_tab[] =
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 {
John Marriott82792db2024-05-12 00:07:17 +0200236 KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
237 KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
238 KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
239 KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
240 KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
241 KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
242 KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
243 KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
244 KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
245 KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
246 KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
247 KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
248 KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
249 KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
250 KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
251 KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
252 KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
253 KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
254 KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 };
Bram Moolenaar071d4272004-06-13 20:20:40 +0000256
John Marriott82792db2024-05-12 00:07:17 +0200257 // check that the value of "pp" has a chance of matching
258 if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
259 && ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000260 {
John Marriott82792db2024-05-12 00:07:17 +0200261 keyvalue_T target;
262 keyvalue_T *entry;
263 // this function can be called repeatedly with the same value for "pp"
264 // so we cache the last found entry.
265 static keyvalue_T *last_entry = NULL;
266
267 target.key = 0;
268 target.value = (char *)*pp + 2;
269 target.length = 0; // not used, see cmp_keyvalue_value_n()
270
271 if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
272 entry = last_entry;
273 else
274 entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
275 ARRAY_LENGTH(char_class_tab),
276 sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
277 if (entry != NULL)
278 {
279 last_entry = entry;
280 *pp += entry->length + 2;
281 return entry->key;
282 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283 }
284 return CLASS_NONE;
285}
286
287/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288 * Specific version of character class functions.
289 * Using a table to keep this fast.
290 */
291static short class_tab[256];
292
293#define RI_DIGIT 0x01
294#define RI_HEX 0x02
295#define RI_OCTAL 0x04
296#define RI_WORD 0x08
297#define RI_HEAD 0x10
298#define RI_ALPHA 0x20
299#define RI_LOWER 0x40
300#define RI_UPPER 0x80
301#define RI_WHITE 0x100
302
303 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100304init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305{
306 int i;
307 static int done = FALSE;
308
309 if (done)
310 return;
311
312 for (i = 0; i < 256; ++i)
313 {
314 if (i >= '0' && i <= '7')
315 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
316 else if (i >= '8' && i <= '9')
317 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
318 else if (i >= 'a' && i <= 'f')
319 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
322 else if (i >= 'A' && i <= 'F')
323 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
326 else if (i == '_')
327 class_tab[i] = RI_WORD + RI_HEAD;
328 else
329 class_tab[i] = 0;
330 }
331 class_tab[' '] |= RI_WHITE;
332 class_tab['\t'] |= RI_WHITE;
333 done = TRUE;
334}
335
kylo252ae6f1d82022-02-16 19:24:07 +0000336#define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
337#define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX))
338#define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
339#define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD))
340#define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD))
341#define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
342#define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER))
343#define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER))
344#define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100346// flags for regflags
347#define RF_ICASE 1 // ignore case
348#define RF_NOICASE 2 // don't ignore case
349#define RF_HASNL 4 // can match a NL
350#define RF_ICOMBINE 8 // ignore combining characters
351#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000352
353/*
354 * Global work variables for vim_regcomp().
355 */
356
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100357static char_u *regparse; // Input-scan pointer.
358static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100359static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000360#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100361static int regnzpar; // \z() count.
362static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100364static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100366static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000367#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100369static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100371static int reg_string; // matching with a string instead of a buffer
372 // line
373static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000374
375/*
376 * META contains all characters that may be magic, except '^' and '$'.
377 */
378
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100379// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380static char_u META_flags[] = {
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100383// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100385// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100387// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100389// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000390 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100391// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000392 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100393// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000394 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
395};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000396
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100397static int curchr; // currently parsed character
398// Previous character. Note: prevchr is sometimes -1 when we are not at the
399// start, eg in /[ ^I]^ the pattern was never found even if it existed,
400// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200401static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100402static int prevprevchr; // previous-previous character
403static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100405// arguments for reg()
406#define REG_NOPAREN 0 // toplevel reg()
407#define REG_PAREN 1 // \(\)
408#define REG_ZPAREN 2 // \z(\)
409#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200411typedef struct
412{
413 char_u *regparse;
414 int prevchr_len;
415 int curchr;
416 int prevchr;
417 int prevprevchr;
418 int nextchr;
419 int at_start;
420 int prev_at_start;
421 int regnpar;
422} parse_state_T;
423
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100424static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100425static int getchr(void);
426static void skipchr_keepstart(void);
427static int peekchr(void);
428static void skipchr(void);
429static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100430static long gethexchrs(int maxinputlen);
431static long getoctchrs(void);
432static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100433static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100434static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200435static int cstrncmp(char_u *s1, char_u *s2, int *n);
436static char_u *cstrchr(char_u *, int);
437static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100438static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100439#ifdef FEAT_EVAL
440static void report_re_switch(char_u *pat);
441#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200443static regengine_T bt_regengine;
444static regengine_T nfa_regengine;
445
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446/*
447 * Return TRUE if compiled regular expression "prog" can match a line break.
448 */
449 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100450re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451{
452 return (prog->regflags & RF_HASNL);
453}
454
455/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000456 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
457 * Returns a character representing the class. Zero means that no item was
458 * recognized. Otherwise "pp" is advanced to after the item.
459 */
460 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100461get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000462{
463 int c;
464 int l = 1;
465 char_u *p = *pp;
466
Bram Moolenaar985079c2019-02-16 17:07:47 +0100467 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000468 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000470 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000471 if (p[l + 2] == '=' && p[l + 3] == ']')
472 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000473 if (has_mbyte)
474 c = mb_ptr2char(p + 2);
475 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000476 c = p[2];
477 *pp += l + 4;
478 return c;
479 }
480 }
481 return 0;
482}
483
484/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 * Check for a collating element "[.a.]". "pp" points to the '['.
486 * Returns a character. Zero means that no item was recognized. Otherwise
487 * "pp" is advanced to after the item.
488 * Currently only single characters are recognized!
489 */
490 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100491get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000492{
493 int c;
494 int l = 1;
495 char_u *p = *pp;
496
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100497 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000498 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000499 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000500 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000501 if (p[l + 2] == '.' && p[l + 3] == ']')
502 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000503 if (has_mbyte)
504 c = mb_ptr2char(p + 2);
505 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000506 c = p[2];
507 *pp += l + 4;
508 return c;
509 }
510 }
511 return 0;
512}
513
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100514static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
515static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200516
517 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100518get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200519{
520 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
521 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
522}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000523
524/*
525 * Skip over a "[]" range.
526 * "p" must point to the character after the '['.
527 * The returned pointer is on the matching ']', or the terminating NUL.
528 */
529 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100530skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000531{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000532 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000533
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100534 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000535 ++p;
536 if (*p == ']' || *p == '-')
537 ++p;
538 while (*p != NUL && *p != ']')
539 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000540 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000541 p += l;
542 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000543 if (*p == '-')
544 {
545 ++p;
546 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100547 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000548 }
549 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200550 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000551 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200552 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000553 p += 2;
554 else if (*p == '[')
555 {
556 if (get_char_class(&p) == CLASS_NONE
557 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200558 && get_coll_element(&p) == 0
559 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100560 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000561 }
562 else
563 ++p;
564 }
565
566 return p;
567}
568
569/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000570 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200571 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 * Take care of characters with a backslash in front of it.
573 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000574 */
575 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100576skip_regexp(
577 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200578 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200579 int magic)
580{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100581 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200582}
583
584/*
585 * Call skip_regexp() and when the delimiter does not match give an error and
586 * return NULL.
587 */
588 char_u *
589skip_regexp_err(
590 char_u *startp,
591 int delim,
592 int magic)
593{
594 char_u *p = skip_regexp(startp, delim, magic);
595
596 if (*p != delim)
597 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000598 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200599 return NULL;
600 }
601 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200602}
603
604/*
605 * skip_regexp() with extra arguments:
606 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
607 * expression and change "\?" to "?". If "*newp" is not NULL the expression
608 * is changed in-place.
609 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100610 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200611 */
612 char_u *
613skip_regexp_ex(
614 char_u *startp,
615 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100616 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200617 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100618 int *dropped,
619 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000620{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100621 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000622 char_u *p = startp;
zeertzjq789679c2024-05-23 17:41:26 +0200623 size_t startplen = STRLEN(startp);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000624
625 if (magic)
626 mymagic = MAGIC_ON;
627 else
628 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200629 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100631 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100633 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000634 break;
635 if ((p[0] == '[' && mymagic >= MAGIC_ON)
636 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
637 {
638 p = skip_anyof(p + 1);
639 if (p[0] == NUL)
640 break;
641 }
642 else if (p[0] == '\\' && p[1] != NUL)
643 {
644 if (dirc == '?' && newp != NULL && p[1] == '?')
645 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100646 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000647 if (*newp == NULL)
648 {
John Marriott82792db2024-05-12 00:07:17 +0200649 *newp = vim_strnsave(startp, startplen);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000650 if (*newp != NULL)
651 p = *newp + (p - startp);
652 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200653 if (dropped != NULL)
654 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000655 if (*newp != NULL)
John Marriott82792db2024-05-12 00:07:17 +0200656 mch_memmove(p, p + 1, (startplen - ((p + 1) - *newp)) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657 else
658 ++p;
659 }
660 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100661 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 if (*p == 'v')
663 mymagic = MAGIC_ALL;
664 else if (*p == 'V')
665 mymagic = MAGIC_NONE;
666 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100668 if (magic_val != NULL)
669 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670 return p;
671}
672
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200673/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200674 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200675 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100676static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200677static int at_start; // True when on the first character
678static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100679
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200681 * Start parsing at "str".
682 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100684initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685{
686 regparse = str;
687 prevchr_len = 0;
688 curchr = prevprevchr = prevchr = nextchr = -1;
689 at_start = TRUE;
690 prev_at_start = FALSE;
691}
692
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200693/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200694 * Save the current parse state, so that it can be restored and parsing
695 * starts in the same state again.
696 */
697 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100698save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200699{
700 ps->regparse = regparse;
701 ps->prevchr_len = prevchr_len;
702 ps->curchr = curchr;
703 ps->prevchr = prevchr;
704 ps->prevprevchr = prevprevchr;
705 ps->nextchr = nextchr;
706 ps->at_start = at_start;
707 ps->prev_at_start = prev_at_start;
708 ps->regnpar = regnpar;
709}
710
711/*
712 * Restore a previously saved parse state.
713 */
714 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100715restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200716{
717 regparse = ps->regparse;
718 prevchr_len = ps->prevchr_len;
719 curchr = ps->curchr;
720 prevchr = ps->prevchr;
721 prevprevchr = ps->prevprevchr;
722 nextchr = ps->nextchr;
723 at_start = ps->at_start;
724 prev_at_start = ps->prev_at_start;
725 regnpar = ps->regnpar;
726}
727
728
/*
 * Get the next character without advancing.
 *
 * The lexed value is cached in "curchr": when "curchr" is not -1 it is
 * returned unchanged, otherwise the character at "regparse" is examined.
 * Characters that are special under the current "reg_magic" setting are
 * returned as Magic(c).  "regparse" has the same value on return as on entry.
 */
    static int
peekchr(void)
{
    // Non-zero only during the recursive call made below for the character
    // that follows a backslash; consulted by the '*' case.
    static int after_slash = FALSE;

    if (curchr != -1)
        return curchr;

    switch (curchr = regparse[0])
    {
    case '.':
    case '[':
    case '~':
        // magic when 'magic' is on
        if (reg_magic >= MAGIC_ON)
            curchr = Magic(curchr);
        break;
    case '(':
    case ')':
    case '{':
    case '%':
    case '+':
    case '=':
    case '?':
    case '@':
    case '!':
    case '&':
    case '|':
    case '<':
    case '>':
    case '#': // future ext.
    case '"': // future ext.
    case '\'': // future ext.
    case ',': // future ext.
    case '-': // future ext.
    case ':': // future ext.
    case ';': // future ext.
    case '`': // future ext.
    case '/': // Can't be used in / command
        // magic only after "\v"
        if (reg_magic == MAGIC_ALL)
            curchr = Magic(curchr);
        break;
    case '*':
        // * is not magic as the very first character, eg "?*ptr", when
        // after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
        // "\(\*" is not magic, thus must be magic if "after_slash"
        if (reg_magic >= MAGIC_ON
                && !at_start
                && !(prev_at_start && prevchr == Magic('^'))
                && (after_slash
                    || (prevchr != Magic('(')
                        && prevchr != Magic('&')
                        && prevchr != Magic('|'))))
            curchr = Magic('*');
        break;
    case '^':
        // '^' is only magic as the very first character and if it's after
        // "\(", "\|", "\&" or "\n"
        if (reg_magic >= MAGIC_OFF
                && (at_start
                    || reg_magic == MAGIC_ALL
                    || prevchr == Magic('(')
                    || prevchr == Magic('|')
                    || prevchr == Magic('&')
                    || prevchr == Magic('n')
                    || (no_Magic(prevchr) == '('
                        && prevprevchr == Magic('%'))))
        {
            curchr = Magic('^');
            // What follows a magic '^' is again treated as being at the
            // start.
            at_start = TRUE;
            prev_at_start = FALSE;
        }
        break;
    case '$':
        // '$' is only magic as the very last char and if it's in front of
        // either "\|", "\)", "\&", or "\n"
        if (reg_magic >= MAGIC_OFF)
        {
            char_u *p = regparse + 1;
            int is_magic_all = (reg_magic == MAGIC_ALL);

            // ignore \c \C \m \M \v \V and \Z after '$'
            while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                        || p[1] == 'm' || p[1] == 'M'
                        || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
            {
                // Track the magicness that applies to what follows the
                // skipped items.
                if (p[1] == 'v')
                    is_magic_all = TRUE;
                else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
                    is_magic_all = FALSE;
                p += 2;
            }
            if (p[0] == NUL
                    || (p[0] == '\\'
                        && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                            || p[1] == 'n'))
                    || (is_magic_all
                        && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
                    || reg_magic == MAGIC_ALL)
                curchr = Magic('$');
        }
        break;
    case '\\':
        {
            int c = regparse[1];

            if (c == NUL)
                curchr = '\\'; // trailing '\'
            else if (c <= '~' && META_flags[c])
            {
                /*
                 * META contains everything that may be magic sometimes,
                 * except ^ and $ ("\^" and "\$" are only magic after
                 * "\V").  We now fetch the next character and toggle its
                 * magicness.  Therefore, \ is so meta-magic that it is
                 * not in META.
                 */
                curchr = -1;
                prev_at_start = at_start;
                at_start = FALSE; // be able to say "/\*ptr"
                // Lex the character after the backslash with a recursive
                // call, then put "regparse" and "after_slash" back.
                ++regparse;
                ++after_slash;
                peekchr();
                --regparse;
                --after_slash;
                curchr = toggle_Magic(curchr);
            }
            else if (vim_strchr(REGEXP_ABBR, c))
            {
                /*
                 * Handle abbreviations, like "\t" for TAB -- webb
                 */
                curchr = backslash_trans(c);
            }
            else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                curchr = toggle_Magic(c);
            else
            {
                /*
                 * Next character can never be (made) magic?
                 * Then backslashing it won't do anything.
                 */
                if (has_mbyte)
                    curchr = (*mb_ptr2char)(regparse + 1);
                else
                    curchr = c;
            }
            break;
        }

    default:
        // Without multi-byte support "curchr" already holds regparse[0]
        // from the switch expression.
        if (has_mbyte)
            curchr = (*mb_ptr2char)(regparse);
    }

    return curchr;
}
890
891/*
892 * Eat one lexed character. Do this in a way that we can undo it.
893 */
894 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100895skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000896{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100897 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898 if (*regparse == '\\')
899 prevchr_len = 1;
900 else
901 prevchr_len = 0;
902 if (regparse[prevchr_len] != NUL)
903 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000904 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100905 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000906 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000907 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000908 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000910 ++prevchr_len;
911 }
912 regparse += prevchr_len;
913 prev_at_start = at_start;
914 at_start = FALSE;
915 prevprevchr = prevchr;
916 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100917 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000918 nextchr = -1;
919}
920
921/*
922 * Skip a character while keeping the value of prev_at_start for at_start.
923 * prevchr and prevprevchr are also kept.
924 */
925 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100926skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000927{
928 int as = prev_at_start;
929 int pr = prevchr;
930 int prpr = prevprevchr;
931
932 skipchr();
933 at_start = as;
934 prevchr = pr;
935 prevprevchr = prpr;
936}
937
/*
 * Lexical analyzer entry point: return the next character of the pattern as
 * seen by peekchr() and advance past it with skipchr().
 */
    static int
getchr(void)
{
    int c;

    c = peekchr();
    skipchr();

    return c;
}
950
951/*
952 * put character back. Works only once!
953 */
954 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100955ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000956{
957 nextchr = curchr;
958 curchr = prevchr;
959 prevchr = prevprevchr;
960 at_start = prev_at_start;
961 prev_at_start = FALSE;
962
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100963 // Backup regparse, so that it's at the same position as before the
964 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000965 regparse -= prevchr_len;
966}
967
968/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000969 * Get and return the value of the hex string at the current position.
970 * Return -1 if there is no valid hex number.
971 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000972 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000973 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000974 * The parameter controls the maximum number of input characters. This will be
975 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
976 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100977 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100978gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000979{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100980 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000981 int c;
982 int i;
983
984 for (i = 0; i < maxinputlen; ++i)
985 {
986 c = regparse[0];
987 if (!vim_isxdigit(c))
988 break;
989 nr <<= 4;
990 nr |= hex2nr(c);
991 ++regparse;
992 }
993
994 if (i == 0)
995 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100996 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000997}
998
999/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001000 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001001 * current position. Return -1 for invalid. Consumes all digits.
1002 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001003 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001004getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001005{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001006 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001007 int c;
1008 int i;
1009
1010 for (i = 0; ; ++i)
1011 {
1012 c = regparse[0];
1013 if (c < '0' || c > '9')
1014 break;
1015 nr *= 10;
1016 nr += c - '0';
1017 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001018 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001019 }
1020
1021 if (i == 0)
1022 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001023 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001024}
1025
1026/*
1027 * get and return the value of the octal string immediately after the current
1028 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
1029 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
1030 * treat 8 or 9 as recognised characters. Position is updated:
1031 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00001032 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001033 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001034 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001035getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001036{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001037 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001038 int c;
1039 int i;
1040
1041 for (i = 0; i < 3 && nr < 040; ++i)
1042 {
1043 c = regparse[0];
1044 if (c < '0' || c > '7')
1045 break;
1046 nr <<= 3;
1047 nr |= hex2nr(c);
1048 ++regparse;
1049 }
1050
1051 if (i == 0)
1052 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001053 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001054}
1055
1056/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 * read_limits - Read two integers to be taken as a minimum and maximum.
1058 * If the first character is '-', then the range is reversed.
1059 * Should end with 'end'. If minval is missing, zero is default, if maxval is
1060 * missing, a very big number is the default.
1061 */
1062 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001063read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064{
1065 int reverse = FALSE;
1066 char_u *first_char;
1067 long tmp;
1068
1069 if (*regparse == '-')
1070 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001071 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001072 regparse++;
1073 reverse = TRUE;
1074 }
1075 first_char = regparse;
1076 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001077 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001078 {
1079 if (vim_isdigit(*++regparse))
1080 *maxval = getdigits(&regparse);
1081 else
1082 *maxval = MAX_LIMIT;
1083 }
1084 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001085 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001086 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001087 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001088 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001089 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001090 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001091 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001092 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001093
1094 /*
1095 * Reverse the range if there was a '-', or make sure it is in the right
1096 * order otherwise.
1097 */
1098 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1099 {
1100 tmp = *minval;
1101 *minval = *maxval;
1102 *maxval = tmp;
1103 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001104 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001105 return OK;
1106}
1107
1108/*
1109 * vim_regexec and friends
1110 */
1111
1112/*
1113 * Global work variables for vim_regexec().
1114 */
1115
// Forward declarations of functions that are defined further down.
static void cleanup_subexpr(void);
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr(void);
#endif
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in bt_regexec_both() when finished.
 */
static char_u *reg_tofree = NULL;
// Size that was last allocated for "reg_tofree".
static unsigned reg_tofreelen;
1129
/*
 * Structure used to store the execution state of the regex engine.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              curbuf                  buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 */
typedef struct {
    regmatch_T *reg_match;
    regmmatch_T *reg_mmatch;

    char_u **reg_startp;
    char_u **reg_endp;
    lpos_T *reg_startpos;
    lpos_T *reg_endpos;

    win_T *reg_win;
    buf_T *reg_buf;
    linenr_T reg_firstlnum;
    linenr_T reg_maxline;
    int reg_line_lbr; // "\n" in string is line break

    // The current match-position is stored in these variables:
    linenr_T lnum; // line number, relative to first line
    char_u *line; // start of current line
    char_u *input; // current input, points into "line"

    int need_clear_subexpr; // subexpressions still need to be cleared
#ifdef FEAT_SYN_HL
    int need_clear_zsubexpr; // extmatch subexpressions still need to be
                             // cleared
#endif

    // Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
    // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
    // contains '\c' or '\C' the value is overruled.
    int reg_ic;

    // Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
    // flag in the regexp.  Defaults to false, always.
    int reg_icombine;

    // Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
    // there is no maximum.
    colnr_T reg_maxcol;

    // State for the NFA engine regexec.
    int nfa_has_zend; // NFA regexp \ze operator encountered.
    int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
    int nfa_nsubexpr; // Number of sub expressions actually being used
                      // during execution. 1 if only the whole match
                      // (subexpr 0) is used.
    // listid is global, so that it increases on recursive calls to
    // nfa_regmatch(), which means we don't have to clear the lastlist field of
    // all the states.
    int nfa_listid;
    int nfa_alt_listid;

#ifdef FEAT_SYN_HL
    int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
#endif
} regexec_T;

// The single file-level instance of the execution state.
static regexec_T rex;
// NOTE(review): presumably TRUE while "rex" is in use by a match — confirm
// at the places that set it.
static int rex_in_use = FALSE;
1205
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001207 * Return TRUE if character 'c' is included in 'iskeyword' option for
1208 * "reg_buf" buffer.
1209 */
1210 static int
1211reg_iswordc(int c)
1212{
1213 return vim_iswordc_buf(c, rex.reg_buf);
1214}
1215
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE; // TRUE when submatch() can be used

// This struct is used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
typedef struct {
    regmatch_T *sm_match;
    regmmatch_T *sm_mmatch;
    // Used by reg_getline_common() in place of rex.reg_firstlnum and
    // rex.reg_maxline when RGLF_SUBMATCH is given.
    linenr_T sm_firstlnum;
    linenr_T sm_maxline;
    int sm_line_lbr;
} regsubmatch_T;

static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE
#endif
1232
// Bit flags for reg_getline_common(); they can be combined.
typedef enum
{
    RGLF_LINE = 0x01, // store the line pointer in "line"
    RGLF_LENGTH = 0x02 // store the line length in "length"
#ifdef FEAT_EVAL
    ,
    RGLF_SUBMATCH = 0x04 // take first/last line from "rsm" instead of "rex"
#endif
} reg_getline_flags_T;
1242
1243//
1244// common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and
1245// reg_getline_submatch_len().
1246// the flags argument (which is a bitmask) controls what info is to be returned and whether
1247// or not submatch is in effect.
1248// note:
1249// submatch is available only if FEAT_EVAL is defined.
1250 static void
1251reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
1252{
1253 int get_line = flags & RGLF_LINE;
1254 int get_length = flags & RGLF_LENGTH;
1255 linenr_T firstlnum;
1256 linenr_T maxline;
1257
1258#ifdef FEAT_EVAL
1259 if (flags & RGLF_SUBMATCH)
1260 {
1261 firstlnum = rsm.sm_firstlnum + lnum;
1262 maxline = rsm.sm_maxline;
1263 }
1264 else
1265#endif
1266 {
1267 firstlnum = rex.reg_firstlnum + lnum;
1268 maxline = rex.reg_maxline;
1269 }
1270
1271 // when looking behind for a match/no-match lnum is negative. but we
1272 // can't go before line 1.
1273 if (firstlnum < 1)
1274 {
1275 if (get_line)
1276 *line = NULL;
1277 if (get_length)
1278 *length = 0;
1279
1280 return;
1281 }
1282
1283 if (lnum > maxline)
1284 {
1285 // must have matched the "\n" in the last line.
1286 if (get_line)
1287 *line = (char_u *)"";
1288 if (get_length)
1289 *length = 0;
1290
1291 return;
1292 }
1293
1294 if (get_line)
1295 *line = ml_get_buf(rex.reg_buf, firstlnum, FALSE);
1296 if (get_length)
1297 *length = ml_get_buf_len(rex.reg_buf, firstlnum);
1298}
1299
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001300/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001301 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1302 */
1303 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001304reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001305{
John Marriott82792db2024-05-12 00:07:17 +02001306 char_u *line;
1307
1308 reg_getline_common(lnum, RGLF_LINE, &line, NULL);
1309
1310 return line;
1311}
1312
1313/*
1314 * Get length of line "lnum", which is relative to "reg_firstlnum".
1315 */
1316 static colnr_T
1317reg_getline_len(linenr_T lnum)
1318{
1319 colnr_T length;
1320
1321 reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
1322
1323 return length;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001324}
1325
#ifdef FEAT_SYN_HL
static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning
static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos
static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
#endif

// TRUE if using multi-line regexp, which is when "rex.reg_match" is not set.
#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001335
Bram Moolenaar071d4272004-06-13 20:20:40 +00001336#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001337/*
1338 * Create a new extmatch and mark it as referenced once.
1339 */
1340 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001341make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342{
1343 reg_extmatch_T *em;
1344
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001345 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001346 if (em != NULL)
1347 em->refcnt = 1;
1348 return em;
1349}
1350
1351/*
1352 * Add a reference to an extmatch.
1353 */
1354 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001355ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356{
1357 if (em != NULL)
1358 em->refcnt++;
1359 return em;
1360}
1361
1362/*
1363 * Remove a reference to an extmatch. If there are no references left, free
1364 * the info.
1365 */
1366 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001367unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368{
1369 int i;
1370
1371 if (em != NULL && --em->refcnt <= 0)
1372 {
1373 for (i = 0; i < NSUBEXP; ++i)
1374 vim_free(em->matches[i]);
1375 vim_free(em);
1376 }
1377}
1378#endif
1379
1380/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001381 * Get class of previous character.
1382 */
1383 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001384reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001385{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001386 if (rex.input > rex.line)
1387 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001388 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001389 return -1;
1390}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001391
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001392/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001393 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001394 */
1395 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001396reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001397{
1398 pos_T top, bot;
1399 linenr_T lnum;
1400 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001401 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001402 int mode;
1403 colnr_T start, end;
1404 colnr_T start2, end2;
1405 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001406 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001407
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001408 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001409 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001410 return FALSE;
1411
1412 if (VIsual_active)
1413 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001414 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001415 {
1416 top = VIsual;
1417 bot = wp->w_cursor;
1418 }
1419 else
1420 {
1421 top = wp->w_cursor;
1422 bot = VIsual;
1423 }
1424 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001425 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001426 }
1427 else
1428 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001429 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001430 {
1431 top = curbuf->b_visual.vi_start;
1432 bot = curbuf->b_visual.vi_end;
1433 }
1434 else
1435 {
1436 top = curbuf->b_visual.vi_end;
1437 bot = curbuf->b_visual.vi_start;
1438 }
zeertzjqe7102202024-02-13 20:32:04 +01001439 // a substitute command may have removed some lines
Christian Brabandt7c71db32024-01-22 20:12:34 +01001440 if (bot.lnum > curbuf->b_ml.ml_line_count)
1441 bot.lnum = curbuf->b_ml.ml_line_count;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001442 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001443 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001444 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001445 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001446 if (lnum < top.lnum || lnum > bot.lnum)
1447 return FALSE;
1448
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001449 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001450 if (mode == 'v')
1451 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001452 if ((lnum == top.lnum && col < top.col)
1453 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1454 return FALSE;
1455 }
1456 else if (mode == Ctrl_V)
1457 {
1458 getvvcol(wp, &top, &start, NULL, &end);
1459 getvvcol(wp, &bot, &start2, NULL, &end2);
1460 if (start2 < start)
1461 start = start2;
1462 if (end2 > end)
1463 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001464 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001465 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001466
1467 // getvvcol() flushes rex.line, need to get it again
1468 rex.line = reg_getline(rex.lnum);
1469 rex.input = rex.line + col;
1470
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001471 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001472 if (cols < start || cols > end - (*p_sel == 'e'))
1473 return FALSE;
1474 }
1475 return TRUE;
1476}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001477
Bram Moolenaar071d4272004-06-13 20:20:40 +00001478/*
1479 * Check the regexp program for its magic number.
1480 * Return TRUE if it's wrong.
1481 */
1482 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001483prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001484{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001485 regprog_T *prog;
1486
Bram Moolenaar6100d022016-10-02 16:51:57 +02001487 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001488 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001489 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001490 return FALSE;
1491
1492 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001493 {
RestorerZ68ebcee2023-05-31 17:12:14 +01001494 iemsg(e_corrupted_regexp_program);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001495 return TRUE;
1496 }
1497 return FALSE;
1498}
1499
1500/*
1501 * Cleanup the subexpressions, if this wasn't done yet.
1502 * This construction is used to clear the subexpressions only when they are
1503 * used (to increase speed).
1504 */
1505 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001506cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001507{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001508 if (!rex.need_clear_subexpr)
1509 return;
1510
1511 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001513 // Use 0xff to set lnum to -1
1514 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1515 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001516 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001517 else
1518 {
1519 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1520 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
1521 }
1522 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001523}
1524
1525#ifdef FEAT_SYN_HL
1526 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001527cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001528{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001529 if (!rex.need_clear_zsubexpr)
1530 return;
1531
1532 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001533 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001534 // Use 0xff to set lnum to -1
1535 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1536 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001537 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001538 else
1539 {
1540 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1541 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1542 }
1543 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001544}
1545#endif
1546
1547/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001548 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001549 */
1550 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001551reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001552{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001553 rex.line = reg_getline(++rex.lnum);
1554 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001555 fast_breakcheck();
1556}
1557
1558/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001559 * Check whether a backreference matches.
1560 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001561 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1562 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001563 */
1564 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001565match_with_backref(
1566 linenr_T start_lnum,
1567 colnr_T start_col,
1568 linenr_T end_lnum,
1569 colnr_T end_col,
1570 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001571{
1572 linenr_T clnum = start_lnum;
1573 colnr_T ccol = start_col;
1574 int len;
1575 char_u *p;
1576
1577 if (bytelen != NULL)
1578 *bytelen = 0;
1579 for (;;)
1580 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001581 // Since getting one line may invalidate the other, need to make copy.
1582 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001583 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001584 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001585 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001586 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1587 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001588 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001589 vim_free(reg_tofree);
1590 reg_tofree = alloc(len);
1591 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001592 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001593 reg_tofreelen = len;
1594 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001595 STRCPY(reg_tofree, rex.line);
1596 rex.input = reg_tofree + (rex.input - rex.line);
1597 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001598 }
1599
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001600 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001601 p = reg_getline(clnum);
1602 if (clnum == end_lnum)
1603 len = end_col - ccol;
1604 else
John Marriott82792db2024-05-12 00:07:17 +02001605 len = (int)reg_getline_len(clnum) - ccol;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001606
Bram Moolenaar0270f382018-07-17 05:43:58 +02001607 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001608 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001609 if (bytelen != NULL)
1610 *bytelen += len;
1611 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001612 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001613 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001614 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001615
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001616 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001617 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001618 if (bytelen != NULL)
1619 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001620 ++clnum;
1621 ccol = 0;
1622 if (got_int)
1623 return RA_FAIL;
1624 }
1625
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001626 // found a match! Note that rex.line may now point to a copy of the line,
1627 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001628 return RA_MATCH;
1629}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001630
Bram Moolenaarfb031402014-09-09 17:18:49 +02001631/*
1632 * Used in a place where no * or \+ can follow.
1633 */
1634 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001635re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001636{
1637 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001638 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001639 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001640 rc_did_emsg = TRUE;
1641 return FAIL;
1642 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001643 return OK;
1644}
1645
/*
 * One decomposition: up to three characters, unused slots are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;

// Decompositions for 0xfb20 - 0xfb4f, indexed by (character - 0xfb20).
// An "UNUSED" entry maps the character to itself.
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},		// 0xfb20	alt ayin
    {0x5d0, 0, 0},		// 0xfb21	alt alef
    {0x5d3, 0, 0},		// 0xfb22	alt dalet
    {0x5d4, 0, 0},		// 0xfb23	alt he
    {0x5db, 0, 0},		// 0xfb24	alt kaf
    {0x5dc, 0, 0},		// 0xfb25	alt lamed
    {0x5dd, 0, 0},		// 0xfb26	alt mem-sofit
    {0x5e8, 0, 0},		// 0xfb27	alt resh
    {0x5ea, 0, 0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc, 0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Split "c" into up to three characters, stored in "*c1", "*c2" and "*c3".
 * Only 0xfb20 - 0xfb4f are looked up in decomp_table; any other character is
 * passed through in "*c1" with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    decomp_T	entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
	// Outside of the table: nothing to decompose.
	*c1 = c;
	*c3 = 0;
	*c2 = 0;
	return;
    }

    entry = decomp_table[c - 0xfb20];
    *c1 = entry.a;
    *c2 = entry.b;
    *c3 = entry.c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001723
1724/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001725 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726 * Return 0 if strings match, non-zero otherwise.
Christian Brabandtc97f4d62024-04-10 16:18:15 +02001727 * Correct the length "*n" when composing characters are ignored.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001728 */
1729 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001730cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731{
1732 int result;
1733
Bram Moolenaar6100d022016-10-02 16:51:57 +02001734 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001735 result = STRNCMP(s1, s2, *n);
1736 else
1737 result = MB_STRNICMP(s1, s2, *n);
1738
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001739 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001740 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001741 {
1742 char_u *str1, *str2;
1743 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 int junk;
1745
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001746 // we have to handle the strcmp ourselves, since it is necessary to
1747 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748 str1 = s1;
1749 str2 = s2;
1750 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001751 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001752 {
1753 c1 = mb_ptr2char_adv(&str1);
1754 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001756 // Decompose the character if necessary, into 'base' characters.
1757 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001758 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001759 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001760 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001761 mb_decompose(c1, &c11, &junk, &junk);
1762 mb_decompose(c2, &c12, &junk, &junk);
1763 c1 = c11;
1764 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001765 if (c11 != c12
1766 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767 break;
1768 }
1769 }
1770 result = c2 - c1;
1771 if (result == 0)
1772 *n = (int)(str2 - s2);
1773 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001774
1775 return result;
1776}
1777
1778/*
1779 * cstrchr: This function is used a lot for simple searches, keep it fast!
1780 */
1781 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001782cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783{
1784 char_u *p;
1785 int cc;
1786
Bram Moolenaara12a1612019-01-24 16:39:02 +01001787 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 return vim_strchr(s, c);
1789
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001790 // tolower() and toupper() can be slow, comparing twice should be a lot
1791 // faster (esp. when using MS Visual C++!).
1792 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793 if (enc_utf8 && c > 0x80)
1794 cc = utf_fold(c);
1795 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001796 if (MB_ISUPPER(c))
1797 cc = MB_TOLOWER(c);
1798 else if (MB_ISLOWER(c))
1799 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 else
1801 return vim_strchr(s, c);
1802
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 if (has_mbyte)
1804 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001805 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806 {
1807 if (enc_utf8 && c > 0x80)
1808 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001809 int uc = utf_ptr2char(p);
1810
1811 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1812 // not 0xff.
1813 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001814 return p;
1815 }
1816 else if (*p == c || *p == cc)
1817 return p;
1818 }
1819 }
1820 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001821 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822 for (p = s; *p != NUL; ++p)
1823 if (*p == c || *p == cc)
1824 return p;
1825
1826 return NULL;
1827}
1828
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001829////////////////////////////////////////////////////////////////
1830// regsub stuff //
1831////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001832
Yee Cheng Chind25021c2023-09-18 19:51:56 +02001833typedef void (*fptr_T)(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001834
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001835static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001836
/*
 * Store MB_TOUPPER() of "c" in "*d".  Has the signature of fptr_T.
 */
    static void
do_upper(int *d, int c)
{
    int	    upper = MB_TOUPPER(c);

    *d = upper;
}
1842
/*
 * Store MB_TOLOWER() of "c" in "*d".  Has the signature of fptr_T.
 */
    static void
do_lower(int *d, int c)
{
    int	    lower = MB_TOLOWER(c);

    *d = lower;
}
1848
1849/*
1850 * regtilde(): Replace tildes in the pattern by the old pattern.
1851 *
1852 * Short explanation of the tilde: It stands for the previous replacement
1853 * pattern. If that previous pattern also contains a ~ we should go back a
1854 * step further... But we insert the previous pattern into the current one
1855 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001856 * This still does not handle the case where "magic" changes. So require the
1857 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001858 *
1859 * The tildes are parsed once before the first call to vim_regsub().
1860 */
1861 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001862regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001863{
1864 char_u *newsub = source;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001865 char_u *p;
John Marriott82792db2024-05-12 00:07:17 +02001866 size_t newsublen = 0;
1867 char_u tilde[3] = {'~', NUL, NUL};
1868 size_t tildelen = 1;
1869 int error = FALSE;
1870
1871 if (!magic)
1872 {
1873 tilde[0] = '\\';
1874 tilde[1] = '~';
1875 tilde[2] = NUL;
1876 tildelen = 2;
1877 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001878
1879 for (p = newsub; *p; ++p)
1880 {
John Marriott82792db2024-05-12 00:07:17 +02001881 if (STRNCMP(p, tilde, tildelen) == 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001882 {
John Marriott82792db2024-05-12 00:07:17 +02001883 size_t prefixlen = p - newsub; // not including the tilde
1884 char_u *postfix = p + tildelen;
1885 size_t postfixlen;
1886 size_t tmpsublen;
1887
1888 if (newsublen == 0)
1889 newsublen = STRLEN(newsub);
1890 newsublen -= tildelen;
1891 postfixlen = newsublen - prefixlen;
1892 tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
1893
1894 if (tmpsublen > 0 && reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895 {
John Marriott82792db2024-05-12 00:07:17 +02001896 char_u *tmpsub;
1897
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001898 // Avoid making the text longer than MAXCOL, it will cause
1899 // trouble at some point.
John Marriott82792db2024-05-12 00:07:17 +02001900 if (tmpsublen > MAXCOL)
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001901 {
1902 emsg(_(e_resulting_text_too_long));
John Marriott82792db2024-05-12 00:07:17 +02001903 error = TRUE;
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001904 break;
1905 }
1906
John Marriott82792db2024-05-12 00:07:17 +02001907 tmpsub = alloc(tmpsublen + 1);
1908 if (tmpsub == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909 {
John Marriott82792db2024-05-12 00:07:17 +02001910 emsg(_(e_out_of_memory));
1911 error = TRUE;
1912 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001913 }
John Marriott82792db2024-05-12 00:07:17 +02001914
1915 // copy prefix
1916 mch_memmove(tmpsub, newsub, prefixlen);
1917 // interpret tilde
1918 mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
1919 // copy postfix
1920 STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
1921
1922 if (newsub != source) // allocated newsub before
1923 vim_free(newsub);
1924 newsub = tmpsub;
1925 newsublen = tmpsublen;
1926 p = newsub + prefixlen + reg_prev_sublen;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001927 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001928 else
John Marriott82792db2024-05-12 00:07:17 +02001929 mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL)
1930
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 --p;
1932 }
1933 else
1934 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001935 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001938 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001939 }
1940 }
1941
John Marriott82792db2024-05-12 00:07:17 +02001942 if (error)
1943 {
1944 if (newsub != source)
1945 vim_free(newsub);
1946 return source;
1947 }
1948
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001949 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1950 // because recursive calls may make the returned string invalid.
John Marriott82792db2024-05-12 00:07:17 +02001951 // Only store it if there something to store.
1952 newsublen = p - newsub;
1953 if (newsublen == 0)
1954 VIM_CLEAR(reg_prev_sub);
1955 else
1956 {
1957 vim_free(reg_prev_sub);
1958 reg_prev_sub = vim_strnsave(newsub, newsublen);
1959 }
1960
1961 if (reg_prev_sub == NULL)
1962 reg_prev_sublen = 0;
1963 else
1964 reg_prev_sublen = newsublen;
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001965
Bram Moolenaar071d4272004-06-13 20:20:40 +00001966 return newsub;
1967}
1968
1969#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001970
1971/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001972 * Put the submatches in "argv[argskip]" which is a list passed into
1973 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001974 */
1975 static int
zeertzjq48db5da2022-09-16 12:10:03 +01001976fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001977{
1978 listitem_T *li;
1979 int i;
1980 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001981 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001982
zeertzjqabd58d82022-09-16 16:06:32 +01001983 if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001984 // called function doesn't take a submatches argument
1985 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001986
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001987 // Relies on sl_list to be the first item in staticList10_T.
1988 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001989
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001990 // There are always 10 list items in staticList10_T.
1991 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001992 for (i = 0; i < 10; ++i)
1993 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001994 s = rsm.sm_match->startp[i];
1995 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001996 s = NULL;
1997 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001998 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001999 li->li_tv.v_type = VAR_STRING;
2000 li->li_tv.vval.v_string = s;
2001 li = li->li_next;
2002 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01002003 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002004}
2005
2006 static void
2007clear_submatch_list(staticList10_T *sl)
2008{
2009 int i;
2010
2011 for (i = 0; i < 10; ++i)
2012 vim_free(sl->sl_items[i].li_tv.vval.v_string);
2013}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02002014#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002015
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016/*
2017 * vim_regsub() - perform substitutions after a vim_regexec() or
2018 * vim_regexec_multi() match.
2019 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002020 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
dundargocc57b5bc2022-11-02 13:30:51 +00002021 * Otherwise nothing is copied, only compute the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002022 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002023 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
2024 *
2025 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
2026 * double them to keep them, and insert a backslash before a CR to avoid it
2027 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028 *
2029 * Note: The matched text must not change between the call of
2030 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
2031 * references invalid!
2032 *
2033 * Returns the size of the replacement, including terminating NUL.
2034 */
2035 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002036vim_regsub(
2037 regmatch_T *rmp,
2038 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002039 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002040 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002041 int destlen,
2042 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002043{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002044 int result;
2045 regexec_T rex_save;
2046 int rex_in_use_save = rex_in_use;
2047
2048 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002049 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002050 rex_save = rex;
2051 rex_in_use = TRUE;
2052
2053 rex.reg_match = rmp;
2054 rex.reg_mmatch = NULL;
2055 rex.reg_maxline = 0;
2056 rex.reg_buf = curbuf;
2057 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002058 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002059
2060 rex_in_use = rex_in_use_save;
2061 if (rex_in_use)
2062 rex = rex_save;
2063
2064 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002065}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002066
2067 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002068vim_regsub_multi(
2069 regmmatch_T *rmp,
2070 linenr_T lnum,
2071 char_u *source,
2072 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002073 int destlen,
2074 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002075{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002076 int result;
2077 regexec_T rex_save;
2078 int rex_in_use_save = rex_in_use;
2079
2080 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002081 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002082 rex_save = rex;
2083 rex_in_use = TRUE;
2084
2085 rex.reg_match = NULL;
2086 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002087 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02002088 rex.reg_firstlnum = lnum;
2089 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
2090 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002091 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002092
2093 rex_in_use = rex_in_use_save;
2094 if (rex_in_use)
2095 rex = rex_save;
2096
2097 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002098}
2099
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002100#if defined(FEAT_EVAL) || defined(PROTO)
2101// When nesting more than a couple levels it's probably a mistake.
2102# define MAX_REGSUB_NESTING 4
2103static char_u *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};
2104
2105# if defined(EXITFREE) || defined(PROTO)
2106 void
2107free_resub_eval_result(void)
2108{
2109 int i;
2110
2111 for (i = 0; i < MAX_REGSUB_NESTING; ++i)
2112 VIM_CLEAR(eval_result[i]);
2113}
2114# endif
2115#endif
2116
Bram Moolenaar071d4272004-06-13 20:20:40 +00002117 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002118vim_regsub_both(
2119 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002120 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002121 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002122 int destlen,
2123 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002124{
2125 char_u *src;
2126 char_u *dst;
2127 char_u *s;
2128 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002129 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002130 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002131 fptr_T func_all = (fptr_T)NULL;
2132 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002133 linenr_T clnum = 0; // init for GCC
2134 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00002135#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002136 static int nesting = 0;
2137 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002138#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002139 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002140
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002141 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002142 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002143 {
RestorerZ68ebcee2023-05-31 17:12:14 +01002144 iemsg(e_null_argument);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145 return 0;
2146 }
2147 if (prog_magic_wrong())
2148 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002149#ifdef FEAT_EVAL
2150 if (nesting == MAX_REGSUB_NESTING)
2151 {
2152 emsg(_(e_substitute_nesting_too_deep));
2153 return 0;
2154 }
2155 nested = nesting;
2156#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002157 src = source;
2158 dst = dest;
2159
2160 /*
2161 * When the substitute part starts with "\=" evaluate it as an expression.
2162 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002163 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002164 {
2165#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002166 // To make sure that the length doesn't change between checking the
2167 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01002168 // resulting string is saved from the call with
2169 // "flags & REGSUB_COPY" == 0 to the call with
2170 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002171 if (copy)
2172 {
John Marriott82792db2024-05-12 00:07:17 +02002173 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002174 {
John Marriott82792db2024-05-12 00:07:17 +02002175 int eval_len = (int)STRLEN(eval_result[nested]);
2176
2177 if (eval_len < destlen)
2178 {
2179 STRCPY(dest, eval_result[nested]);
2180 dst += eval_len;
2181 VIM_CLEAR(eval_result[nested]);
2182 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002183 }
2184 }
2185 else
2186 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002187 int prev_can_f_submatch = can_f_submatch;
2188 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002189
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002190 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002191
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002192 // The expression may contain substitute(), which calls us
2193 // recursively. Make sure submatch() gets the text from the first
2194 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002195 if (can_f_submatch)
2196 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002197 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002198 rsm.sm_match = rex.reg_match;
2199 rsm.sm_mmatch = rex.reg_mmatch;
2200 rsm.sm_firstlnum = rex.reg_firstlnum;
2201 rsm.sm_maxline = rex.reg_maxline;
2202 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002203
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002204 // Although unlikely, it is possible that the expression invokes a
2205 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002206 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002207 ++nesting;
2208
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002209 if (expr != NULL)
2210 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002211 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002212 char_u buf[NUMBUFLEN];
2213 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002214 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002215 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002216
2217 rettv.v_type = VAR_STRING;
2218 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002219 argv[0].v_type = VAR_LIST;
2220 argv[0].vval.v_list = &matchList.sl_list;
2221 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002222 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002223 funcexe.fe_argv_func = fill_submatch_list;
2224 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002225 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002226 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002227 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002228 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002229 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002230 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002231 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002232 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002233
Bram Moolenaar6100d022016-10-02 16:51:57 +02002234 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002235 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002236 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002237 }
LemonBoyf3b48952022-05-05 13:53:03 +01002238 else if (expr->v_type == VAR_INSTR)
2239 {
2240 exe_typval_instr(expr, &rettv);
2241 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002242 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002243 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002244 clear_submatch_list(&matchList);
2245
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002246 if (rettv.v_type == VAR_UNKNOWN)
2247 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002248 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002249 else
2250 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002251 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2252 if (eval_result[nested] != NULL)
2253 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002254 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002255 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002256 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002257 else if (substitute_instr != NULL)
2258 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002259 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002260 else
Bram Moolenaara4e0b972022-10-01 19:43:52 +01002261 eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002262 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002263
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002264 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002265 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002266 int had_backslash = FALSE;
2267
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002268 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002269 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002270 // Change NL to CR, so that it becomes a line break,
2271 // unless called from vim_regexec_nl().
2272 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002273 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002274 *s = CAR;
2275 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002276 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002277 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002278 /* Change NL to CR here too, so that this works:
2279 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2280 * abc\
2281 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002282 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002283 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002284 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002285 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002286 had_backslash = TRUE;
2287 }
2288 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002289 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002290 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002291 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002292 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002293 if (s != NULL)
2294 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002295 vim_free(eval_result[nested]);
2296 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002297 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002298 }
2299
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002300 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002301 }
2302
Bram Moolenaar6100d022016-10-02 16:51:57 +02002303 can_f_submatch = prev_can_f_submatch;
2304 if (can_f_submatch)
2305 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002306 }
2307#endif
2308 }
2309 else
2310 while ((c = *src++) != NUL)
2311 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002312 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002313 no = 0;
2314 else if (c == '\\' && *src != NUL)
2315 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002316 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002317 {
2318 ++src;
2319 no = 0;
2320 }
2321 else if ('0' <= *src && *src <= '9')
2322 {
2323 no = *src++ - '0';
2324 }
2325 else if (vim_strchr((char_u *)"uUlLeE", *src))
2326 {
2327 switch (*src++)
2328 {
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002329 case 'u': func_one = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002330 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002331 case 'U': func_all = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002332 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002333 case 'l': func_one = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002334 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002335 case 'L': func_all = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002336 continue;
2337 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002338 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002339 continue;
2340 }
2341 }
2342 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002343 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002344 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002345 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2346 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002347 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002348 if (copy)
2349 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002350 if (dst + 3 > dest + destlen)
2351 {
2352 iemsg("vim_regsub_both(): not enough space");
2353 return 0;
2354 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002355 *dst++ = c;
2356 *dst++ = *src++;
2357 *dst++ = *src++;
2358 }
2359 else
2360 {
2361 dst += 3;
2362 src += 2;
2363 }
2364 continue;
2365 }
2366
Bram Moolenaar071d4272004-06-13 20:20:40 +00002367 if (c == '\\' && *src != NUL)
2368 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002369 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002370 switch (*src)
2371 {
2372 case 'r': c = CAR; ++src; break;
2373 case 'n': c = NL; ++src; break;
2374 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002375 // Oh no! \e already has meaning in subst pat :-(
2376 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002377 case 'b': c = Ctrl_H; ++src; break;
2378
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002379 // If "backslash" is TRUE the backslash will be removed
2380 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002381 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002382 {
2383 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002384 {
2385 if (dst + 1 > dest + destlen)
2386 {
2387 iemsg("vim_regsub_both(): not enough space");
2388 return 0;
2389 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002390 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002391 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002392 ++dst;
2393 }
2394 c = *src++;
2395 }
2396 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002397 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002398 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002399
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002400 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002401 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002402 {
2403 func_one(&cc, c);
2404 func_one = NULL;
2405 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002406 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002407 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002408 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002409 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002410
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002411 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002412 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002413 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002414 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002415
Bram Moolenaar071d4272004-06-13 20:20:40 +00002416 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002417 {
2418 if (dst + charlen > dest + destlen)
2419 {
2420 iemsg("vim_regsub_both(): not enough space");
2421 return 0;
2422 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002423 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002424 }
2425 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002426 if (enc_utf8)
2427 {
2428 int clen = utf_ptr2len(src - 1);
2429
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002430 // If the character length is shorter than "totlen", there
2431 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002432 if (clen < totlen)
2433 {
2434 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002435 {
2436 if (dst + totlen - clen > dest + destlen)
2437 {
2438 iemsg("vim_regsub_both(): not enough space");
2439 return 0;
2440 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002441 mch_memmove(dst + 1, src - 1 + clen,
2442 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002443 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002444 dst += totlen - clen;
2445 }
2446 }
2447 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002448 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002449 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002450 {
2451 if (dst + 1 > dest + destlen)
2452 {
2453 iemsg("vim_regsub_both(): not enough space");
2454 return 0;
2455 }
2456 *dst = cc;
2457 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002458 dst++;
2459 }
2460 else
2461 {
2462 if (REG_MULTI)
2463 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002464 clnum = rex.reg_mmatch->startpos[no].lnum;
2465 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002466 s = NULL;
2467 else
2468 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002469 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2470 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2471 len = rex.reg_mmatch->endpos[no].col
2472 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002473 else
John Marriott82792db2024-05-12 00:07:17 +02002474 len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 }
2476 }
2477 else
2478 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002479 s = rex.reg_match->startp[no];
2480 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002481 s = NULL;
2482 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002483 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002484 }
2485 if (s != NULL)
2486 {
2487 for (;;)
2488 {
2489 if (len == 0)
2490 {
2491 if (REG_MULTI)
2492 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002493 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002494 break;
2495 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002496 {
2497 if (dst + 1 > dest + destlen)
2498 {
2499 iemsg("vim_regsub_both(): not enough space");
2500 return 0;
2501 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002502 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002503 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 ++dst;
2505 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002506 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2507 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002508 else
John Marriott82792db2024-05-12 00:07:17 +02002509 len = (int)reg_getline_len(clnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002510 }
2511 else
2512 break;
2513 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002514 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 {
2516 if (copy)
RestorerZ68ebcee2023-05-31 17:12:14 +01002517 iemsg(e_damaged_match_string);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002518 goto exit;
2519 }
2520 else
2521 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002522 if ((flags & REGSUB_BACKSLASH)
2523 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002524 {
2525 /*
2526 * Insert a backslash in front of a CR, otherwise
2527 * it will be replaced by a line break.
2528 * Number of backslashes will be halved later,
2529 * double them here.
2530 */
2531 if (copy)
2532 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002533 if (dst + 2 > dest + destlen)
2534 {
2535 iemsg("vim_regsub_both(): not enough space");
2536 return 0;
2537 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002538 dst[0] = '\\';
2539 dst[1] = *s;
2540 }
2541 dst += 2;
2542 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002543 else
2544 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002545 if (has_mbyte)
2546 c = mb_ptr2char(s);
2547 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002548 c = *s;
2549
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002550 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002551 {
2552 func_one(&cc, c);
2553 func_one = NULL;
2554 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002555 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002556 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002557 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002558 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002559
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002560 if (has_mbyte)
2561 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002562 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002563 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002564
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002565 // Copy composing characters separately, one
2566 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002567 if (enc_utf8)
2568 l = utf_ptr2len(s) - 1;
2569 else
2570 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002571
2572 s += l;
2573 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002574 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002575 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002576 {
2577 if (dst + charlen > dest + destlen)
2578 {
2579 iemsg("vim_regsub_both(): not enough space");
2580 return 0;
2581 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002582 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002583 }
2584 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002585 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002586 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002587 {
2588 if (dst + 1 > dest + destlen)
2589 {
2590 iemsg("vim_regsub_both(): not enough space");
2591 return 0;
2592 }
2593 *dst = cc;
2594 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002595 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002596 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002597
Bram Moolenaar071d4272004-06-13 20:20:40 +00002598 ++s;
2599 --len;
2600 }
2601 }
2602 }
2603 no = -1;
2604 }
2605 }
2606 if (copy)
2607 *dst = NUL;
2608
2609exit:
2610 return (int)((dst - dest) + 1);
2611}
2612
2613#ifdef FEAT_EVAL
John Marriott82792db2024-05-12 00:07:17 +02002614
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002615 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002616reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002617{
John Marriott82792db2024-05-12 00:07:17 +02002618 char_u *line;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002619
John Marriott82792db2024-05-12 00:07:17 +02002620 reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002621
John Marriott82792db2024-05-12 00:07:17 +02002622 return line;
2623}
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002624
John Marriott82792db2024-05-12 00:07:17 +02002625 static colnr_T
2626reg_getline_submatch_len(linenr_T lnum)
2627{
2628 colnr_T length;
2629
2630 reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
2631
2632 return length;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002633}
2634
2635/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002636 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002637 * allocated memory.
2638 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2639 */
2640 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002641reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002642{
2643 char_u *retval = NULL;
2644 char_u *s;
2645 int len;
2646 int round;
2647 linenr_T lnum;
2648
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002649 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002650 return NULL;
2651
Bram Moolenaar6100d022016-10-02 16:51:57 +02002652 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002653 {
2654 /*
2655 * First round: compute the length and allocate memory.
2656 * Second round: copy the text.
2657 */
2658 for (round = 1; round <= 2; ++round)
2659 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002660 lnum = rsm.sm_mmatch->startpos[no].lnum;
2661 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002662 return NULL;
2663
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002664 s = reg_getline_submatch(lnum);
2665 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002666 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002667 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002668 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002669 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002670 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002671 len = rsm.sm_mmatch->endpos[no].col
2672 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002673 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002674 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002675 ++len;
2676 }
2677 else
2678 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002679 // Multiple lines: take start line from start col, middle
2680 // lines completely and end line up to end col.
John Marriott82792db2024-05-12 00:07:17 +02002681 len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002682 if (round == 2)
2683 {
2684 STRCPY(retval, s);
2685 retval[len] = '\n';
2686 }
2687 ++len;
2688 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002689 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002690 {
John Marriott82792db2024-05-12 00:07:17 +02002691 s = reg_getline_submatch(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002692 if (round == 2)
2693 STRCPY(retval + len, s);
John Marriott82792db2024-05-12 00:07:17 +02002694 len += (int)reg_getline_submatch_len(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002695 if (round == 2)
2696 retval[len] = '\n';
2697 ++len;
John Marriott82792db2024-05-12 00:07:17 +02002698 ++lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002699 }
2700 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002701 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002702 rsm.sm_mmatch->endpos[no].col);
2703 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002704 if (round == 2)
2705 retval[len] = NUL;
2706 ++len;
2707 }
2708
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002709 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002710 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002711 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002712 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002713 return NULL;
2714 }
2715 }
2716 }
2717 else
2718 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002719 s = rsm.sm_match->startp[no];
2720 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002721 retval = NULL;
2722 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002723 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002724 }
2725
2726 return retval;
2727}
Bram Moolenaar41571762014-04-02 19:00:58 +02002728
2729/*
2730 * Used for the submatch() function with the optional non-zero argument: get
2731 * the list of strings from the n'th submatch in allocated memory with NULs
2732 * represented in NLs.
2733 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2734 * command, for a non-existing submatch and for any error.
2735 */
2736 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002737reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002738{
2739 char_u *s;
2740 linenr_T slnum;
2741 linenr_T elnum;
2742 colnr_T scol;
2743 colnr_T ecol;
2744 int i;
2745 list_T *list;
2746 int error = FALSE;
2747
2748 if (!can_f_submatch || no < 0)
2749 return NULL;
2750
Bram Moolenaar6100d022016-10-02 16:51:57 +02002751 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002752 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002753 slnum = rsm.sm_mmatch->startpos[no].lnum;
2754 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002755 if (slnum < 0 || elnum < 0)
2756 return NULL;
2757
Bram Moolenaar6100d022016-10-02 16:51:57 +02002758 scol = rsm.sm_mmatch->startpos[no].col;
2759 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002760
2761 list = list_alloc();
2762 if (list == NULL)
2763 return NULL;
2764
2765 s = reg_getline_submatch(slnum) + scol;
2766 if (slnum == elnum)
2767 {
2768 if (list_append_string(list, s, ecol - scol) == FAIL)
2769 error = TRUE;
2770 }
2771 else
2772 {
John Marriott82792db2024-05-12 00:07:17 +02002773 int max_lnum = elnum - slnum;
2774
Bram Moolenaar41571762014-04-02 19:00:58 +02002775 if (list_append_string(list, s, -1) == FAIL)
2776 error = TRUE;
John Marriott82792db2024-05-12 00:07:17 +02002777 for (i = 1; i < max_lnum; i++)
Bram Moolenaar41571762014-04-02 19:00:58 +02002778 {
2779 s = reg_getline_submatch(slnum + i);
2780 if (list_append_string(list, s, -1) == FAIL)
2781 error = TRUE;
2782 }
2783 s = reg_getline_submatch(elnum);
2784 if (list_append_string(list, s, ecol) == FAIL)
2785 error = TRUE;
2786 }
2787 }
2788 else
2789 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002790 s = rsm.sm_match->startp[no];
2791 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002792 return NULL;
2793 list = list_alloc();
2794 if (list == NULL)
2795 return NULL;
2796 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002797 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002798 error = TRUE;
2799 }
2800
2801 if (error)
2802 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002803 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002804 return NULL;
2805 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002806 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002807 return list;
2808}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002809#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002810
Bram Moolenaarf4140482020-02-15 23:06:45 +01002811/*
2812 * Initialize the values used for matching against multiple lines
2813 */
2814 static void
2815init_regexec_multi(
2816 regmmatch_T *rmp,
2817 win_T *win, // window in which to search or NULL
2818 buf_T *buf, // buffer in which to search
2819 linenr_T lnum) // nr of line to start looking for match
2820{
2821 rex.reg_match = NULL;
2822 rex.reg_mmatch = rmp;
2823 rex.reg_buf = buf;
2824 rex.reg_win = win;
2825 rex.reg_firstlnum = lnum;
2826 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2827 rex.reg_line_lbr = FALSE;
2828 rex.reg_ic = rmp->rmm_ic;
2829 rex.reg_icombine = FALSE;
2830 rex.reg_maxcol = rmp->rmm_maxcol;
2831}
2832
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002833#include "regexp_bt.c"
2834
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002835static regengine_T bt_regengine =
2836{
2837 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002838 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002839 bt_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002840 bt_regexec_multi
2841#ifdef DEBUG
2842 ,(char_u *)""
2843#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002844};
2845
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002846#include "regexp_nfa.c"
2847
2848static regengine_T nfa_regengine =
2849{
2850 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002851 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002852 nfa_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002853 nfa_regexec_multi
2854#ifdef DEBUG
2855 ,(char_u *)""
2856#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002857};
2858
// The regexp engine that is to be used; vim_regcomp() sets it before
// compiling a pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Names of the engines for debug messages, indexed by engine number.
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2870
2871/*
2872 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002873 * Returns the program in allocated memory.
2874 * Use vim_regfree() to free the memory.
2875 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002876 */
2877 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002878vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002879{
2880 regprog_T *prog = NULL;
2881 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002882 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002883
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002884 regexp_engine = p_re;
2885
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002886 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002887 if (STRNCMP(expr, "\\%#=", 4) == 0)
2888 {
2889 int newengine = expr[4] - '0';
2890
2891 if (newengine == AUTOMATIC_ENGINE
2892 || newengine == BACKTRACKING_ENGINE
2893 || newengine == NFA_ENGINE)
2894 {
2895 regexp_engine = expr[4] - '0';
2896 expr += 5;
2897#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002898 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002899 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002900#endif
2901 }
2902 else
2903 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002904 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002905 regexp_engine = AUTOMATIC_ENGINE;
2906 }
2907 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002908#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002909 bt_regengine.expr = expr;
2910 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002911#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002912 // reg_iswordc() uses rex.reg_buf
2913 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002914
2915 /*
2916 * First try the NFA engine, unless backtracking was requested.
2917 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002918 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002919 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002920 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002921 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002922 else
2923 prog = bt_regengine.regcomp(expr, re_flags);
2924
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002925 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002926 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002927 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002928#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002929 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002930 {
2931 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002932 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002933 if (f)
2934 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002935 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002936 fclose(f);
2937 }
2938 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002939 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002940 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002941 }
2942#endif
2943 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002944 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002945 * The NFA engine also fails for patterns that it can't handle well
2946 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002947 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002948 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002949 if (regexp_engine == AUTOMATIC_ENGINE
2950 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002951 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002952 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002953#ifdef FEAT_EVAL
2954 report_re_switch(expr);
2955#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002956 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002957 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002958 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002959
Bram Moolenaarfda37292014-11-05 14:27:36 +01002960 if (prog != NULL)
2961 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002962 // Store the info needed to call regcomp() again when the engine turns
2963 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002964 prog->re_engine = regexp_engine;
2965 prog->re_flags = re_flags;
2966 }
2967
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002968 return prog;
2969}
2970
2971/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002972 * Free a compiled regexp program, returned by vim_regcomp().
2973 */
2974 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002975vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002976{
2977 if (prog != NULL)
2978 prog->engine->regfree(prog);
2979}
2980
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free "reg_tofree" and "reg_prev_sub" and clear the "regstack" and
 * "backpos" growarrays.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);

    ga_clear(&regstack);
    ga_clear(&backpos);
}
#endif
2991
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero: give a message that the backtracking
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
3005
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog": whether the program is currently
 * being executed.
 * "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int		busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01003016
3017/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003018 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01003019 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003020 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003021 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003022 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003023 *
3024 * Return TRUE if there is a match, FALSE if not.
3025 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01003026 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003027vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01003028 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003029 char_u *line, // string to match against
3030 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01003031 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01003032{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003033 int result;
3034 regexec_T rex_save;
3035 int rex_in_use_save = rex_in_use;
3036
Bram Moolenaar0270f382018-07-17 05:43:58 +02003037 // Cannot use the same prog recursively, it contains state.
3038 if (rmp->regprog->re_in_use)
3039 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003040 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003041 return FALSE;
3042 }
3043 rmp->regprog->re_in_use = TRUE;
3044
Bram Moolenaar6100d022016-10-02 16:51:57 +02003045 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003046 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003047 rex_save = rex;
3048 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003049
Bram Moolenaar6100d022016-10-02 16:51:57 +02003050 rex.reg_startp = NULL;
3051 rex.reg_endp = NULL;
3052 rex.reg_startpos = NULL;
3053 rex.reg_endpos = NULL;
3054
3055 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003056 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003057
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003058 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003059 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3060 && result == NFA_TOO_EXPENSIVE)
3061 {
3062 int save_p_re = p_re;
3063 int re_flags = rmp->regprog->re_flags;
3064 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3065
3066 p_re = BACKTRACKING_ENGINE;
3067 vim_regfree(rmp->regprog);
3068 if (pat != NULL)
3069 {
3070#ifdef FEAT_EVAL
3071 report_re_switch(pat);
3072#endif
3073 rmp->regprog = vim_regcomp(pat, re_flags);
3074 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003075 {
3076 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003077 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003078 rmp->regprog->re_in_use = FALSE;
3079 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003080 vim_free(pat);
3081 }
3082
3083 p_re = save_p_re;
3084 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02003085
3086 rex_in_use = rex_in_use_save;
3087 if (rex_in_use)
3088 rex = rex_save;
3089
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003090 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003091}
3092
Dominique Pellee764d1b2023-03-12 21:20:59 +00003093#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003094/*
3095 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003096 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003097 */
3098 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003099vim_regexec_prog(
3100 regprog_T **prog,
3101 int ignore_case,
3102 char_u *line,
3103 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003104{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003105 int r;
3106 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003107
3108 regmatch.regprog = *prog;
3109 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003110 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003111 *prog = regmatch.regprog;
3112 return r;
3113}
Dominique Pellee764d1b2023-03-12 21:20:59 +00003114#endif
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003115
3116/*
3117 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003118 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003119 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003120 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003121vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003122{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003123 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003124}
3125
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003126/*
3127 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003128 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003129 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003130 */
3131 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003132vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003133{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003134 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003135}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003136
3137/*
3138 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003139 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
3140 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003141 * Uses curbuf for line count and 'iskeyword'.
3142 *
3143 * Return zero if there is no match. Return number of lines contained in the
3144 * match otherwise.
3145 */
3146 long
Bram Moolenaar05540972016-01-30 20:31:25 +01003147vim_regexec_multi(
3148 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003149 win_T *win, // window in which to search or NULL
3150 buf_T *buf, // buffer in which to search
3151 linenr_T lnum, // nr of line to start looking for match
3152 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003153 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003154{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003155 int result;
3156 regexec_T rex_save;
3157 int rex_in_use_save = rex_in_use;
3158
Bram Moolenaar0270f382018-07-17 05:43:58 +02003159 // Cannot use the same prog recursively, it contains state.
3160 if (rmp->regprog->re_in_use)
3161 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003162 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003163 return FALSE;
3164 }
3165 rmp->regprog->re_in_use = TRUE;
3166
Bram Moolenaar6100d022016-10-02 16:51:57 +02003167 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003168 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003169 rex_save = rex;
3170 rex_in_use = TRUE;
3171
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003172 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003173 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003174 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003175
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003176 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003177 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3178 && result == NFA_TOO_EXPENSIVE)
3179 {
3180 int save_p_re = p_re;
3181 int re_flags = rmp->regprog->re_flags;
3182 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3183
3184 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003185 if (pat != NULL)
3186 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003187 regprog_T *prev_prog = rmp->regprog;
3188
Bram Moolenaarfda37292014-11-05 14:27:36 +01003189#ifdef FEAT_EVAL
3190 report_re_switch(pat);
3191#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003192#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003193 // checking for \z misuse was already done when compiling for NFA,
3194 // allow all here
3195 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003196#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003197 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003198#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003199 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003200#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003201 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003202 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003203 // Somehow compiling the pattern failed now, put back the
3204 // previous one to avoid "regprog" becoming NULL.
3205 rmp->regprog = prev_prog;
3206 }
3207 else
3208 {
3209 vim_regfree(prev_prog);
3210
Bram Moolenaar41499802018-07-18 06:02:09 +02003211 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003212 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003213 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003214 rmp->regprog->re_in_use = FALSE;
3215 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003216 vim_free(pat);
3217 }
3218 p_re = save_p_re;
3219 }
3220
Bram Moolenaar6100d022016-10-02 16:51:57 +02003221 rex_in_use = rex_in_use_save;
3222 if (rex_in_use)
3223 rex = rex_save;
3224
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003225 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003226}