blob: d43193219126299b5e822410540b5fd5560d7e7e [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
// Regexp debugging is disabled by default: no debugging logs or files are
// produced for regular expressions, not even when compiling with -DDEBUG.
// To get the regexp debugging, uncomment the "#define DEBUG" line below.
#undef DEBUG
// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
#ifdef DEBUG
// Show/save debugging data when the BT engine is used.
# define BT_REGEXP_DUMP
// Save the debugging data to a file instead of displaying it.
# define BT_REGEXP_LOG
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
#ifdef FEAT_RELTIME
// Placeholder flag: "timeout_flag" points here initially and is pointed back
// here by disable_regexp_timeout() and save_timeout_for_debugging().
static sig_atomic_t		dummy_timeout_flag = 0;
// The flag that is currently in effect for the regexp timeout.
static volatile sig_atomic_t	*timeout_flag = &dummy_timeout_flag;
#endif
27
/*
 * Magic characters have a special meaning, they do not match literally.
 * They are encoded as negative numbers, which keeps them apart from literal
 * characters (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)
36
/*
 * Return "x" with the Magic encoding removed.  A character that is not Magic
 * is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Return the Magic variant of a character that is not Magic and the plain
 * variant of a character that is Magic.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
Bram Moolenaar0f618382022-08-26 21:33:04 +010054static int timeout_nesting = 0;
55
56/*
57 * Start a timer that will cause the regexp to abort after "msec".
58 * This doesn't work well recursively. In case it happens anyway, the first
59 * set timeout will prevail, nested ones are ignored.
60 * The caller must make sure there is a matching disable_regexp_timeout() call!
61 */
Paul Ollis65745772022-06-05 16:55:54 +010062 void
63init_regexp_timeout(long msec)
64{
Bram Moolenaar0f618382022-08-26 21:33:04 +010065 if (timeout_nesting == 0)
66 timeout_flag = start_timeout(msec);
67 ++timeout_nesting;
Paul Ollis65745772022-06-05 16:55:54 +010068}
69
70 void
71disable_regexp_timeout(void)
72{
Bram Moolenaar0f618382022-08-26 21:33:04 +010073 if (timeout_nesting == 0)
74 iemsg("disable_regexp_timeout() called without active timer");
75 else if (--timeout_nesting == 0)
76 {
77 stop_timeout();
78 timeout_flag = &dummy_timeout_flag;
79 }
Paul Ollis65745772022-06-05 16:55:54 +010080}
81#endif
82
Bram Moolenaar9781d9c2022-09-20 13:51:25 +010083#if defined(FEAT_EVAL) || defined(PROTO)
# ifdef FEAT_RELTIME
// Value of "timeout_flag" stored by save_timeout_for_debugging().  It has the
// same "volatile" qualifier as "timeout_flag", so that saving and restoring
// does not need a cast that drops the qualifier.
static volatile sig_atomic_t *saved_timeout_flag;
# endif

/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.
 * Must be followed by calling restore_timeout_for_debugging().
 * Does nothing when FEAT_RELTIME is not defined.
 */
    void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    saved_timeout_flag = timeout_flag;
    timeout_flag = &dummy_timeout_flag;
# endif
}
101
/*
 * Make "timeout_flag" point to what save_timeout_for_debugging() stored.
 * Does nothing when FEAT_RELTIME is not defined.
 */
    void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
109#endif
110
/*
 * The first byte of the BT regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It is used to catch the
 * most severe mutilation of the program by the caller.
 */

#define REGMAGIC	0234
118
/*
 * Utility definitions.
 */
// The byte "p" points to, read as a char_u and widened to int.
#define UCHARAT(p)	((int)*(char_u *)(p))
123
// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL or FAIL.
// In the EMSG2/EMSG3 variants "c" selects the first string argument for the
// message: "" when "c" is non-zero, a backslash otherwise.
#define EMSG_RET_NULL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) \
    return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) \
    return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL \
    EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000133
Bram Moolenaar95f09602016-11-10 20:01:45 +0100134
#define MAX_LIMIT	(32767L << 16L)

// return values for re_multi_type()
#define NOT_MULTI	0
#define MULTI_ONE	1
#define MULTI_MULT	2

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200147
Bram Moolenaar071d4272004-06-13 20:20:40 +0000148/*
149 * Return NOT_MULTI if c is not a "multi" operator.
150 * Return MULTI_ONE if c is a single "multi" operator.
151 * Return MULTI_MULT if c is a multi "multi" operator.
152 */
153 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100154re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000155{
156 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
157 return MULTI_ONE;
158 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
159 return MULTI_MULT;
160 return NOT_MULTI;
161}
162
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000163static char_u *reg_prev_sub = NULL;
John Marriott82792db2024-05-12 00:07:17 +0200164static size_t reg_prev_sublen = 0;
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000165
Bram Moolenaar071d4272004-06-13 20:20:40 +0000166/*
167 * REGEXP_INRANGE contains all characters which are always special in a []
168 * range after '\'.
169 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
170 * These are:
171 * \n - New line (NL).
172 * \r - Carriage Return (CR).
173 * \t - Tab (TAB).
174 * \e - Escape (ESC).
175 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000176 * \d - Character code in decimal, eg \d123
177 * \o - Character code in octal, eg \o80
178 * \x - Character code in hex, eg \x4a
179 * \u - Multibyte character code, eg \u20ac
180 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000181 */
182static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000183static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000184
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185/*
186 * Translate '\x' to its control character, except "\n", which is Magic.
187 */
188 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100189backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000190{
191 switch (c)
192 {
193 case 'r': return CAR;
194 case 't': return TAB;
195 case 'e': return ESC;
196 case 'b': return BS;
197 }
198 return c;
199}
200
// Character class items, as returned by get_char_class().
enum
{
    CLASS_ALNUM = 0,
    CLASS_ALPHA,
    CLASS_BLANK,
    CLASS_CNTRL,
    CLASS_DIGIT,
    CLASS_GRAPH,
    CLASS_LOWER,
    CLASS_PRINT,
    CLASS_PUNCT,
    CLASS_SPACE,
    CLASS_UPPER,
    CLASS_XDIGIT,
    CLASS_TAB,
    CLASS_RETURN,
    CLASS_BACKSPACE,
    CLASS_ESCAPE,
    CLASS_IDENT,
    CLASS_KEYWORD,
    CLASS_FNAME,
    CLASS_NONE = 99	// no class name was recognized
};
224
Bram Moolenaar071d4272004-06-13 20:20:40 +0000225/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000226 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
228 * recognized. Otherwise "pp" is advanced to after the item.
229 */
230 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100231get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000232{
John Marriott82792db2024-05-12 00:07:17 +0200233 // must be sorted by the 'value' field because it is used by bsearch()!
234 static keyvalue_T char_class_tab[] =
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 {
John Marriott82792db2024-05-12 00:07:17 +0200236 KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
237 KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
238 KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
239 KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
240 KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
241 KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
242 KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
243 KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
244 KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
245 KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
246 KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
247 KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
248 KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
249 KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
250 KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
251 KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
252 KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
253 KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
254 KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 };
Bram Moolenaar071d4272004-06-13 20:20:40 +0000256
John Marriott82792db2024-05-12 00:07:17 +0200257 // check that the value of "pp" has a chance of matching
258 if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
259 && ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000260 {
John Marriott82792db2024-05-12 00:07:17 +0200261 keyvalue_T target;
262 keyvalue_T *entry;
263 // this function can be called repeatedly with the same value for "pp"
264 // so we cache the last found entry.
265 static keyvalue_T *last_entry = NULL;
266
267 target.key = 0;
268 target.value = (char *)*pp + 2;
269 target.length = 0; // not used, see cmp_keyvalue_value_n()
270
271 if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
272 entry = last_entry;
273 else
274 entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
275 ARRAY_LENGTH(char_class_tab),
276 sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
277 if (entry != NULL)
278 {
279 last_entry = entry;
280 *pp += entry->length + 2;
281 return entry->key;
282 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283 }
284 return CLASS_NONE;
285}
286
/*
 * Specific version of character class functions.
 * Using a table to keep this fast: class_tab[] has one entry with RI_ bits
 * for every byte value, it is filled by init_class_tab().
 */
static short	class_tab[256];

#define RI_DIGIT    0x01
#define RI_HEX	    0x02
#define RI_OCTAL    0x04
#define RI_WORD	    0x08
#define RI_HEAD	    0x10
#define RI_ALPHA    0x20
#define RI_LOWER    0x40
#define RI_UPPER    0x80
#define RI_WHITE    0x100
302
303 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100304init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305{
306 int i;
307 static int done = FALSE;
308
309 if (done)
310 return;
311
312 for (i = 0; i < 256; ++i)
313 {
314 if (i >= '0' && i <= '7')
315 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
316 else if (i >= '8' && i <= '9')
317 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
318 else if (i >= 'a' && i <= 'f')
319 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
322 else if (i >= 'A' && i <= 'F')
323 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
326 else if (i == '_')
327 class_tab[i] = RI_WORD + RI_HEAD;
328 else
329 class_tab[i] = 0;
330 }
331 class_tab[' '] |= RI_WHITE;
332 class_tab['\t'] |= RI_WHITE;
333 done = TRUE;
334}
335
// Test if class_tab[] has "flag" set for character "c".  Characters of 0x100
// and above never match.  Note that "c" is evaluated twice.
#define RI_TEST(c, flag) ((c) < 0x100 && (class_tab[c] & (flag)))

#define ri_digit(c)	RI_TEST(c, RI_DIGIT)
#define ri_hex(c)	RI_TEST(c, RI_HEX)
#define ri_octal(c)	RI_TEST(c, RI_OCTAL)
#define ri_word(c)	RI_TEST(c, RI_WORD)
#define ri_head(c)	RI_TEST(c, RI_HEAD)
#define ri_alpha(c)	RI_TEST(c, RI_ALPHA)
#define ri_lower(c)	RI_TEST(c, RI_LOWER)
#define ri_upper(c)	RI_TEST(c, RI_UPPER)
#define ri_white(c)	RI_TEST(c, RI_WHITE)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
// flags for regflags, each one is a separate bit
#define RF_ICASE    1	// ignore case
#define RF_NOICASE  2	// don't ignore case
#define RF_HASNL    4	// can match a NL
#define RF_ICOMBINE 8	// ignore combining characters
#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000352
353/*
354 * Global work variables for vim_regcomp().
355 */
356
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100357static char_u *regparse; // Input-scan pointer.
358static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100359static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000360#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100361static int regnzpar; // \z() count.
362static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100364static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100366static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000367#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000368
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100369static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100371static int reg_string; // matching with a string instead of a buffer
372 // line
373static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000374
375/*
376 * META contains all characters that may be magic, except '^' and '$'.
377 */
378
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100379// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380static char_u META_flags[] = {
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100383// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100385// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100387// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100389// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000390 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100391// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000392 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100393// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000394 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
395};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000396
static int	curchr;		// currently parsed character
// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;
static int	prevprevchr;	// previous-previous character
static int	nextchr;	// used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
// arguments for reg(): the kind of parenthesis being parsed
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200411typedef struct
412{
413 char_u *regparse;
414 int prevchr_len;
415 int curchr;
416 int prevchr;
417 int prevprevchr;
418 int nextchr;
419 int at_start;
420 int prev_at_start;
421 int regnpar;
422} parse_state_T;
423
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100424static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100425static int getchr(void);
426static void skipchr_keepstart(void);
427static int peekchr(void);
428static void skipchr(void);
429static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100430static long gethexchrs(int maxinputlen);
431static long getoctchrs(void);
432static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100433static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100434static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200435static int cstrncmp(char_u *s1, char_u *s2, int *n);
436static char_u *cstrchr(char_u *, int);
437static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100438static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100439#ifdef FEAT_EVAL
440static void report_re_switch(char_u *pat);
441#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000442
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200443static regengine_T bt_regengine;
444static regengine_T nfa_regengine;
445
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446/*
447 * Return TRUE if compiled regular expression "prog" can match a line break.
448 */
449 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100450re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451{
452 return (prog->regflags & RF_HASNL);
453}
454
455/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000456 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
457 * Returns a character representing the class. Zero means that no item was
458 * recognized. Otherwise "pp" is advanced to after the item.
459 */
460 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100461get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000462{
463 int c;
464 int l = 1;
465 char_u *p = *pp;
466
Bram Moolenaar985079c2019-02-16 17:07:47 +0100467 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000468 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000470 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000471 if (p[l + 2] == '=' && p[l + 3] == ']')
472 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000473 if (has_mbyte)
474 c = mb_ptr2char(p + 2);
475 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000476 c = p[2];
477 *pp += l + 4;
478 return c;
479 }
480 }
481 return 0;
482}
483
484/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 * Check for a collating element "[.a.]". "pp" points to the '['.
486 * Returns a character. Zero means that no item was recognized. Otherwise
487 * "pp" is advanced to after the item.
488 * Currently only single characters are recognized!
489 */
490 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100491get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000492{
493 int c;
494 int l = 1;
495 char_u *p = *pp;
496
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100497 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000498 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000499 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000500 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000501 if (p[l + 2] == '.' && p[l + 3] == ']')
502 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000503 if (has_mbyte)
504 c = mb_ptr2char(p + 2);
505 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000506 c = p[2];
507 *pp += l + 4;
508 return c;
509 }
510 }
511 return 0;
512}
513
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100514static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
515static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200516
517 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100518get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200519{
520 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
521 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
522}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000523
524/*
525 * Skip over a "[]" range.
526 * "p" must point to the character after the '['.
527 * The returned pointer is on the matching ']', or the terminating NUL.
528 */
529 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100530skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000531{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000532 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000533
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100534 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000535 ++p;
536 if (*p == ']' || *p == '-')
537 ++p;
538 while (*p != NUL && *p != ']')
539 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000540 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000541 p += l;
542 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000543 if (*p == '-')
544 {
545 ++p;
546 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100547 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000548 }
549 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200550 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000551 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200552 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000553 p += 2;
554 else if (*p == '[')
555 {
556 if (get_char_class(&p) == CLASS_NONE
557 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200558 && get_coll_element(&p) == 0
559 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100560 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000561 }
562 else
563 ++p;
564 }
565
566 return p;
567}
568
569/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000570 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200571 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000572 * Take care of characters with a backslash in front of it.
573 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000574 */
575 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100576skip_regexp(
577 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200578 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200579 int magic)
580{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100581 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200582}
583
584/*
585 * Call skip_regexp() and when the delimiter does not match give an error and
586 * return NULL.
587 */
588 char_u *
589skip_regexp_err(
590 char_u *startp,
591 int delim,
592 int magic)
593{
594 char_u *p = skip_regexp(startp, delim, magic);
595
596 if (*p != delim)
597 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000598 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200599 return NULL;
600 }
601 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200602}
603
604/*
605 * skip_regexp() with extra arguments:
606 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
607 * expression and change "\?" to "?". If "*newp" is not NULL the expression
608 * is changed in-place.
609 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100610 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200611 */
612 char_u *
613skip_regexp_ex(
614 char_u *startp,
615 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100616 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200617 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100618 int *dropped,
619 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000620{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100621 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000622 char_u *p = startp;
623
624 if (magic)
625 mymagic = MAGIC_ON;
626 else
627 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200628 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000629
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100630 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000631 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100632 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000633 break;
634 if ((p[0] == '[' && mymagic >= MAGIC_ON)
635 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
636 {
637 p = skip_anyof(p + 1);
638 if (p[0] == NUL)
639 break;
640 }
641 else if (p[0] == '\\' && p[1] != NUL)
642 {
643 if (dirc == '?' && newp != NULL && p[1] == '?')
644 {
John Marriottd01e6992024-05-12 09:01:38 +0200645 size_t startplen = 0;
John Marriott82792db2024-05-12 00:07:17 +0200646
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100647 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000648 if (*newp == NULL)
649 {
John Marriott82792db2024-05-12 00:07:17 +0200650 startplen = STRLEN(startp);
651 *newp = vim_strnsave(startp, startplen);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000652 if (*newp != NULL)
653 p = *newp + (p - startp);
654 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200655 if (dropped != NULL)
656 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657 if (*newp != NULL)
John Marriott82792db2024-05-12 00:07:17 +0200658 mch_memmove(p, p + 1, (startplen - ((p + 1) - *newp)) + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000659 else
660 ++p;
661 }
662 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100663 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664 if (*p == 'v')
665 mymagic = MAGIC_ALL;
666 else if (*p == 'V')
667 mymagic = MAGIC_NONE;
668 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000669 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100670 if (magic_val != NULL)
671 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000672 return p;
673}
674
/*
 * Functions for getting characters from the regexp input.
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;  // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100681
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200683 * Start parsing at "str".
684 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100686initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687{
688 regparse = str;
689 prevchr_len = 0;
690 curchr = prevprevchr = prevchr = nextchr = -1;
691 at_start = TRUE;
692 prev_at_start = FALSE;
693}
694
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200695/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200696 * Save the current parse state, so that it can be restored and parsing
697 * starts in the same state again.
698 */
699 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100700save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200701{
702 ps->regparse = regparse;
703 ps->prevchr_len = prevchr_len;
704 ps->curchr = curchr;
705 ps->prevchr = prevchr;
706 ps->prevprevchr = prevprevchr;
707 ps->nextchr = nextchr;
708 ps->at_start = at_start;
709 ps->prev_at_start = prev_at_start;
710 ps->regnpar = regnpar;
711}
712
713/*
714 * Restore a previously saved parse state.
715 */
716 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100717restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200718{
719 regparse = ps->regparse;
720 prevchr_len = ps->prevchr_len;
721 curchr = ps->curchr;
722 prevchr = ps->prevchr;
723 prevprevchr = ps->prevprevchr;
724 nextchr = ps->nextchr;
725 at_start = ps->at_start;
726 prev_at_start = ps->prev_at_start;
727 regnpar = ps->regnpar;
728}
729
730
731/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200732 * Get the next character without advancing.
733 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100735peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000736{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000737 static int after_slash = FALSE;
738
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000739 if (curchr != -1)
740 return curchr;
741
742 switch (curchr = regparse[0])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000743 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000744 case '.':
745 case '[':
746 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100747 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000748 if (reg_magic >= MAGIC_ON)
749 curchr = Magic(curchr);
750 break;
751 case '(':
752 case ')':
753 case '{':
754 case '%':
755 case '+':
756 case '=':
757 case '?':
758 case '@':
759 case '!':
760 case '&':
761 case '|':
762 case '<':
763 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100764 case '#': // future ext.
765 case '"': // future ext.
766 case '\'': // future ext.
767 case ',': // future ext.
768 case '-': // future ext.
769 case ':': // future ext.
770 case ';': // future ext.
771 case '`': // future ext.
772 case '/': // Can't be used in / command
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000773 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774 if (reg_magic == MAGIC_ALL)
775 curchr = Magic(curchr);
776 break;
777 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100778 // * is not magic as the very first character, eg "?*ptr", when
779 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
780 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000781 if (reg_magic >= MAGIC_ON
782 && !at_start
783 && !(prev_at_start && prevchr == Magic('^'))
784 && (after_slash
785 || (prevchr != Magic('(')
786 && prevchr != Magic('&')
787 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000788 curchr = Magic('*');
789 break;
790 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100791 // '^' is only magic as the very first character and if it's after
792 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000793 if (reg_magic >= MAGIC_OFF
794 && (at_start
795 || reg_magic == MAGIC_ALL
796 || prevchr == Magic('(')
797 || prevchr == Magic('|')
798 || prevchr == Magic('&')
799 || prevchr == Magic('n')
800 || (no_Magic(prevchr) == '('
801 && prevprevchr == Magic('%'))))
802 {
803 curchr = Magic('^');
804 at_start = TRUE;
805 prev_at_start = FALSE;
806 }
807 break;
808 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100809 // '$' is only magic as the very last char and if it's in front of
810 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000811 if (reg_magic >= MAGIC_OFF)
812 {
813 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200814 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100816 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000817 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000818 || p[1] == 'm' || p[1] == 'M'
819 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200820 {
821 if (p[1] == 'v')
822 is_magic_all = TRUE;
823 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
824 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200826 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000827 if (p[0] == NUL
828 || (p[0] == '\\'
829 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
830 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200831 || (is_magic_all
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000832 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833 || reg_magic == MAGIC_ALL)
834 curchr = Magic('$');
835 }
836 break;
837 case '\\':
838 {
839 int c = regparse[1];
840
841 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100842 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000843 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000844 {
845 /*
846 * META contains everything that may be magic sometimes,
847 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200848 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000849 * magicness. Therefore, \ is so meta-magic that it is
850 * not in META.
851 */
852 curchr = -1;
853 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100854 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000855 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000856 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000857 peekchr();
858 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000859 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860 curchr = toggle_Magic(curchr);
861 }
862 else if (vim_strchr(REGEXP_ABBR, c))
863 {
864 /*
865 * Handle abbreviations, like "\t" for TAB -- webb
866 */
867 curchr = backslash_trans(c);
868 }
869 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
870 curchr = toggle_Magic(c);
871 else
872 {
873 /*
874 * Next character can never be (made) magic?
875 * Then backslashing it won't do anything.
876 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000877 if (has_mbyte)
878 curchr = (*mb_ptr2char)(regparse + 1);
879 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000880 curchr = c;
881 }
882 break;
883 }
884
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885 default:
886 if (has_mbyte)
887 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888 }
889
890 return curchr;
891}
892
893/*
894 * Eat one lexed character. Do this in a way that we can undo it.
895 */
896 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100897skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100899 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000900 if (*regparse == '\\')
901 prevchr_len = 1;
902 else
903 prevchr_len = 0;
904 if (regparse[prevchr_len] != NUL)
905 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000906 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100907 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000908 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000909 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000910 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000911 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000912 ++prevchr_len;
913 }
914 regparse += prevchr_len;
915 prev_at_start = at_start;
916 at_start = FALSE;
917 prevprevchr = prevchr;
918 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100919 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000920 nextchr = -1;
921}
922
923/*
924 * Skip a character while keeping the value of prev_at_start for at_start.
925 * prevchr and prevprevchr are also kept.
926 */
927 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100928skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000929{
930 int as = prev_at_start;
931 int pr = prevchr;
932 int prpr = prevprevchr;
933
934 skipchr();
935 at_start = as;
936 prevchr = pr;
937 prevprevchr = prpr;
938}
939
/*
 * Return the next character of the pattern and consume it.  This is the
 * lexical analyzer entry point: peekchr() decides what the character means
 * (magic or literal), skipchr() then moves past it.
 */
    static int
getchr(void)
{
    int	    lexed = peekchr();

    skipchr();
    return lexed;
}
952
953/*
954 * put character back. Works only once!
955 */
956 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100957ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000958{
959 nextchr = curchr;
960 curchr = prevchr;
961 prevchr = prevprevchr;
962 at_start = prev_at_start;
963 prev_at_start = FALSE;
964
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100965 // Backup regparse, so that it's at the same position as before the
966 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000967 regparse -= prevchr_len;
968}
969
970/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000971 * Get and return the value of the hex string at the current position.
972 * Return -1 if there is no valid hex number.
973 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000974 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000975 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000976 * The parameter controls the maximum number of input characters. This will be
977 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
978 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100979 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100980gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000981{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100982 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000983 int c;
984 int i;
985
986 for (i = 0; i < maxinputlen; ++i)
987 {
988 c = regparse[0];
989 if (!vim_isxdigit(c))
990 break;
991 nr <<= 4;
992 nr |= hex2nr(c);
993 ++regparse;
994 }
995
996 if (i == 0)
997 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100998 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000999}
1000
1001/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001002 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001003 * current position. Return -1 for invalid. Consumes all digits.
1004 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001005 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001006getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001007{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001008 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001009 int c;
1010 int i;
1011
1012 for (i = 0; ; ++i)
1013 {
1014 c = regparse[0];
1015 if (c < '0' || c > '9')
1016 break;
1017 nr *= 10;
1018 nr += c - '0';
1019 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001020 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001021 }
1022
1023 if (i == 0)
1024 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001025 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001026}
1027
1028/*
1029 * get and return the value of the octal string immediately after the current
1030 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
1031 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
1032 * treat 8 or 9 as recognised characters. Position is updated:
1033 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00001034 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001035 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001036 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001037getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001038{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001039 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001040 int c;
1041 int i;
1042
1043 for (i = 0; i < 3 && nr < 040; ++i)
1044 {
1045 c = regparse[0];
1046 if (c < '0' || c > '7')
1047 break;
1048 nr <<= 3;
1049 nr |= hex2nr(c);
1050 ++regparse;
1051 }
1052
1053 if (i == 0)
1054 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001055 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001056}
1057
1058/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 * read_limits - Read two integers to be taken as a minimum and maximum.
1060 * If the first character is '-', then the range is reversed.
1061 * Should end with 'end'. If minval is missing, zero is default, if maxval is
1062 * missing, a very big number is the default.
1063 */
1064 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001065read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001066{
1067 int reverse = FALSE;
1068 char_u *first_char;
1069 long tmp;
1070
1071 if (*regparse == '-')
1072 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001073 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001074 regparse++;
1075 reverse = TRUE;
1076 }
1077 first_char = regparse;
1078 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001079 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001080 {
1081 if (vim_isdigit(*++regparse))
1082 *maxval = getdigits(&regparse);
1083 else
1084 *maxval = MAX_LIMIT;
1085 }
1086 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001087 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001088 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001089 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001090 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001091 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001092 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001093 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001094 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095
1096 /*
1097 * Reverse the range if there was a '-', or make sure it is in the right
1098 * order otherwise.
1099 */
1100 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1101 {
1102 tmp = *minval;
1103 *minval = *maxval;
1104 *maxval = tmp;
1105 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001106 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001107 return OK;
1108}
1109
1110/*
1111 * vim_regexec and friends
1112 */
1113
1114/*
1115 * Global work variables for vim_regexec().
1116 */
1117
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001118static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001119#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001120static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001121#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001122static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001123
1124/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001125 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1126 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001127 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001128 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001129static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001130static unsigned reg_tofreelen;
1131
1132/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001133 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001134 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001135 * done:
1136 * single-line multi-line
1137 * reg_match &regmatch_T NULL
1138 * reg_mmatch NULL &regmmatch_T
1139 * reg_startp reg_match->startp <invalid>
1140 * reg_endp reg_match->endp <invalid>
1141 * reg_startpos <invalid> reg_mmatch->startpos
1142 * reg_endpos <invalid> reg_mmatch->endpos
1143 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001144 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001145 * reg_firstlnum <invalid> first line in which to search
1146 * reg_maxline 0 last line nr
1147 * reg_line_lbr FALSE or TRUE FALSE
1148 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001149typedef struct {
1150 regmatch_T *reg_match;
1151 regmmatch_T *reg_mmatch;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001152
Bram Moolenaar6100d022016-10-02 16:51:57 +02001153 char_u **reg_startp;
1154 char_u **reg_endp;
1155 lpos_T *reg_startpos;
1156 lpos_T *reg_endpos;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001157
Bram Moolenaar6100d022016-10-02 16:51:57 +02001158 win_T *reg_win;
1159 buf_T *reg_buf;
1160 linenr_T reg_firstlnum;
1161 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001162 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001163
Bram Moolenaar0270f382018-07-17 05:43:58 +02001164 // The current match-position is stord in these variables:
1165 linenr_T lnum; // line number, relative to first line
1166 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001167 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001168
1169 int need_clear_subexpr; // subexpressions still need to be cleared
1170#ifdef FEAT_SYN_HL
1171 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1172 // cleared
1173#endif
1174
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001175 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1176 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1177 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001178 int reg_ic;
1179
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001180 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1181 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001182 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001183
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001184 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1185 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001186 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001187
1188 // State for the NFA engine regexec.
1189 int nfa_has_zend; // NFA regexp \ze operator encountered.
1190 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1191 int nfa_nsubexpr; // Number of sub expressions actually being used
1192 // during execution. 1 if only the whole match
1193 // (subexpr 0) is used.
1194 // listid is global, so that it increases on recursive calls to
1195 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1196 // all the states.
1197 int nfa_listid;
1198 int nfa_alt_listid;
1199
1200#ifdef FEAT_SYN_HL
1201 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1202#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001203} regexec_T;
1204
1205static regexec_T rex;
1206static int rex_in_use = FALSE;
1207
Bram Moolenaar071d4272004-06-13 20:20:40 +00001208/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001209 * Return TRUE if character 'c' is included in 'iskeyword' option for
1210 * "reg_buf" buffer.
1211 */
1212 static int
1213reg_iswordc(int c)
1214{
1215 return vim_iswordc_buf(c, rex.reg_buf);
1216}
1217
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// Match information for reg_submatch().  It is needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
typedef struct {
    regmatch_T	*sm_match;
    regmmatch_T	*sm_mmatch;
    linenr_T	sm_firstlnum;
    linenr_T	sm_maxline;
    int		sm_line_lbr;
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1234
// Bit flags for the "flags" argument of reg_getline_common().
typedef enum
{
    RGLF_LINE = 0x01,		// store the line pointer
    RGLF_LENGTH = 0x02		// store the line length
#ifdef FEAT_EVAL
    ,
    RGLF_SUBMATCH = 0x04	// use the line range from "rsm"
#endif
} reg_getline_flags_T;
1244
1245//
1246// common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and
1247// reg_getline_submatch_len().
1248// the flags argument (which is a bitmask) controls what info is to be returned and whether
1249// or not submatch is in effect.
1250// note:
1251// submatch is available only if FEAT_EVAL is defined.
1252 static void
1253reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
1254{
1255 int get_line = flags & RGLF_LINE;
1256 int get_length = flags & RGLF_LENGTH;
1257 linenr_T firstlnum;
1258 linenr_T maxline;
1259
1260#ifdef FEAT_EVAL
1261 if (flags & RGLF_SUBMATCH)
1262 {
1263 firstlnum = rsm.sm_firstlnum + lnum;
1264 maxline = rsm.sm_maxline;
1265 }
1266 else
1267#endif
1268 {
1269 firstlnum = rex.reg_firstlnum + lnum;
1270 maxline = rex.reg_maxline;
1271 }
1272
1273 // when looking behind for a match/no-match lnum is negative. but we
1274 // can't go before line 1.
1275 if (firstlnum < 1)
1276 {
1277 if (get_line)
1278 *line = NULL;
1279 if (get_length)
1280 *length = 0;
1281
1282 return;
1283 }
1284
1285 if (lnum > maxline)
1286 {
1287 // must have matched the "\n" in the last line.
1288 if (get_line)
1289 *line = (char_u *)"";
1290 if (get_length)
1291 *length = 0;
1292
1293 return;
1294 }
1295
1296 if (get_line)
1297 *line = ml_get_buf(rex.reg_buf, firstlnum, FALSE);
1298 if (get_length)
1299 *length = ml_get_buf_len(rex.reg_buf, firstlnum);
1300}
1301
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001302/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1304 */
1305 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001306reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001307{
John Marriott82792db2024-05-12 00:07:17 +02001308 char_u *line;
1309
1310 reg_getline_common(lnum, RGLF_LINE, &line, NULL);
1311
1312 return line;
1313}
1314
1315/*
1316 * Get length of line "lnum", which is relative to "reg_firstlnum".
1317 */
1318 static colnr_T
1319reg_getline_len(linenr_T lnum)
1320{
1321 colnr_T length;
1322
1323 reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
1324
1325 return length;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001326}
1327
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches.
static char_u	*reg_startzp[NSUBEXP];	    // beginning of each match
static char_u	*reg_endzp[NSUBEXP];	    // end of each match
static lpos_T	reg_startzpos[NSUBEXP];	    // idem, beginning pos
static lpos_T	reg_endzpos[NSUBEXP];	    // idem, end pos
#endif

// TRUE if using multi-line regexp.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001337
Bram Moolenaar071d4272004-06-13 20:20:40 +00001338#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001339/*
1340 * Create a new extmatch and mark it as referenced once.
1341 */
1342 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001343make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001344{
1345 reg_extmatch_T *em;
1346
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001347 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001348 if (em != NULL)
1349 em->refcnt = 1;
1350 return em;
1351}
1352
1353/*
1354 * Add a reference to an extmatch.
1355 */
1356 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001357ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358{
1359 if (em != NULL)
1360 em->refcnt++;
1361 return em;
1362}
1363
1364/*
1365 * Remove a reference to an extmatch. If there are no references left, free
1366 * the info.
1367 */
1368 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001369unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370{
1371 int i;
1372
1373 if (em != NULL && --em->refcnt <= 0)
1374 {
1375 for (i = 0; i < NSUBEXP; ++i)
1376 vim_free(em->matches[i]);
1377 vim_free(em);
1378 }
1379}
1380#endif
1381
1382/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001383 * Get class of previous character.
1384 */
1385 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001386reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001388 if (rex.input > rex.line)
1389 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001390 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 return -1;
1392}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001393
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001394/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001395 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001396 */
1397 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001398reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001399{
1400 pos_T top, bot;
1401 linenr_T lnum;
1402 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001403 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001404 int mode;
1405 colnr_T start, end;
1406 colnr_T start2, end2;
1407 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001408 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001409
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001410 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001411 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001412 return FALSE;
1413
1414 if (VIsual_active)
1415 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001416 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001417 {
1418 top = VIsual;
1419 bot = wp->w_cursor;
1420 }
1421 else
1422 {
1423 top = wp->w_cursor;
1424 bot = VIsual;
1425 }
1426 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001427 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001428 }
1429 else
1430 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001431 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001432 {
1433 top = curbuf->b_visual.vi_start;
1434 bot = curbuf->b_visual.vi_end;
1435 }
1436 else
1437 {
1438 top = curbuf->b_visual.vi_end;
1439 bot = curbuf->b_visual.vi_start;
1440 }
zeertzjqe7102202024-02-13 20:32:04 +01001441 // a substitute command may have removed some lines
Christian Brabandt7c71db32024-01-22 20:12:34 +01001442 if (bot.lnum > curbuf->b_ml.ml_line_count)
1443 bot.lnum = curbuf->b_ml.ml_line_count;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001444 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001445 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001446 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001447 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001448 if (lnum < top.lnum || lnum > bot.lnum)
1449 return FALSE;
1450
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001451 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001452 if (mode == 'v')
1453 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001454 if ((lnum == top.lnum && col < top.col)
1455 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1456 return FALSE;
1457 }
1458 else if (mode == Ctrl_V)
1459 {
1460 getvvcol(wp, &top, &start, NULL, &end);
1461 getvvcol(wp, &bot, &start2, NULL, &end2);
1462 if (start2 < start)
1463 start = start2;
1464 if (end2 > end)
1465 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001466 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001467 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001468
1469 // getvvcol() flushes rex.line, need to get it again
1470 rex.line = reg_getline(rex.lnum);
1471 rex.input = rex.line + col;
1472
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001473 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001474 if (cols < start || cols > end - (*p_sel == 'e'))
1475 return FALSE;
1476 }
1477 return TRUE;
1478}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001479
Bram Moolenaar071d4272004-06-13 20:20:40 +00001480/*
1481 * Check the regexp program for its magic number.
1482 * Return TRUE if it's wrong.
1483 */
1484 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001485prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001486{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001487 regprog_T *prog;
1488
Bram Moolenaar6100d022016-10-02 16:51:57 +02001489 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001490 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001491 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001492 return FALSE;
1493
1494 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001495 {
RestorerZ68ebcee2023-05-31 17:12:14 +01001496 iemsg(e_corrupted_regexp_program);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001497 return TRUE;
1498 }
1499 return FALSE;
1500}
1501
1502/*
1503 * Cleanup the subexpressions, if this wasn't done yet.
1504 * This construction is used to clear the subexpressions only when they are
1505 * used (to increase speed).
1506 */
1507 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001508cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001509{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001510 if (!rex.need_clear_subexpr)
1511 return;
1512
1513 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001514 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001515 // Use 0xff to set lnum to -1
1516 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1517 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001518 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001519 else
1520 {
1521 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1522 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
1523 }
1524 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001525}
1526
1527#ifdef FEAT_SYN_HL
1528 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001529cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001530{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001531 if (!rex.need_clear_zsubexpr)
1532 return;
1533
1534 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001535 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001536 // Use 0xff to set lnum to -1
1537 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1538 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001539 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001540 else
1541 {
1542 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1543 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1544 }
1545 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001546}
1547#endif
1548
1549/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001550 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001551 */
1552 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001553reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001554{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001555 rex.line = reg_getline(++rex.lnum);
1556 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001557 fast_breakcheck();
1558}
1559
1560/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001561 * Check whether a backreference matches.
1562 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001563 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1564 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001565 */
1566 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001567match_with_backref(
1568 linenr_T start_lnum,
1569 colnr_T start_col,
1570 linenr_T end_lnum,
1571 colnr_T end_col,
1572 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001573{
1574 linenr_T clnum = start_lnum;
1575 colnr_T ccol = start_col;
1576 int len;
1577 char_u *p;
1578
1579 if (bytelen != NULL)
1580 *bytelen = 0;
1581 for (;;)
1582 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001583 // Since getting one line may invalidate the other, need to make copy.
1584 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001585 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001586 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001587 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001588 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1589 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001590 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001591 vim_free(reg_tofree);
1592 reg_tofree = alloc(len);
1593 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001594 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001595 reg_tofreelen = len;
1596 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001597 STRCPY(reg_tofree, rex.line);
1598 rex.input = reg_tofree + (rex.input - rex.line);
1599 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001600 }
1601
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001602 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001603 p = reg_getline(clnum);
1604 if (clnum == end_lnum)
1605 len = end_col - ccol;
1606 else
John Marriott82792db2024-05-12 00:07:17 +02001607 len = (int)reg_getline_len(clnum) - ccol;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001608
Bram Moolenaar0270f382018-07-17 05:43:58 +02001609 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001610 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001611 if (bytelen != NULL)
1612 *bytelen += len;
1613 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001614 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001615 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001616 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001617
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001618 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001619 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001620 if (bytelen != NULL)
1621 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001622 ++clnum;
1623 ccol = 0;
1624 if (got_int)
1625 return RA_FAIL;
1626 }
1627
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001628 // found a match! Note that rex.line may now point to a copy of the line,
1629 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001630 return RA_MATCH;
1631}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001632
Bram Moolenaarfb031402014-09-09 17:18:49 +02001633/*
1634 * Used in a place where no * or \+ can follow.
1635 */
1636 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001637re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001638{
1639 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001640 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001641 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001642 rc_did_emsg = TRUE;
1643 return FAIL;
1644 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001645 return OK;
1646}
1647
/*
 * One decomposition entry: up to three code points, unused slots are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for 0xfb20 - 0xfb4f, indexed by (code point - 0xfb20).
// Entries marked UNUSED map the code point to itself.
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},          // 0xfb20   alt ayin
    {0x5d0, 0, 0},          // 0xfb21   alt alef
    {0x5d3, 0, 0},          // 0xfb22   alt dalet
    {0x5d4, 0, 0},          // 0xfb23   alt he
    {0x5db, 0, 0},          // 0xfb24   alt kaf
    {0x5dc, 0, 0},          // 0xfb25   alt lamed
    {0x5dd, 0, 0},          // 0xfb26   alt mem-sofit
    {0x5e8, 0, 0},          // 0xfb27   alt resh
    {0x5ea, 0, 0},          // 0xfb28   alt tav
    {'+', 0, 0},            // 0xfb29   alt plus
    {0x5e9, 0x5c1, 0},      // 0xfb2a   shin+shin-dot
    {0x5e9, 0x5c2, 0},      // 0xfb2b   shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},  // 0xfb2c   shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},  // 0xfb2d   shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},      // 0xfb2e   alef+patah
    {0x5d0, 0x5b8, 0},      // 0xfb2f   alef+qamats
    {0x5d0, 0x5bc, 0},      // 0xfb30   alef+mapiq (U+05BC, not hiriq)
    {0x5d1, 0x5bc, 0},      // 0xfb31   bet+dagesh
    {0x5d2, 0x5bc, 0},      // 0xfb32   gimel+dagesh
    {0x5d3, 0x5bc, 0},      // 0xfb33   dalet+dagesh
    {0x5d4, 0x5bc, 0},      // 0xfb34   he+dagesh
    {0x5d5, 0x5bc, 0},      // 0xfb35   vav+dagesh
    {0x5d6, 0x5bc, 0},      // 0xfb36   zayin+dagesh
    {0xfb37, 0, 0},         // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},      // 0xfb38   tet+dagesh
    {0x5d9, 0x5bc, 0},      // 0xfb39   yud+dagesh
    {0x5da, 0x5bc, 0},      // 0xfb3a   kaf sofit+dagesh
    {0x5db, 0x5bc, 0},      // 0xfb3b   kaf+dagesh
    {0x5dc, 0x5bc, 0},      // 0xfb3c   lamed+dagesh
    {0xfb3d, 0, 0},         // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},      // 0xfb3e   mem+dagesh
    {0xfb3f, 0, 0},         // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},      // 0xfb40   nun+dagesh
    {0x5e1, 0x5bc, 0},      // 0xfb41   samech+dagesh
    {0xfb42, 0, 0},         // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},      // 0xfb43   pe sofit+dagesh
    {0x5e4, 0x5bc, 0},      // 0xfb44   pe+dagesh
    {0xfb45, 0, 0},         // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},      // 0xfb46   tsadi+dagesh
    {0x5e7, 0x5bc, 0},      // 0xfb47   qof+dagesh
    {0x5e8, 0x5bc, 0},      // 0xfb48   resh+dagesh
    {0x5e9, 0x5bc, 0},      // 0xfb49   shin+dagesh
    {0x5ea, 0x5bc, 0},      // 0xfb4a   tav+dagesh
    {0x5d5, 0x5b9, 0},      // 0xfb4b   vav+holam
    {0x5d1, 0x5bf, 0},      // 0xfb4c   bet+rafe
    {0x5db, 0x5bf, 0},      // 0xfb4d   kaf+rafe
    {0x5e4, 0x5bf, 0},      // 0xfb4e   pe+rafe
    {0x5d0, 0x5dc, 0}       // 0xfb4f   alef-lamed
};

/*
 * Decompose character "c" into up to three code points, stored in "*c1",
 * "*c2" and "*c3".  Only 0xfb20 - 0xfb4f are looked up in decomp_table[];
 * any other value is returned unchanged in "*c1" with "*c2" and "*c3" set to
 * zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        decomp_T *d = &decomp_table[c - 0xfb20];

        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001725
1726/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001727 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001728 * Return 0 if strings match, non-zero otherwise.
Christian Brabandtc97f4d62024-04-10 16:18:15 +02001729 * Correct the length "*n" when composing characters are ignored.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001730 */
1731 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001732cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001733{
1734 int result;
1735
Bram Moolenaar6100d022016-10-02 16:51:57 +02001736 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001737 result = STRNCMP(s1, s2, *n);
1738 else
1739 result = MB_STRNICMP(s1, s2, *n);
1740
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001741 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001742 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001743 {
1744 char_u *str1, *str2;
1745 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 int junk;
1747
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001748 // we have to handle the strcmp ourselves, since it is necessary to
1749 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001750 str1 = s1;
1751 str2 = s2;
1752 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001753 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001754 {
1755 c1 = mb_ptr2char_adv(&str1);
1756 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001758 // Decompose the character if necessary, into 'base' characters.
1759 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001760 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001761 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001762 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 mb_decompose(c1, &c11, &junk, &junk);
1764 mb_decompose(c2, &c12, &junk, &junk);
1765 c1 = c11;
1766 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001767 if (c11 != c12
1768 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001769 break;
1770 }
1771 }
1772 result = c2 - c1;
1773 if (result == 0)
1774 *n = (int)(str2 - s2);
1775 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776
1777 return result;
1778}
1779
1780/*
1781 * cstrchr: This function is used a lot for simple searches, keep it fast!
1782 */
1783 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001784cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785{
1786 char_u *p;
1787 int cc;
1788
Bram Moolenaara12a1612019-01-24 16:39:02 +01001789 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001790 return vim_strchr(s, c);
1791
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001792 // tolower() and toupper() can be slow, comparing twice should be a lot
1793 // faster (esp. when using MS Visual C++!).
1794 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001795 if (enc_utf8 && c > 0x80)
1796 cc = utf_fold(c);
1797 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001798 if (MB_ISUPPER(c))
1799 cc = MB_TOLOWER(c);
1800 else if (MB_ISLOWER(c))
1801 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001802 else
1803 return vim_strchr(s, c);
1804
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 if (has_mbyte)
1806 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001807 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001808 {
1809 if (enc_utf8 && c > 0x80)
1810 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001811 int uc = utf_ptr2char(p);
1812
1813 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1814 // not 0xff.
1815 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001816 return p;
1817 }
1818 else if (*p == c || *p == cc)
1819 return p;
1820 }
1821 }
1822 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001823 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001824 for (p = s; *p != NUL; ++p)
1825 if (*p == c || *p == cc)
1826 return p;
1827
1828 return NULL;
1829}
1830
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001831////////////////////////////////////////////////////////////////
1832// regsub stuff //
1833////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001834
Yee Cheng Chind25021c2023-09-18 19:51:56 +02001835typedef void (*fptr_T)(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001836
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001837static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001838
/*
 * Store the result of MB_TOUPPER() for "c" in "*d".
 */
    static void
do_upper(int *d, int c)
{
    *d = MB_TOUPPER(c);
}
1844
/*
 * Store the result of MB_TOLOWER() for "c" in "*d".
 */
    static void
do_lower(int *d, int c)
{
    *d = MB_TOLOWER(c);
}
1850
1851/*
1852 * regtilde(): Replace tildes in the pattern by the old pattern.
1853 *
1854 * Short explanation of the tilde: It stands for the previous replacement
1855 * pattern. If that previous pattern also contains a ~ we should go back a
1856 * step further... But we insert the previous pattern into the current one
1857 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001858 * This still does not handle the case where "magic" changes. So require the
1859 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001860 *
1861 * The tildes are parsed once before the first call to vim_regsub().
1862 */
1863 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001864regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001865{
1866 char_u *newsub = source;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867 char_u *p;
John Marriott82792db2024-05-12 00:07:17 +02001868 size_t newsublen = 0;
1869 char_u tilde[3] = {'~', NUL, NUL};
1870 size_t tildelen = 1;
1871 int error = FALSE;
1872
1873 if (!magic)
1874 {
1875 tilde[0] = '\\';
1876 tilde[1] = '~';
1877 tilde[2] = NUL;
1878 tildelen = 2;
1879 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880
1881 for (p = newsub; *p; ++p)
1882 {
John Marriott82792db2024-05-12 00:07:17 +02001883 if (STRNCMP(p, tilde, tildelen) == 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001884 {
John Marriott82792db2024-05-12 00:07:17 +02001885 size_t prefixlen = p - newsub; // not including the tilde
1886 char_u *postfix = p + tildelen;
1887 size_t postfixlen;
1888 size_t tmpsublen;
1889
1890 if (newsublen == 0)
1891 newsublen = STRLEN(newsub);
1892 newsublen -= tildelen;
1893 postfixlen = newsublen - prefixlen;
1894 tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
1895
1896 if (tmpsublen > 0 && reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001897 {
John Marriott82792db2024-05-12 00:07:17 +02001898 char_u *tmpsub;
1899
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001900 // Avoid making the text longer than MAXCOL, it will cause
1901 // trouble at some point.
John Marriott82792db2024-05-12 00:07:17 +02001902 if (tmpsublen > MAXCOL)
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001903 {
1904 emsg(_(e_resulting_text_too_long));
John Marriott82792db2024-05-12 00:07:17 +02001905 error = TRUE;
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001906 break;
1907 }
1908
John Marriott82792db2024-05-12 00:07:17 +02001909 tmpsub = alloc(tmpsublen + 1);
1910 if (tmpsub == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001911 {
John Marriott82792db2024-05-12 00:07:17 +02001912 emsg(_(e_out_of_memory));
1913 error = TRUE;
1914 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001915 }
John Marriott82792db2024-05-12 00:07:17 +02001916
1917 // copy prefix
1918 mch_memmove(tmpsub, newsub, prefixlen);
1919 // interpret tilde
1920 mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
1921 // copy postfix
1922 STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
1923
1924 if (newsub != source) // allocated newsub before
1925 vim_free(newsub);
1926 newsub = tmpsub;
1927 newsublen = tmpsublen;
1928 p = newsub + prefixlen + reg_prev_sublen;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001929 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930 else
John Marriott82792db2024-05-12 00:07:17 +02001931 mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL)
1932
Bram Moolenaar071d4272004-06-13 20:20:40 +00001933 --p;
1934 }
1935 else
1936 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001937 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001938 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001939 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001940 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001941 }
1942 }
1943
John Marriott82792db2024-05-12 00:07:17 +02001944 if (error)
1945 {
1946 if (newsub != source)
1947 vim_free(newsub);
1948 return source;
1949 }
1950
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001951 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1952 // because recursive calls may make the returned string invalid.
John Marriott82792db2024-05-12 00:07:17 +02001953 // Only store it if there something to store.
1954 newsublen = p - newsub;
1955 if (newsublen == 0)
1956 VIM_CLEAR(reg_prev_sub);
1957 else
1958 {
1959 vim_free(reg_prev_sub);
1960 reg_prev_sub = vim_strnsave(newsub, newsublen);
1961 }
1962
1963 if (reg_prev_sub == NULL)
1964 reg_prev_sublen = 0;
1965 else
1966 reg_prev_sublen = newsublen;
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001967
Bram Moolenaar071d4272004-06-13 20:20:40 +00001968 return newsub;
1969}
1970
#ifdef FEAT_EVAL

/*
 * Put the submatches in "argv[argskip]" which is a list passed into
 * call_func() by vim_regsub_both().
 * The list is filled with ten string items taken from rsm.sm_match; an item
 * is NULL when its start or end pointer is NULL.
 * Returns "argskip" when function "fp" takes no submatches argument,
 * otherwise "argskip + 1".
 */
    static int
fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
{
    listitem_T  *li;
    int         i;
    char_u      *s;
    typval_T    *listarg = argv + argskip;

    if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
        // called function doesn't take a submatches argument
        return argskip;

    // Relies on sl_list to be the first item in staticList10_T.
    init_static_list((staticList10_T *)(listarg->vval.v_list));

    // There are always 10 list items in staticList10_T.
    li = listarg->vval.v_list->lv_first;
    for (i = 0; i < 10; ++i)
    {
        s = rsm.sm_match->startp[i];
        if (s == NULL || rsm.sm_match->endp[i] == NULL)
            s = NULL;
        else
            s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
        li->li_tv.v_type = VAR_STRING;
        li->li_tv.vval.v_string = s;
        li = li->li_next;
    }
    return argskip + 1;
}

/*
 * Free the ten string values stored in the items of "sl".
 */
    static void
clear_submatch_list(staticList10_T *sl)
{
    int i;

    for (i = 0; i < 10; ++i)
        vim_free(sl->sl_items[i].li_tv.vval.v_string);
}
#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002017
Bram Moolenaar071d4272004-06-13 20:20:40 +00002018/*
2019 * vim_regsub() - perform substitutions after a vim_regexec() or
2020 * vim_regexec_multi() match.
2021 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002022 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
dundargocc57b5bc2022-11-02 13:30:51 +00002023 * Otherwise nothing is copied, only compute the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002024 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002025 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
2026 *
2027 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
2028 * double them to keep them, and insert a backslash before a CR to avoid it
2029 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030 *
2031 * Note: The matched text must not change between the call of
2032 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
2033 * references invalid!
2034 *
2035 * Returns the size of the replacement, including terminating NUL.
2036 */
2037 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002038vim_regsub(
2039 regmatch_T *rmp,
2040 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002041 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002042 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002043 int destlen,
2044 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002045{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002046 int result;
2047 regexec_T rex_save;
2048 int rex_in_use_save = rex_in_use;
2049
2050 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002051 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002052 rex_save = rex;
2053 rex_in_use = TRUE;
2054
2055 rex.reg_match = rmp;
2056 rex.reg_mmatch = NULL;
2057 rex.reg_maxline = 0;
2058 rex.reg_buf = curbuf;
2059 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002060 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002061
2062 rex_in_use = rex_in_use_save;
2063 if (rex_in_use)
2064 rex = rex_save;
2065
2066 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002068
2069 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002070vim_regsub_multi(
2071 regmmatch_T *rmp,
2072 linenr_T lnum,
2073 char_u *source,
2074 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002075 int destlen,
2076 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002077{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002078 int result;
2079 regexec_T rex_save;
2080 int rex_in_use_save = rex_in_use;
2081
2082 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002083 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002084 rex_save = rex;
2085 rex_in_use = TRUE;
2086
2087 rex.reg_match = NULL;
2088 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002089 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02002090 rex.reg_firstlnum = lnum;
2091 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
2092 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002093 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002094
2095 rex_in_use = rex_in_use_save;
2096 if (rex_in_use)
2097 rex = rex_save;
2098
2099 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002100}
2101
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4
// One saved expression result per nesting level of vim_regsub_both().
static char_u   *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Free every entry of eval_result[] and reset it to NULL.
 */
    void
free_resub_eval_result(void)
{
    int i;

    for (i = 0; i < MAX_REGSUB_NESTING; ++i)
        VIM_CLEAR(eval_result[i]);
}
# endif
#endif
2118
Bram Moolenaar071d4272004-06-13 20:20:40 +00002119 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002120vim_regsub_both(
2121 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002122 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002123 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002124 int destlen,
2125 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126{
2127 char_u *src;
2128 char_u *dst;
2129 char_u *s;
2130 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002131 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002132 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002133 fptr_T func_all = (fptr_T)NULL;
2134 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002135 linenr_T clnum = 0; // init for GCC
2136 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00002137#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002138 static int nesting = 0;
2139 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002140#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002141 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002142
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002143 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002144 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145 {
RestorerZ68ebcee2023-05-31 17:12:14 +01002146 iemsg(e_null_argument);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002147 return 0;
2148 }
2149 if (prog_magic_wrong())
2150 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002151#ifdef FEAT_EVAL
2152 if (nesting == MAX_REGSUB_NESTING)
2153 {
2154 emsg(_(e_substitute_nesting_too_deep));
2155 return 0;
2156 }
2157 nested = nesting;
2158#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002159 src = source;
2160 dst = dest;
2161
2162 /*
2163 * When the substitute part starts with "\=" evaluate it as an expression.
2164 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002165 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166 {
2167#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002168 // To make sure that the length doesn't change between checking the
2169 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01002170 // resulting string is saved from the call with
2171 // "flags & REGSUB_COPY" == 0 to the call with
2172 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002173 if (copy)
2174 {
John Marriott82792db2024-05-12 00:07:17 +02002175 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002176 {
John Marriott82792db2024-05-12 00:07:17 +02002177 int eval_len = (int)STRLEN(eval_result[nested]);
2178
2179 if (eval_len < destlen)
2180 {
2181 STRCPY(dest, eval_result[nested]);
2182 dst += eval_len;
2183 VIM_CLEAR(eval_result[nested]);
2184 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002185 }
2186 }
2187 else
2188 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002189 int prev_can_f_submatch = can_f_submatch;
2190 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002191
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002192 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002193
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002194 // The expression may contain substitute(), which calls us
2195 // recursively. Make sure submatch() gets the text from the first
2196 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002197 if (can_f_submatch)
2198 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002199 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002200 rsm.sm_match = rex.reg_match;
2201 rsm.sm_mmatch = rex.reg_mmatch;
2202 rsm.sm_firstlnum = rex.reg_firstlnum;
2203 rsm.sm_maxline = rex.reg_maxline;
2204 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002205
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002206 // Although unlikely, it is possible that the expression invokes a
2207 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002208 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002209 ++nesting;
2210
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002211 if (expr != NULL)
2212 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002213 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002214 char_u buf[NUMBUFLEN];
2215 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002216 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002217 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002218
2219 rettv.v_type = VAR_STRING;
2220 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002221 argv[0].v_type = VAR_LIST;
2222 argv[0].vval.v_list = &matchList.sl_list;
2223 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002224 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002225 funcexe.fe_argv_func = fill_submatch_list;
2226 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002227 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002228 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002229 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002230 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002231 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002232 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002233 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002234 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002235
Bram Moolenaar6100d022016-10-02 16:51:57 +02002236 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002237 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002238 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002239 }
LemonBoyf3b48952022-05-05 13:53:03 +01002240 else if (expr->v_type == VAR_INSTR)
2241 {
2242 exe_typval_instr(expr, &rettv);
2243 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002244 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002245 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002246 clear_submatch_list(&matchList);
2247
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002248 if (rettv.v_type == VAR_UNKNOWN)
2249 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002250 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002251 else
2252 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002253 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2254 if (eval_result[nested] != NULL)
2255 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002256 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002257 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002258 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002259 else if (substitute_instr != NULL)
2260 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002261 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002262 else
Bram Moolenaara4e0b972022-10-01 19:43:52 +01002263 eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002264 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002265
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002266 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002267 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002268 int had_backslash = FALSE;
2269
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002270 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002271 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002272 // Change NL to CR, so that it becomes a line break,
2273 // unless called from vim_regexec_nl().
2274 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002275 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002276 *s = CAR;
2277 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002278 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002279 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002280 /* Change NL to CR here too, so that this works:
2281 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2282 * abc\
2283 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002284 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002285 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002286 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002287 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002288 had_backslash = TRUE;
2289 }
2290 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002291 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002292 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002293 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002294 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002295 if (s != NULL)
2296 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002297 vim_free(eval_result[nested]);
2298 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002299 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002300 }
2301
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002302 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 }
2304
Bram Moolenaar6100d022016-10-02 16:51:57 +02002305 can_f_submatch = prev_can_f_submatch;
2306 if (can_f_submatch)
2307 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002308 }
2309#endif
2310 }
2311 else
2312 while ((c = *src++) != NUL)
2313 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002314 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002315 no = 0;
2316 else if (c == '\\' && *src != NUL)
2317 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002318 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002319 {
2320 ++src;
2321 no = 0;
2322 }
2323 else if ('0' <= *src && *src <= '9')
2324 {
2325 no = *src++ - '0';
2326 }
2327 else if (vim_strchr((char_u *)"uUlLeE", *src))
2328 {
2329 switch (*src++)
2330 {
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002331 case 'u': func_one = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002332 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002333 case 'U': func_all = do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002334 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002335 case 'l': func_one = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002336 continue;
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002337 case 'L': func_all = do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002338 continue;
2339 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002340 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002341 continue;
2342 }
2343 }
2344 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002345 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002346 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002347 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2348 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002349 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002350 if (copy)
2351 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002352 if (dst + 3 > dest + destlen)
2353 {
2354 iemsg("vim_regsub_both(): not enough space");
2355 return 0;
2356 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002357 *dst++ = c;
2358 *dst++ = *src++;
2359 *dst++ = *src++;
2360 }
2361 else
2362 {
2363 dst += 3;
2364 src += 2;
2365 }
2366 continue;
2367 }
2368
Bram Moolenaar071d4272004-06-13 20:20:40 +00002369 if (c == '\\' && *src != NUL)
2370 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002371 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002372 switch (*src)
2373 {
2374 case 'r': c = CAR; ++src; break;
2375 case 'n': c = NL; ++src; break;
2376 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002377 // Oh no! \e already has meaning in subst pat :-(
2378 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002379 case 'b': c = Ctrl_H; ++src; break;
2380
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002381 // If "backslash" is TRUE the backslash will be removed
2382 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002383 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002384 {
2385 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002386 {
2387 if (dst + 1 > dest + destlen)
2388 {
2389 iemsg("vim_regsub_both(): not enough space");
2390 return 0;
2391 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002392 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002393 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002394 ++dst;
2395 }
2396 c = *src++;
2397 }
2398 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002399 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002400 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002401
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002402 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002403 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002404 {
2405 func_one(&cc, c);
2406 func_one = NULL;
2407 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002408 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002409 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002410 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002411 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002412
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002413 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002414 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002415 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002416 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002417
Bram Moolenaar071d4272004-06-13 20:20:40 +00002418 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002419 {
2420 if (dst + charlen > dest + destlen)
2421 {
2422 iemsg("vim_regsub_both(): not enough space");
2423 return 0;
2424 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002425 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002426 }
2427 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002428 if (enc_utf8)
2429 {
2430 int clen = utf_ptr2len(src - 1);
2431
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002432 // If the character length is shorter than "totlen", there
2433 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002434 if (clen < totlen)
2435 {
2436 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002437 {
2438 if (dst + totlen - clen > dest + destlen)
2439 {
2440 iemsg("vim_regsub_both(): not enough space");
2441 return 0;
2442 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002443 mch_memmove(dst + 1, src - 1 + clen,
2444 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002445 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002446 dst += totlen - clen;
2447 }
2448 }
2449 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002450 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002451 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002452 {
2453 if (dst + 1 > dest + destlen)
2454 {
2455 iemsg("vim_regsub_both(): not enough space");
2456 return 0;
2457 }
2458 *dst = cc;
2459 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002460 dst++;
2461 }
2462 else
2463 {
2464 if (REG_MULTI)
2465 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002466 clnum = rex.reg_mmatch->startpos[no].lnum;
2467 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002468 s = NULL;
2469 else
2470 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002471 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2472 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2473 len = rex.reg_mmatch->endpos[no].col
2474 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 else
John Marriott82792db2024-05-12 00:07:17 +02002476 len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002477 }
2478 }
2479 else
2480 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002481 s = rex.reg_match->startp[no];
2482 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002483 s = NULL;
2484 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002485 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002486 }
2487 if (s != NULL)
2488 {
2489 for (;;)
2490 {
2491 if (len == 0)
2492 {
2493 if (REG_MULTI)
2494 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002495 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002496 break;
2497 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002498 {
2499 if (dst + 1 > dest + destlen)
2500 {
2501 iemsg("vim_regsub_both(): not enough space");
2502 return 0;
2503 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002505 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002506 ++dst;
2507 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002508 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2509 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002510 else
John Marriott82792db2024-05-12 00:07:17 +02002511 len = (int)reg_getline_len(clnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002512 }
2513 else
2514 break;
2515 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002516 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002517 {
2518 if (copy)
RestorerZ68ebcee2023-05-31 17:12:14 +01002519 iemsg(e_damaged_match_string);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002520 goto exit;
2521 }
2522 else
2523 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002524 if ((flags & REGSUB_BACKSLASH)
2525 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002526 {
2527 /*
2528 * Insert a backslash in front of a CR, otherwise
2529 * it will be replaced by a line break.
2530 * Number of backslashes will be halved later,
2531 * double them here.
2532 */
2533 if (copy)
2534 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002535 if (dst + 2 > dest + destlen)
2536 {
2537 iemsg("vim_regsub_both(): not enough space");
2538 return 0;
2539 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002540 dst[0] = '\\';
2541 dst[1] = *s;
2542 }
2543 dst += 2;
2544 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002545 else
2546 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002547 if (has_mbyte)
2548 c = mb_ptr2char(s);
2549 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002550 c = *s;
2551
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002552 if (func_one != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002553 {
2554 func_one(&cc, c);
2555 func_one = NULL;
2556 }
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002557 else if (func_all != (fptr_T)NULL)
Yee Cheng Chind25021c2023-09-18 19:51:56 +02002558 func_all(&cc, c);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002559 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002560 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002561
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002562 if (has_mbyte)
2563 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002564 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002565 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002566
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002567 // Copy composing characters separately, one
2568 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002569 if (enc_utf8)
2570 l = utf_ptr2len(s) - 1;
2571 else
2572 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002573
2574 s += l;
2575 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002576 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002577 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002578 {
2579 if (dst + charlen > dest + destlen)
2580 {
2581 iemsg("vim_regsub_both(): not enough space");
2582 return 0;
2583 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002584 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002585 }
2586 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002588 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002589 {
2590 if (dst + 1 > dest + destlen)
2591 {
2592 iemsg("vim_regsub_both(): not enough space");
2593 return 0;
2594 }
2595 *dst = cc;
2596 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002597 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002598 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002599
Bram Moolenaar071d4272004-06-13 20:20:40 +00002600 ++s;
2601 --len;
2602 }
2603 }
2604 }
2605 no = -1;
2606 }
2607 }
2608 if (copy)
2609 *dst = NUL;
2610
2611exit:
2612 return (int)((dst - dest) + 1);
2613}
2614
2615#ifdef FEAT_EVAL
John Marriott82792db2024-05-12 00:07:17 +02002616
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002617 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002618reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002619{
John Marriott82792db2024-05-12 00:07:17 +02002620 char_u *line;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002621
John Marriott82792db2024-05-12 00:07:17 +02002622 reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002623
John Marriott82792db2024-05-12 00:07:17 +02002624 return line;
2625}
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002626
John Marriott82792db2024-05-12 00:07:17 +02002627 static colnr_T
2628reg_getline_submatch_len(linenr_T lnum)
2629{
2630 colnr_T length;
2631
2632 reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
2633
2634 return length;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002635}
2636
2637/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002638 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002639 * allocated memory.
2640 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2641 */
2642 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002643reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002644{
2645 char_u *retval = NULL;
2646 char_u *s;
2647 int len;
2648 int round;
2649 linenr_T lnum;
2650
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002651 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002652 return NULL;
2653
Bram Moolenaar6100d022016-10-02 16:51:57 +02002654 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002655 {
2656 /*
2657 * First round: compute the length and allocate memory.
2658 * Second round: copy the text.
2659 */
2660 for (round = 1; round <= 2; ++round)
2661 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002662 lnum = rsm.sm_mmatch->startpos[no].lnum;
2663 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664 return NULL;
2665
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002666 s = reg_getline_submatch(lnum);
2667 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002668 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002669 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002670 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002671 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002672 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002673 len = rsm.sm_mmatch->endpos[no].col
2674 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002675 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002676 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002677 ++len;
2678 }
2679 else
2680 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002681 // Multiple lines: take start line from start col, middle
2682 // lines completely and end line up to end col.
John Marriott82792db2024-05-12 00:07:17 +02002683 len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002684 if (round == 2)
2685 {
2686 STRCPY(retval, s);
2687 retval[len] = '\n';
2688 }
2689 ++len;
2690 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002691 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002692 {
John Marriott82792db2024-05-12 00:07:17 +02002693 s = reg_getline_submatch(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002694 if (round == 2)
2695 STRCPY(retval + len, s);
John Marriott82792db2024-05-12 00:07:17 +02002696 len += (int)reg_getline_submatch_len(lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002697 if (round == 2)
2698 retval[len] = '\n';
2699 ++len;
John Marriott82792db2024-05-12 00:07:17 +02002700 ++lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002701 }
2702 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002703 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002704 rsm.sm_mmatch->endpos[no].col);
2705 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002706 if (round == 2)
2707 retval[len] = NUL;
2708 ++len;
2709 }
2710
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002711 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002712 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002713 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002714 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002715 return NULL;
2716 }
2717 }
2718 }
2719 else
2720 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002721 s = rsm.sm_match->startp[no];
2722 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002723 retval = NULL;
2724 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002725 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002726 }
2727
2728 return retval;
2729}
Bram Moolenaar41571762014-04-02 19:00:58 +02002730
2731/*
2732 * Used for the submatch() function with the optional non-zero argument: get
2733 * the list of strings from the n'th submatch in allocated memory with NULs
2734 * represented in NLs.
2735 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2736 * command, for a non-existing submatch and for any error.
2737 */
2738 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002739reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002740{
2741 char_u *s;
2742 linenr_T slnum;
2743 linenr_T elnum;
2744 colnr_T scol;
2745 colnr_T ecol;
2746 int i;
2747 list_T *list;
2748 int error = FALSE;
2749
2750 if (!can_f_submatch || no < 0)
2751 return NULL;
2752
Bram Moolenaar6100d022016-10-02 16:51:57 +02002753 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002754 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002755 slnum = rsm.sm_mmatch->startpos[no].lnum;
2756 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002757 if (slnum < 0 || elnum < 0)
2758 return NULL;
2759
Bram Moolenaar6100d022016-10-02 16:51:57 +02002760 scol = rsm.sm_mmatch->startpos[no].col;
2761 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002762
2763 list = list_alloc();
2764 if (list == NULL)
2765 return NULL;
2766
2767 s = reg_getline_submatch(slnum) + scol;
2768 if (slnum == elnum)
2769 {
2770 if (list_append_string(list, s, ecol - scol) == FAIL)
2771 error = TRUE;
2772 }
2773 else
2774 {
John Marriott82792db2024-05-12 00:07:17 +02002775 int max_lnum = elnum - slnum;
2776
Bram Moolenaar41571762014-04-02 19:00:58 +02002777 if (list_append_string(list, s, -1) == FAIL)
2778 error = TRUE;
John Marriott82792db2024-05-12 00:07:17 +02002779 for (i = 1; i < max_lnum; i++)
Bram Moolenaar41571762014-04-02 19:00:58 +02002780 {
2781 s = reg_getline_submatch(slnum + i);
2782 if (list_append_string(list, s, -1) == FAIL)
2783 error = TRUE;
2784 }
2785 s = reg_getline_submatch(elnum);
2786 if (list_append_string(list, s, ecol) == FAIL)
2787 error = TRUE;
2788 }
2789 }
2790 else
2791 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002792 s = rsm.sm_match->startp[no];
2793 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002794 return NULL;
2795 list = list_alloc();
2796 if (list == NULL)
2797 return NULL;
2798 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002799 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002800 error = TRUE;
2801 }
2802
2803 if (error)
2804 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002805 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002806 return NULL;
2807 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002808 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002809 return list;
2810}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002811#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002812
Bram Moolenaarf4140482020-02-15 23:06:45 +01002813/*
2814 * Initialize the values used for matching against multiple lines
2815 */
2816 static void
2817init_regexec_multi(
2818 regmmatch_T *rmp,
2819 win_T *win, // window in which to search or NULL
2820 buf_T *buf, // buffer in which to search
2821 linenr_T lnum) // nr of line to start looking for match
2822{
2823 rex.reg_match = NULL;
2824 rex.reg_mmatch = rmp;
2825 rex.reg_buf = buf;
2826 rex.reg_win = win;
2827 rex.reg_firstlnum = lnum;
2828 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2829 rex.reg_line_lbr = FALSE;
2830 rex.reg_ic = rmp->rmm_ic;
2831 rex.reg_icombine = FALSE;
2832 rex.reg_maxcol = rmp->rmm_maxcol;
2833}
2834
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002835#include "regexp_bt.c"
2836
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002837static regengine_T bt_regengine =
2838{
2839 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002840 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002841 bt_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002842 bt_regexec_multi
2843#ifdef DEBUG
2844 ,(char_u *)""
2845#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002846};
2847
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002848#include "regexp_nfa.c"
2849
2850static regengine_T nfa_regengine =
2851{
2852 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002853 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002854 nfa_regexec_nl,
Christian Brabandtd2cc51f2024-01-04 22:54:08 +01002855 nfa_regexec_multi
2856#ifdef DEBUG
2857 ,(char_u *)""
2858#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002859};
2860
/*
 * Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
 * 'regexpengine' or from a "\%#=" prefix in the pattern.
 * Must match with 'regexpengine'.
 */
static int      regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002864
#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number that follows "\%#=".
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2872
2873/*
2874 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002875 * Returns the program in allocated memory.
2876 * Use vim_regfree() to free the memory.
2877 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002878 */
2879 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002880vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002881{
2882 regprog_T *prog = NULL;
2883 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002884 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002885
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002886 regexp_engine = p_re;
2887
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002888 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002889 if (STRNCMP(expr, "\\%#=", 4) == 0)
2890 {
2891 int newengine = expr[4] - '0';
2892
2893 if (newengine == AUTOMATIC_ENGINE
2894 || newengine == BACKTRACKING_ENGINE
2895 || newengine == NFA_ENGINE)
2896 {
2897 regexp_engine = expr[4] - '0';
2898 expr += 5;
2899#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002900 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002901 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002902#endif
2903 }
2904 else
2905 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002906 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002907 regexp_engine = AUTOMATIC_ENGINE;
2908 }
2909 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002910#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002911 bt_regengine.expr = expr;
2912 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002913#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002914 // reg_iswordc() uses rex.reg_buf
2915 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002916
2917 /*
2918 * First try the NFA engine, unless backtracking was requested.
2919 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002920 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002921 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002922 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002923 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002924 else
2925 prog = bt_regengine.regcomp(expr, re_flags);
2926
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002927 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002928 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002929 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002930#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002931 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002932 {
2933 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002934 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002935 if (f)
2936 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002937 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002938 fclose(f);
2939 }
2940 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002941 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002942 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002943 }
2944#endif
2945 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002946 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002947 * The NFA engine also fails for patterns that it can't handle well
2948 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002949 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002950 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002951 if (regexp_engine == AUTOMATIC_ENGINE
2952 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002953 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002954 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002955#ifdef FEAT_EVAL
2956 report_re_switch(expr);
2957#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002958 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002959 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002960 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002961
Bram Moolenaarfda37292014-11-05 14:27:36 +01002962 if (prog != NULL)
2963 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002964 // Store the info needed to call regcomp() again when the engine turns
2965 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002966 prog->re_engine = regexp_engine;
2967 prog->re_flags = re_flags;
2968 }
2969
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002970 return prog;
2971}
2972
2973/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002974 * Free a compiled regexp program, returned by vim_regcomp().
2975 */
2976 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002977vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002978{
2979 if (prog != NULL)
2980 prog->engine->regfree(prog);
2981}
2982
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory kept in the file-level variables "regstack", "backpos",
 * "reg_tofree" and "reg_prev_sub".
 * Only available when compiled with EXITFREE (or when generating prototypes).
 */
    void
free_regexp_stuff(void)
{
    // cleared with ga_clear()
    ga_clear(&regstack);
    ga_clear(&backpos);

    // freed with vim_free()
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
2993
#ifdef FEAT_EVAL
/*
 * When 'verbose' is above zero: give a message that the backtracking regexp
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
        return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
3007
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return whether "prog" is currently being executed, i.e. the value of its
 * "re_in_use" flag.  "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int         busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01003018
3019/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003020 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01003021 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003022 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003023 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003024 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003025 *
3026 * Return TRUE if there is a match, FALSE if not.
3027 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01003028 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003029vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01003030 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003031 char_u *line, // string to match against
3032 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01003033 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01003034{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003035 int result;
3036 regexec_T rex_save;
3037 int rex_in_use_save = rex_in_use;
3038
Bram Moolenaar0270f382018-07-17 05:43:58 +02003039 // Cannot use the same prog recursively, it contains state.
3040 if (rmp->regprog->re_in_use)
3041 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003042 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003043 return FALSE;
3044 }
3045 rmp->regprog->re_in_use = TRUE;
3046
Bram Moolenaar6100d022016-10-02 16:51:57 +02003047 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003048 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003049 rex_save = rex;
3050 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003051
Bram Moolenaar6100d022016-10-02 16:51:57 +02003052 rex.reg_startp = NULL;
3053 rex.reg_endp = NULL;
3054 rex.reg_startpos = NULL;
3055 rex.reg_endpos = NULL;
3056
3057 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003058 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003059
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003060 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003061 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3062 && result == NFA_TOO_EXPENSIVE)
3063 {
3064 int save_p_re = p_re;
3065 int re_flags = rmp->regprog->re_flags;
3066 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3067
3068 p_re = BACKTRACKING_ENGINE;
3069 vim_regfree(rmp->regprog);
3070 if (pat != NULL)
3071 {
3072#ifdef FEAT_EVAL
3073 report_re_switch(pat);
3074#endif
3075 rmp->regprog = vim_regcomp(pat, re_flags);
3076 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003077 {
3078 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003079 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02003080 rmp->regprog->re_in_use = FALSE;
3081 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003082 vim_free(pat);
3083 }
3084
3085 p_re = save_p_re;
3086 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02003087
3088 rex_in_use = rex_in_use_save;
3089 if (rex_in_use)
3090 rex = rex_save;
3091
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003092 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003093}
3094
#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
/*
 * Match the compiled pattern "*prog" against "line", starting at column "col".
 * "ignore_case" is passed on through the "rm_ic" field of the match.
 * A "\n" in "line" is not treated as a line break.
 * Note: "*prog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_prog(
    regprog_T	**prog,
    int		ignore_case,
    char_u	*line,
    colnr_T	col)
{
    regmatch_T	rm;
    int		matched;

    rm.rm_ic = ignore_case;
    rm.regprog = *prog;
    matched = vim_regexec_string(&rm, line, col, FALSE);

    // The matcher may have swapped the program, hand the current one back.
    *prog = rm.regprog;
    return matched;
}
#endif
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003117
3118/*
3119 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003120 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003121 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003122 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003123vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003124{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003125 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003126}
3127
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003128/*
3129 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003130 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003131 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003132 */
3133 int
Bram Moolenaar05540972016-01-30 20:31:25 +01003134vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003135{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003136 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003137}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003138
3139/*
3140 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003141 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
3142 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003143 * Uses curbuf for line count and 'iskeyword'.
3144 *
3145 * Return zero if there is no match. Return number of lines contained in the
3146 * match otherwise.
3147 */
3148 long
Bram Moolenaar05540972016-01-30 20:31:25 +01003149vim_regexec_multi(
3150 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003151 win_T *win, // window in which to search or NULL
3152 buf_T *buf, // buffer in which to search
3153 linenr_T lnum, // nr of line to start looking for match
3154 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003155 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003156{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003157 int result;
3158 regexec_T rex_save;
3159 int rex_in_use_save = rex_in_use;
3160
Bram Moolenaar0270f382018-07-17 05:43:58 +02003161 // Cannot use the same prog recursively, it contains state.
3162 if (rmp->regprog->re_in_use)
3163 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003164 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003165 return FALSE;
3166 }
3167 rmp->regprog->re_in_use = TRUE;
3168
Bram Moolenaar6100d022016-10-02 16:51:57 +02003169 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003170 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003171 rex_save = rex;
3172 rex_in_use = TRUE;
3173
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003174 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003175 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003176 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003177
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003178 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003179 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3180 && result == NFA_TOO_EXPENSIVE)
3181 {
3182 int save_p_re = p_re;
3183 int re_flags = rmp->regprog->re_flags;
3184 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3185
3186 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003187 if (pat != NULL)
3188 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003189 regprog_T *prev_prog = rmp->regprog;
3190
Bram Moolenaarfda37292014-11-05 14:27:36 +01003191#ifdef FEAT_EVAL
3192 report_re_switch(pat);
3193#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003194#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003195 // checking for \z misuse was already done when compiling for NFA,
3196 // allow all here
3197 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003198#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003199 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003200#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003201 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003202#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003203 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003204 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003205 // Somehow compiling the pattern failed now, put back the
3206 // previous one to avoid "regprog" becoming NULL.
3207 rmp->regprog = prev_prog;
3208 }
3209 else
3210 {
3211 vim_regfree(prev_prog);
3212
Bram Moolenaar41499802018-07-18 06:02:09 +02003213 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003214 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003215 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003216 rmp->regprog->re_in_use = FALSE;
3217 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003218 vim_free(pat);
3219 }
3220 p_re = save_p_re;
3221 }
3222
Bram Moolenaar6100d022016-10-02 16:51:57 +02003223 rex_in_use = rex_in_use_save;
3224 if (rex_in_use)
3225 rex = rex_save;
3226
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003227 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003228}