blob: 0e6c746df81975c55abc98708f200a93da7f01e6 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
Bram Moolenaar155f2d12022-06-20 13:38:33 +010024static sig_atomic_t dummy_timeout_flag = 0;
25static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
/*
 * Magic characters have a special meaning, they don't match literally.
 * A Magic character is stored as the character value minus 256, which makes
 * it negative.  That keeps it apart from literal characters (possibly
 * multi-byte), which are never negative.  Only ASCII characters can be Magic.
 *
 * Magic(x)	turn the literal character "x" into its Magic form
 * un_Magic(x)	turn the Magic character "x" back into the literal character
 * is_Magic(x)	non-zero when "x" is a Magic character
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)

/*
 * Return the literal character for "x": a Magic character is converted back,
 * anything else is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magicness of "x": a Magic character becomes literal, a literal
 * character becomes Magic.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
#ifdef FEAT_RELTIME
// Number of init_regexp_timeout() calls that have not yet been matched by a
// disable_regexp_timeout() call.
static int timeout_nesting = 0;

/*
 * Start a timer that will cause the regexp to abort after "msec".
 * This doesn't work well recursively.  In case it happens anyway, the first
 * set timeout will prevail, nested ones are ignored.
 * The caller must make sure there is a matching disable_regexp_timeout() call!
 */
    void
init_regexp_timeout(long msec)
{
    int	    outermost = (timeout_nesting == 0);

    // Only the outermost call starts the timer; nested calls are just counted.
    if (outermost)
        timeout_flag = start_timeout(msec);
    ++timeout_nesting;
}

/*
 * Undo one init_regexp_timeout() call.  When the outermost call is undone the
 * timer is stopped and "timeout_flag" points to the dummy flag again.
 * Gives an internal error when no init_regexp_timeout() call is pending.
 */
    void
disable_regexp_timeout(void)
{
    if (timeout_nesting == 0)
    {
        iemsg("disable_regexp_timeout() called without active timer");
        return;
    }

    if (--timeout_nesting != 0)
        return;		// still inside a nested init/disable pair

    stop_timeout();
    timeout_flag = &dummy_timeout_flag;
}
#endif
82
#if defined(FEAT_EVAL) || defined(PROTO)
# ifdef FEAT_RELTIME
// Value of "timeout_flag" at the time save_timeout_for_debugging() was called.
// Declared with the same "volatile" qualifier as "timeout_flag", so that the
// pointer can be saved and restored without casting the qualifier away.
static volatile sig_atomic_t *saved_timeout_flag;
# endif

/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.
 * Remembers the current "timeout_flag" and makes it point to the dummy flag.
 * Must be followed by calling restore_timeout_for_debugging().
 * Does nothing without the +reltime feature.
 */
    void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    saved_timeout_flag = timeout_flag;
    timeout_flag = &dummy_timeout_flag;
# endif
}

/*
 * Make "timeout_flag" point to what it pointed to before the matching
 * save_timeout_for_debugging() call.
 * Does nothing without the +reltime feature.
 */
    void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
#endif
110
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200112 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +0000113 * number; the start node begins in the second byte. It's used to catch the
114 * most severe mutilation of the program by the caller.
115 */
116
117#define REGMAGIC 0234
118
119/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 * Utility definitions.
121 */
122#define UCHARAT(p) ((int)*(char_u *)(p))
123
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100124// Used for an error (down from) vim_regcomp(): give the error message, set
125// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100126#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
127#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
128#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
129#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +0100130#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100131#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +0000132#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000133
Bram Moolenaar95f09602016-11-10 20:01:45 +0100134
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135#define MAX_LIMIT (32767L << 16L)
136
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137#define NOT_MULTI 0
138#define MULTI_ONE 1
139#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200140
141// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100142#define RA_FAIL 1 // something failed, abort
143#define RA_CONT 2 // continue in inner loop
144#define RA_BREAK 3 // break inner loop
145#define RA_MATCH 4 // successful match
146#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200147
Bram Moolenaar071d4272004-06-13 20:20:40 +0000148/*
149 * Return NOT_MULTI if c is not a "multi" operator.
150 * Return MULTI_ONE if c is a single "multi" operator.
151 * Return MULTI_MULT if c is a multi "multi" operator.
152 */
153 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100154re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000155{
156 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
157 return MULTI_ONE;
158 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
159 return MULTI_MULT;
160 return NOT_MULTI;
161}
162
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000163static char_u *reg_prev_sub = NULL;
164
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n	- New line (NL).
 *  \r	- Carriage Return (CR).
 *  \t	- Tab (TAB).
 *  \e	- Escape (ESC).
 *  \b	- Backspace (Ctrl_H).
 *  \d	- Character code in decimal, eg \d123
 *  \o	- Character code in octal, eg \o40
 *  \x	- Character code in hex, eg \x4a
 *  \u	- Multibyte character code, eg \u20ac
 *  \U	- Long multibyte character code, eg \U12345678
 */
181static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000182static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000183
Bram Moolenaar071d4272004-06-13 20:20:40 +0000184/*
185 * Translate '\x' to its control character, except "\n", which is Magic.
186 */
187 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100188backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000189{
190 switch (c)
191 {
192 case 'r': return CAR;
193 case 't': return TAB;
194 case 'e': return ESC;
195 case 'b': return BS;
196 }
197 return c;
198}
199
200/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000201 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000202 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
203 * recognized. Otherwise "pp" is advanced to after the item.
204 */
205 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100206get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000207{
208 static const char *(class_names[]) =
209 {
210 "alnum:]",
211#define CLASS_ALNUM 0
212 "alpha:]",
213#define CLASS_ALPHA 1
214 "blank:]",
215#define CLASS_BLANK 2
216 "cntrl:]",
217#define CLASS_CNTRL 3
218 "digit:]",
219#define CLASS_DIGIT 4
220 "graph:]",
221#define CLASS_GRAPH 5
222 "lower:]",
223#define CLASS_LOWER 6
224 "print:]",
225#define CLASS_PRINT 7
226 "punct:]",
227#define CLASS_PUNCT 8
228 "space:]",
229#define CLASS_SPACE 9
230 "upper:]",
231#define CLASS_UPPER 10
232 "xdigit:]",
233#define CLASS_XDIGIT 11
234 "tab:]",
235#define CLASS_TAB 12
236 "return:]",
237#define CLASS_RETURN 13
238 "backspace:]",
239#define CLASS_BACKSPACE 14
240 "escape:]",
241#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100242 "ident:]",
243#define CLASS_IDENT 16
244 "keyword:]",
245#define CLASS_KEYWORD 17
246 "fname:]",
247#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248 };
249#define CLASS_NONE 99
250 int i;
251
252 if ((*pp)[1] == ':')
253 {
K.Takataeeec2542021-06-02 13:28:16 +0200254 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
256 {
257 *pp += STRLEN(class_names[i]) + 2;
258 return i;
259 }
260 }
261 return CLASS_NONE;
262}
263
264/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000265 * Specific version of character class functions.
266 * Using a table to keep this fast.
267 */
268static short class_tab[256];
269
270#define RI_DIGIT 0x01
271#define RI_HEX 0x02
272#define RI_OCTAL 0x04
273#define RI_WORD 0x08
274#define RI_HEAD 0x10
275#define RI_ALPHA 0x20
276#define RI_LOWER 0x40
277#define RI_UPPER 0x80
278#define RI_WHITE 0x100
279
280 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100281init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000282{
283 int i;
284 static int done = FALSE;
285
286 if (done)
287 return;
288
289 for (i = 0; i < 256; ++i)
290 {
291 if (i >= '0' && i <= '7')
292 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
293 else if (i >= '8' && i <= '9')
294 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
295 else if (i >= 'a' && i <= 'f')
296 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
299 else if (i >= 'A' && i <= 'F')
300 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
303 else if (i == '_')
304 class_tab[i] = RI_WORD + RI_HEAD;
305 else
306 class_tab[i] = 0;
307 }
308 class_tab[' '] |= RI_WHITE;
309 class_tab['\t'] |= RI_WHITE;
310 done = TRUE;
311}
312
// Character class tests using class_tab[].  "c" is evaluated more than once,
// it must not have side effects.
// Only values 0 - 255 have an entry in class_tab[].  The range check is done
// on the unsigned value, so that both characters above 255 (multi-byte) and
// negative values (e.g. Magic characters) give FALSE instead of indexing
// outside of the table.
#define ri_digit(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_WORD))
#define ri_head(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100323// flags for regflags
324#define RF_ICASE 1 // ignore case
325#define RF_NOICASE 2 // don't ignore case
326#define RF_HASNL 4 // can match a NL
327#define RF_ICOMBINE 8 // ignore combining characters
328#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000329
330/*
331 * Global work variables for vim_regcomp().
332 */
333
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100334static char_u *regparse; // Input-scan pointer.
335static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100336static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000337#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100338static int regnzpar; // \z() count.
339static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000340#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100341static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000342#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100343static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100346static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100348static int reg_string; // matching with a string instead of a buffer
349 // line
350static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000351
352/*
353 * META contains all characters that may be magic, except '^' and '$'.
354 */
355
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100356// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357static char_u META_flags[] = {
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100360// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000361 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100362// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100364// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100366// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000367 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100368// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000369 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100370// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000371 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
372};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000373
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100374static int curchr; // currently parsed character
375// Previous character. Note: prevchr is sometimes -1 when we are not at the
376// start, eg in /[ ^I]^ the pattern was never found even if it existed,
377// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200378static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100379static int prevprevchr; // previous-previous character
380static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100382// arguments for reg()
383#define REG_NOPAREN 0 // toplevel reg()
384#define REG_PAREN 1 // \(\)
385#define REG_ZPAREN 2 // \z(\)
386#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000387
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200388typedef struct
389{
390 char_u *regparse;
391 int prevchr_len;
392 int curchr;
393 int prevchr;
394 int prevprevchr;
395 int nextchr;
396 int at_start;
397 int prev_at_start;
398 int regnpar;
399} parse_state_T;
400
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100401static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100402static int getchr(void);
403static void skipchr_keepstart(void);
404static int peekchr(void);
405static void skipchr(void);
406static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100407static long gethexchrs(int maxinputlen);
408static long getoctchrs(void);
409static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100410static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100411static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200412static int cstrncmp(char_u *s1, char_u *s2, int *n);
413static char_u *cstrchr(char_u *, int);
414static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100415static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100416#ifdef FEAT_EVAL
417static void report_re_switch(char_u *pat);
418#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000419
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200420static regengine_T bt_regengine;
421static regengine_T nfa_regengine;
422
Bram Moolenaar071d4272004-06-13 20:20:40 +0000423/*
424 * Return TRUE if compiled regular expression "prog" can match a line break.
425 */
426 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100427re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428{
429 return (prog->regflags & RF_HASNL);
430}
431
432/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000433 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
434 * Returns a character representing the class. Zero means that no item was
435 * recognized. Otherwise "pp" is advanced to after the item.
436 */
437 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100438get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000439{
440 int c;
441 int l = 1;
442 char_u *p = *pp;
443
Bram Moolenaar985079c2019-02-16 17:07:47 +0100444 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000445 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000446 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000447 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000448 if (p[l + 2] == '=' && p[l + 3] == ']')
449 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450 if (has_mbyte)
451 c = mb_ptr2char(p + 2);
452 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000453 c = p[2];
454 *pp += l + 4;
455 return c;
456 }
457 }
458 return 0;
459}
460
461/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000462 * Check for a collating element "[.a.]". "pp" points to the '['.
463 * Returns a character. Zero means that no item was recognized. Otherwise
464 * "pp" is advanced to after the item.
465 * Currently only single characters are recognized!
466 */
467 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100468get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469{
470 int c;
471 int l = 1;
472 char_u *p = *pp;
473
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100474 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000475 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000476 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000477 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000478 if (p[l + 2] == '.' && p[l + 3] == ']')
479 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000480 if (has_mbyte)
481 c = mb_ptr2char(p + 2);
482 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000483 c = p[2];
484 *pp += l + 4;
485 return c;
486 }
487 }
488 return 0;
489}
490
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100491static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
492static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200493
494 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100495get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200496{
497 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
498 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
499}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000500
501/*
502 * Skip over a "[]" range.
503 * "p" must point to the character after the '['.
504 * The returned pointer is on the matching ']', or the terminating NUL.
505 */
506 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100507skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000508{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000509 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000510
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100511 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000512 ++p;
513 if (*p == ']' || *p == '-')
514 ++p;
515 while (*p != NUL && *p != ']')
516 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000517 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000518 p += l;
519 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000520 if (*p == '-')
521 {
522 ++p;
523 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100524 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000525 }
526 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200527 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000528 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200529 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000530 p += 2;
531 else if (*p == '[')
532 {
533 if (get_char_class(&p) == CLASS_NONE
534 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200535 && get_coll_element(&p) == 0
536 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100537 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000538 }
539 else
540 ++p;
541 }
542
543 return p;
544}
545
546/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200548 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000549 * Take care of characters with a backslash in front of it.
550 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000551 */
552 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100553skip_regexp(
554 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200555 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200556 int magic)
557{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100558 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200559}
560
561/*
562 * Call skip_regexp() and when the delimiter does not match give an error and
563 * return NULL.
564 */
565 char_u *
566skip_regexp_err(
567 char_u *startp,
568 int delim,
569 int magic)
570{
571 char_u *p = skip_regexp(startp, delim, magic);
572
573 if (*p != delim)
574 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000575 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200576 return NULL;
577 }
578 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200579}
580
581/*
582 * skip_regexp() with extra arguments:
583 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
584 * expression and change "\?" to "?". If "*newp" is not NULL the expression
585 * is changed in-place.
586 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100587 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200588 */
589 char_u *
590skip_regexp_ex(
591 char_u *startp,
592 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100593 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200594 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100595 int *dropped,
596 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000597{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100598 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 char_u *p = startp;
600
601 if (magic)
602 mymagic = MAGIC_ON;
603 else
604 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200605 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100607 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100609 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610 break;
611 if ((p[0] == '[' && mymagic >= MAGIC_ON)
612 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
613 {
614 p = skip_anyof(p + 1);
615 if (p[0] == NUL)
616 break;
617 }
618 else if (p[0] == '\\' && p[1] != NUL)
619 {
620 if (dirc == '?' && newp != NULL && p[1] == '?')
621 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100622 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000623 if (*newp == NULL)
624 {
625 *newp = vim_strsave(startp);
626 if (*newp != NULL)
627 p = *newp + (p - startp);
628 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200629 if (dropped != NULL)
630 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000631 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000632 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000633 else
634 ++p;
635 }
636 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100637 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000638 if (*p == 'v')
639 mymagic = MAGIC_ALL;
640 else if (*p == 'V')
641 mymagic = MAGIC_NONE;
642 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000643 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100644 if (magic_val != NULL)
645 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000646 return p;
647}
648
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200649/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200650 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200651 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100652static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200653static int at_start; // True when on the first character
654static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100655
Bram Moolenaar071d4272004-06-13 20:20:40 +0000656/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200657 * Start parsing at "str".
658 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000659 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100660initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661{
662 regparse = str;
663 prevchr_len = 0;
664 curchr = prevprevchr = prevchr = nextchr = -1;
665 at_start = TRUE;
666 prev_at_start = FALSE;
667}
668
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200669/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200670 * Save the current parse state, so that it can be restored and parsing
671 * starts in the same state again.
672 */
673 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100674save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200675{
676 ps->regparse = regparse;
677 ps->prevchr_len = prevchr_len;
678 ps->curchr = curchr;
679 ps->prevchr = prevchr;
680 ps->prevprevchr = prevprevchr;
681 ps->nextchr = nextchr;
682 ps->at_start = at_start;
683 ps->prev_at_start = prev_at_start;
684 ps->regnpar = regnpar;
685}
686
687/*
688 * Restore a previously saved parse state.
689 */
690 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100691restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200692{
693 regparse = ps->regparse;
694 prevchr_len = ps->prevchr_len;
695 curchr = ps->curchr;
696 prevchr = ps->prevchr;
697 prevprevchr = ps->prevprevchr;
698 nextchr = ps->nextchr;
699 at_start = ps->at_start;
700 prev_at_start = ps->prev_at_start;
701 regnpar = ps->regnpar;
702}
703
704
705/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200706 * Get the next character without advancing.
707 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000708 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100709peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000711 static int after_slash = FALSE;
712
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000713 if (curchr != -1)
714 return curchr;
715
716 switch (curchr = regparse[0])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717 {
Bram Moolenaar071d4272004-06-13 20:20:40 +0000718 case '.':
719 case '[':
720 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100721 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722 if (reg_magic >= MAGIC_ON)
723 curchr = Magic(curchr);
724 break;
725 case '(':
726 case ')':
727 case '{':
728 case '%':
729 case '+':
730 case '=':
731 case '?':
732 case '@':
733 case '!':
734 case '&':
735 case '|':
736 case '<':
737 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100738 case '#': // future ext.
739 case '"': // future ext.
740 case '\'': // future ext.
741 case ',': // future ext.
742 case '-': // future ext.
743 case ':': // future ext.
744 case ';': // future ext.
745 case '`': // future ext.
746 case '/': // Can't be used in / command
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000747 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000748 if (reg_magic == MAGIC_ALL)
749 curchr = Magic(curchr);
750 break;
751 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100752 // * is not magic as the very first character, eg "?*ptr", when
753 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
754 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000755 if (reg_magic >= MAGIC_ON
756 && !at_start
757 && !(prev_at_start && prevchr == Magic('^'))
758 && (after_slash
759 || (prevchr != Magic('(')
760 && prevchr != Magic('&')
761 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000762 curchr = Magic('*');
763 break;
764 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100765 // '^' is only magic as the very first character and if it's after
766 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000767 if (reg_magic >= MAGIC_OFF
768 && (at_start
769 || reg_magic == MAGIC_ALL
770 || prevchr == Magic('(')
771 || prevchr == Magic('|')
772 || prevchr == Magic('&')
773 || prevchr == Magic('n')
774 || (no_Magic(prevchr) == '('
775 && prevprevchr == Magic('%'))))
776 {
777 curchr = Magic('^');
778 at_start = TRUE;
779 prev_at_start = FALSE;
780 }
781 break;
782 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100783 // '$' is only magic as the very last char and if it's in front of
784 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 if (reg_magic >= MAGIC_OFF)
786 {
787 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200788 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100790 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000791 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000792 || p[1] == 'm' || p[1] == 'M'
793 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200794 {
795 if (p[1] == 'v')
796 is_magic_all = TRUE;
797 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
798 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000799 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200800 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000801 if (p[0] == NUL
802 || (p[0] == '\\'
803 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
804 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200805 || (is_magic_all
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +0000806 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 || reg_magic == MAGIC_ALL)
808 curchr = Magic('$');
809 }
810 break;
811 case '\\':
812 {
813 int c = regparse[1];
814
815 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100816 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000817 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000818 {
819 /*
820 * META contains everything that may be magic sometimes,
821 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200822 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000823 * magicness. Therefore, \ is so meta-magic that it is
824 * not in META.
825 */
826 curchr = -1;
827 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100828 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000829 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000830 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 peekchr();
832 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000833 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000834 curchr = toggle_Magic(curchr);
835 }
836 else if (vim_strchr(REGEXP_ABBR, c))
837 {
838 /*
839 * Handle abbreviations, like "\t" for TAB -- webb
840 */
841 curchr = backslash_trans(c);
842 }
843 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
844 curchr = toggle_Magic(c);
845 else
846 {
847 /*
848 * Next character can never be (made) magic?
849 * Then backslashing it won't do anything.
850 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 if (has_mbyte)
852 curchr = (*mb_ptr2char)(regparse + 1);
853 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000854 curchr = c;
855 }
856 break;
857 }
858
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859 default:
860 if (has_mbyte)
861 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000862 }
863
864 return curchr;
865}
866
867/*
868 * Eat one lexed character. Do this in a way that we can undo it.
869 */
870 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100871skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000872{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100873 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000874 if (*regparse == '\\')
875 prevchr_len = 1;
876 else
877 prevchr_len = 0;
878 if (regparse[prevchr_len] != NUL)
879 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000880 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100881 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000882 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000883 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000884 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000885 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000886 ++prevchr_len;
887 }
888 regparse += prevchr_len;
889 prev_at_start = at_start;
890 at_start = FALSE;
891 prevprevchr = prevchr;
892 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100893 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000894 nextchr = -1;
895}
896
897/*
898 * Skip a character while keeping the value of prev_at_start for at_start.
899 * prevchr and prevprevchr are also kept.
900 */
901 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100902skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000903{
904 int as = prev_at_start;
905 int pr = prevchr;
906 int prpr = prevprevchr;
907
908 skipchr();
909 at_start = as;
910 prevchr = pr;
911 prevprevchr = prpr;
912}
913
/*
 * Return the next lexed character of the pattern and advance past it.
 * This is peekchr() followed by skipchr(); magic-ness is handled by
 * peekchr().
 */
    static int
getchr(void)
{
    int c;

    c = peekchr();
    skipchr();
    return c;
}
926
927/*
928 * put character back. Works only once!
929 */
930 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100931ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000932{
933 nextchr = curchr;
934 curchr = prevchr;
935 prevchr = prevprevchr;
936 at_start = prev_at_start;
937 prev_at_start = FALSE;
938
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100939 // Backup regparse, so that it's at the same position as before the
940 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000941 regparse -= prevchr_len;
942}
943
944/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000945 * Get and return the value of the hex string at the current position.
946 * Return -1 if there is no valid hex number.
947 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000948 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000949 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000950 * The parameter controls the maximum number of input characters. This will be
951 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
952 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100953 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100954gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000955{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100956 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000957 int c;
958 int i;
959
960 for (i = 0; i < maxinputlen; ++i)
961 {
962 c = regparse[0];
963 if (!vim_isxdigit(c))
964 break;
965 nr <<= 4;
966 nr |= hex2nr(c);
967 ++regparse;
968 }
969
970 if (i == 0)
971 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100972 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000973}
974
975/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200976 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000977 * current position. Return -1 for invalid. Consumes all digits.
978 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100979 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100980getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000981{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100982 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000983 int c;
984 int i;
985
986 for (i = 0; ; ++i)
987 {
988 c = regparse[0];
989 if (c < '0' || c > '9')
990 break;
991 nr *= 10;
992 nr += c - '0';
993 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100994 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000995 }
996
997 if (i == 0)
998 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100999 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001000}
1001
1002/*
1003 * get and return the value of the octal string immediately after the current
1004 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
1005 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
1006 * treat 8 or 9 as recognised characters. Position is updated:
1007 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00001008 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001009 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001010 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01001011getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001012{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001013 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001014 int c;
1015 int i;
1016
1017 for (i = 0; i < 3 && nr < 040; ++i)
1018 {
1019 c = regparse[0];
1020 if (c < '0' || c > '7')
1021 break;
1022 nr <<= 3;
1023 nr |= hex2nr(c);
1024 ++regparse;
1025 }
1026
1027 if (i == 0)
1028 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001029 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001030}
1031
1032/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001033 * read_limits - Read two integers to be taken as a minimum and maximum.
1034 * If the first character is '-', then the range is reversed.
1035 * Should end with 'end'. If minval is missing, zero is default, if maxval is
1036 * missing, a very big number is the default.
1037 */
1038 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001039read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001040{
1041 int reverse = FALSE;
1042 char_u *first_char;
1043 long tmp;
1044
1045 if (*regparse == '-')
1046 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001047 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001048 regparse++;
1049 reverse = TRUE;
1050 }
1051 first_char = regparse;
1052 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001053 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001054 {
1055 if (vim_isdigit(*++regparse))
1056 *maxval = getdigits(&regparse);
1057 else
1058 *maxval = MAX_LIMIT;
1059 }
1060 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001061 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001063 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001064 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001065 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001066 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001067 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001068 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001069
1070 /*
1071 * Reverse the range if there was a '-', or make sure it is in the right
1072 * order otherwise.
1073 */
1074 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1075 {
1076 tmp = *minval;
1077 *minval = *maxval;
1078 *maxval = tmp;
1079 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001080 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001081 return OK;
1082}
1083
1084/*
1085 * vim_regexec and friends
1086 */
1087
1088/*
1089 * Global work variables for vim_regexec().
1090 */
1091
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001092static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001093#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001094static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001095#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001096static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001097
1098/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001099 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1100 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001101 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001102 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001103static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001104static unsigned reg_tofreelen;
1105
1106/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001107 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001108 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001109 * done:
1110 * single-line multi-line
1111 * reg_match &regmatch_T NULL
1112 * reg_mmatch NULL &regmmatch_T
1113 * reg_startp reg_match->startp <invalid>
1114 * reg_endp reg_match->endp <invalid>
1115 * reg_startpos <invalid> reg_mmatch->startpos
1116 * reg_endpos <invalid> reg_mmatch->endpos
1117 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001118 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001119 * reg_firstlnum <invalid> first line in which to search
1120 * reg_maxline 0 last line nr
1121 * reg_line_lbr FALSE or TRUE FALSE
1122 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001123typedef struct {
1124 regmatch_T *reg_match;
1125 regmmatch_T *reg_mmatch;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001126
Bram Moolenaar6100d022016-10-02 16:51:57 +02001127 char_u **reg_startp;
1128 char_u **reg_endp;
1129 lpos_T *reg_startpos;
1130 lpos_T *reg_endpos;
Bram Moolenaar01105b32022-11-26 11:47:10 +00001131
Bram Moolenaar6100d022016-10-02 16:51:57 +02001132 win_T *reg_win;
1133 buf_T *reg_buf;
1134 linenr_T reg_firstlnum;
1135 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001136 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001137
Bram Moolenaar0270f382018-07-17 05:43:58 +02001138 // The current match-position is stord in these variables:
1139 linenr_T lnum; // line number, relative to first line
1140 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001141 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001142
1143 int need_clear_subexpr; // subexpressions still need to be cleared
1144#ifdef FEAT_SYN_HL
1145 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1146 // cleared
1147#endif
1148
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001149 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1150 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1151 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001152 int reg_ic;
1153
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001154 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1155 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001157
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001158 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1159 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001160 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001161
1162 // State for the NFA engine regexec.
1163 int nfa_has_zend; // NFA regexp \ze operator encountered.
1164 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1165 int nfa_nsubexpr; // Number of sub expressions actually being used
1166 // during execution. 1 if only the whole match
1167 // (subexpr 0) is used.
1168 // listid is global, so that it increases on recursive calls to
1169 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1170 // all the states.
1171 int nfa_listid;
1172 int nfa_alt_listid;
1173
1174#ifdef FEAT_SYN_HL
1175 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1176#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001177} regexec_T;
1178
1179static regexec_T rex;
1180static int rex_in_use = FALSE;
1181
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001183 * Return TRUE if character 'c' is included in 'iskeyword' option for
1184 * "reg_buf" buffer.
1185 */
1186 static int
1187reg_iswordc(int c)
1188{
1189 return vim_iswordc_buf(c, rex.reg_buf);
1190}
1191
1192/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001193 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1194 */
1195 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001196reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001197{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001198 // when looking behind for a match/no-match lnum is negative. But we
1199 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001200 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001201 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001202 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001203 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001204 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001205 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206}
1207
#ifdef FEAT_SYN_HL
// Workspace to mark the beginning and end of \z(...\) matches.
static char_u   *reg_startzp[NSUBEXP];      // beginning, as a pointer
static char_u   *reg_endzp[NSUBEXP];        // end, as a pointer
static lpos_T   reg_startzpos[NSUBEXP];     // idem, beginning pos
static lpos_T   reg_endzpos[NSUBEXP];       // idem, end pos
#endif
1214
// TRUE when a multi-line regexp is being used; "rex.reg_match" is NULL then.
#define REG_MULTI       (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001217
Bram Moolenaar071d4272004-06-13 20:20:40 +00001218#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001219/*
1220 * Create a new extmatch and mark it as referenced once.
1221 */
1222 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001223make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001224{
1225 reg_extmatch_T *em;
1226
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001227 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001228 if (em != NULL)
1229 em->refcnt = 1;
1230 return em;
1231}
1232
1233/*
1234 * Add a reference to an extmatch.
1235 */
1236 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001237ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238{
1239 if (em != NULL)
1240 em->refcnt++;
1241 return em;
1242}
1243
1244/*
1245 * Remove a reference to an extmatch. If there are no references left, free
1246 * the info.
1247 */
1248 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001249unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001250{
1251 int i;
1252
1253 if (em != NULL && --em->refcnt <= 0)
1254 {
1255 for (i = 0; i < NSUBEXP; ++i)
1256 vim_free(em->matches[i]);
1257 vim_free(em);
1258 }
1259}
1260#endif
1261
1262/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001263 * Get class of previous character.
1264 */
1265 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001266reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001268 if (rex.input > rex.line)
1269 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001270 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 return -1;
1272}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001273
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001274/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001275 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001276 */
1277 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001278reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001279{
1280 pos_T top, bot;
1281 linenr_T lnum;
1282 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001283 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001284 int mode;
1285 colnr_T start, end;
1286 colnr_T start2, end2;
1287 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001288 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001289
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001290 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001291 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001292 return FALSE;
1293
1294 if (VIsual_active)
1295 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001296 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001297 {
1298 top = VIsual;
1299 bot = wp->w_cursor;
1300 }
1301 else
1302 {
1303 top = wp->w_cursor;
1304 bot = VIsual;
1305 }
1306 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001307 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001308 }
1309 else
1310 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001311 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001312 {
1313 top = curbuf->b_visual.vi_start;
1314 bot = curbuf->b_visual.vi_end;
1315 }
1316 else
1317 {
1318 top = curbuf->b_visual.vi_end;
1319 bot = curbuf->b_visual.vi_start;
1320 }
1321 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001322 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001323 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001324 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001325 if (lnum < top.lnum || lnum > bot.lnum)
1326 return FALSE;
1327
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001328 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001329 if (mode == 'v')
1330 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001331 if ((lnum == top.lnum && col < top.col)
1332 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1333 return FALSE;
1334 }
1335 else if (mode == Ctrl_V)
1336 {
1337 getvvcol(wp, &top, &start, NULL, &end);
1338 getvvcol(wp, &bot, &start2, NULL, &end2);
1339 if (start2 < start)
1340 start = start2;
1341 if (end2 > end)
1342 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001343 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001344 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001345
1346 // getvvcol() flushes rex.line, need to get it again
1347 rex.line = reg_getline(rex.lnum);
1348 rex.input = rex.line + col;
1349
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001350 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001351 if (cols < start || cols > end - (*p_sel == 'e'))
1352 return FALSE;
1353 }
1354 return TRUE;
1355}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001356
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357/*
1358 * Check the regexp program for its magic number.
1359 * Return TRUE if it's wrong.
1360 */
1361 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001362prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001364 regprog_T *prog;
1365
Bram Moolenaar6100d022016-10-02 16:51:57 +02001366 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001367 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001368 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001369 return FALSE;
1370
1371 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001373 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374 return TRUE;
1375 }
1376 return FALSE;
1377}
1378
1379/*
1380 * Cleanup the subexpressions, if this wasn't done yet.
1381 * This construction is used to clear the subexpressions only when they are
1382 * used (to increase speed).
1383 */
1384 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001385cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001387 if (!rex.need_clear_subexpr)
1388 return;
1389
1390 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001391 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001392 // Use 0xff to set lnum to -1
1393 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1394 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001395 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001396 else
1397 {
1398 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1399 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
1400 }
1401 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001402}
1403
1404#ifdef FEAT_SYN_HL
1405 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001406cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001407{
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001408 if (!rex.need_clear_zsubexpr)
1409 return;
1410
1411 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001412 {
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001413 // Use 0xff to set lnum to -1
1414 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1415 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001416 }
Yegappan Lakshmananf97a2952023-01-18 18:17:48 +00001417 else
1418 {
1419 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1420 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1421 }
1422 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001423}
1424#endif
1425
1426/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001427 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001428 */
1429 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001430reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001431{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001432 rex.line = reg_getline(++rex.lnum);
1433 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001434 fast_breakcheck();
1435}
1436
1437/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001438 * Check whether a backreference matches.
1439 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001440 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1441 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001442 */
1443 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001444match_with_backref(
1445 linenr_T start_lnum,
1446 colnr_T start_col,
1447 linenr_T end_lnum,
1448 colnr_T end_col,
1449 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001450{
1451 linenr_T clnum = start_lnum;
1452 colnr_T ccol = start_col;
1453 int len;
1454 char_u *p;
1455
1456 if (bytelen != NULL)
1457 *bytelen = 0;
1458 for (;;)
1459 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001460 // Since getting one line may invalidate the other, need to make copy.
1461 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001462 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001463 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001464 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001465 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1466 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001467 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001468 vim_free(reg_tofree);
1469 reg_tofree = alloc(len);
1470 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001471 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001472 reg_tofreelen = len;
1473 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001474 STRCPY(reg_tofree, rex.line);
1475 rex.input = reg_tofree + (rex.input - rex.line);
1476 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001477 }
1478
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001479 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001480 p = reg_getline(clnum);
1481 if (clnum == end_lnum)
1482 len = end_col - ccol;
1483 else
1484 len = (int)STRLEN(p + ccol);
1485
Bram Moolenaar0270f382018-07-17 05:43:58 +02001486 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001487 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001488 if (bytelen != NULL)
1489 *bytelen += len;
1490 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001491 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001492 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001493 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001494
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001495 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001496 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001497 if (bytelen != NULL)
1498 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001499 ++clnum;
1500 ccol = 0;
1501 if (got_int)
1502 return RA_FAIL;
1503 }
1504
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001505 // found a match! Note that rex.line may now point to a copy of the line,
1506 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001507 return RA_MATCH;
1508}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001509
Bram Moolenaarfb031402014-09-09 17:18:49 +02001510/*
1511 * Used in a place where no * or \+ can follow.
1512 */
1513 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001514re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001515{
1516 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001517 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001518 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001519 rc_did_emsg = TRUE;
1520 return FAIL;
1521 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001522 return OK;
1523}
1524
// One entry of the decomposition table: up to three characters, unused
// positions are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decomposition of the characters 0xfb20 - 0xfb4f; the entry for character
// "c" is at index "c - 0xfb20".  An entry marked UNUSED holds the character
// itself.
static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] =
{
    {0x5e2, 0, 0},              // 0xfb20       alt ayin
    {0x5d0, 0, 0},              // 0xfb21       alt alef
    {0x5d3, 0, 0},              // 0xfb22       alt dalet
    {0x5d4, 0, 0},              // 0xfb23       alt he
    {0x5db, 0, 0},              // 0xfb24       alt kaf
    {0x5dc, 0, 0},              // 0xfb25       alt lamed
    {0x5dd, 0, 0},              // 0xfb26       alt mem-sofit
    {0x5e8, 0, 0},              // 0xfb27       alt resh
    {0x5ea, 0, 0},              // 0xfb28       alt tav
    {'+', 0, 0},                // 0xfb29       alt plus
    {0x5e9, 0x5c1, 0},          // 0xfb2a       shin+shin-dot
    {0x5e9, 0x5c2, 0},          // 0xfb2b       shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},      // 0xfb2c       shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},      // 0xfb2d       shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},          // 0xfb2e       alef+patah
    {0x5d0, 0x5b8, 0},          // 0xfb2f       alef+qamats
    {0x5d0, 0x5b4, 0},          // 0xfb30       alef+hiriq
    {0x5d1, 0x5bc, 0},          // 0xfb31       bet+dagesh
    {0x5d2, 0x5bc, 0},          // 0xfb32       gimel+dagesh
    {0x5d3, 0x5bc, 0},          // 0xfb33       dalet+dagesh
    {0x5d4, 0x5bc, 0},          // 0xfb34       he+dagesh
    {0x5d5, 0x5bc, 0},          // 0xfb35       vav+dagesh
    {0x5d6, 0x5bc, 0},          // 0xfb36       zayin+dagesh
    {0xfb37, 0, 0},             // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},          // 0xfb38       tet+dagesh
    {0x5d9, 0x5bc, 0},          // 0xfb39       yud+dagesh
    {0x5da, 0x5bc, 0},          // 0xfb3a       kaf sofit+dagesh
    {0x5db, 0x5bc, 0},          // 0xfb3b       kaf+dagesh
    {0x5dc, 0x5bc, 0},          // 0xfb3c       lamed+dagesh
    {0xfb3d, 0, 0},             // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},          // 0xfb3e       mem+dagesh
    {0xfb3f, 0, 0},             // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},          // 0xfb40       nun+dagesh
    {0x5e1, 0x5bc, 0},          // 0xfb41       samech+dagesh
    {0xfb42, 0, 0},             // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},          // 0xfb43       pe sofit+dagesh
    {0x5e4, 0x5bc, 0},          // 0xfb44       pe+dagesh
    {0xfb45, 0, 0},             // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},          // 0xfb46       tsadi+dagesh
    {0x5e7, 0x5bc, 0},          // 0xfb47       qof+dagesh
    {0x5e8, 0x5bc, 0},          // 0xfb48       resh+dagesh
    {0x5e9, 0x5bc, 0},          // 0xfb49       shin+dagesh
    {0x5ea, 0x5bc, 0},          // 0xfb4a       tav+dagesh
    {0x5d5, 0x5b9, 0},          // 0xfb4b       vav+holam
    {0x5d1, 0x5bf, 0},          // 0xfb4c       bet+rafe
    {0x5db, 0x5bf, 0},          // 0xfb4d       kaf+rafe
    {0x5e4, 0x5bf, 0},          // 0xfb4e       pe+rafe
    {0x5d0, 0x5dc, 0}           // 0xfb4f       alef-lamed
};
1583
1584 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001585mb_decompose(int c, int *c1, int *c2, int *c3)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001586{
1587 decomp_T d;
1588
Bram Moolenaar2eec59e2013-05-21 21:37:20 +02001589 if (c >= 0xfb20 && c <= 0xfb4f)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590 {
1591 d = decomp_table[c - 0xfb20];
1592 *c1 = d.a;
1593 *c2 = d.b;
1594 *c3 = d.c;
1595 }
1596 else
1597 {
1598 *c1 = c;
1599 *c2 = *c3 = 0;
1600 }
1601}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602
1603/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001604 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001605 * Return 0 if strings match, non-zero otherwise.
1606 * Correct the length "*n" when composing characters are ignored.
1607 */
1608 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001609cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001610{
1611 int result;
1612
Bram Moolenaar6100d022016-10-02 16:51:57 +02001613 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001614 result = STRNCMP(s1, s2, *n);
1615 else
1616 result = MB_STRNICMP(s1, s2, *n);
1617
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001618 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001619 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001620 {
1621 char_u *str1, *str2;
1622 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 int junk;
1624
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001625 // we have to handle the strcmp ourselves, since it is necessary to
1626 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001627 str1 = s1;
1628 str2 = s2;
1629 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001630 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001631 {
1632 c1 = mb_ptr2char_adv(&str1);
1633 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001634
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001635 // Decompose the character if necessary, into 'base' characters.
1636 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001637 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001639 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001640 mb_decompose(c1, &c11, &junk, &junk);
1641 mb_decompose(c2, &c12, &junk, &junk);
1642 c1 = c11;
1643 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001644 if (c11 != c12
1645 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001646 break;
1647 }
1648 }
1649 result = c2 - c1;
1650 if (result == 0)
1651 *n = (int)(str2 - s2);
1652 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001653
1654 return result;
1655}
1656
1657/*
1658 * cstrchr: This function is used a lot for simple searches, keep it fast!
1659 */
1660 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001661cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001662{
1663 char_u *p;
1664 int cc;
1665
Bram Moolenaara12a1612019-01-24 16:39:02 +01001666 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001667 return vim_strchr(s, c);
1668
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001669 // tolower() and toupper() can be slow, comparing twice should be a lot
1670 // faster (esp. when using MS Visual C++!).
1671 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672 if (enc_utf8 && c > 0x80)
1673 cc = utf_fold(c);
1674 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001675 if (MB_ISUPPER(c))
1676 cc = MB_TOLOWER(c);
1677 else if (MB_ISLOWER(c))
1678 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001679 else
1680 return vim_strchr(s, c);
1681
Bram Moolenaar071d4272004-06-13 20:20:40 +00001682 if (has_mbyte)
1683 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001684 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001685 {
1686 if (enc_utf8 && c > 0x80)
1687 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001688 int uc = utf_ptr2char(p);
1689
1690 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1691 // not 0xff.
1692 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001693 return p;
1694 }
1695 else if (*p == c || *p == cc)
1696 return p;
1697 }
1698 }
1699 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001700 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001701 for (p = s; *p != NUL; ++p)
1702 if (*p == c || *p == cc)
1703 return p;
1704
1705 return NULL;
1706}
1707
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001708////////////////////////////////////////////////////////////////
1709// regsub stuff //
1710////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001711
Bram Moolenaar071d4272004-06-13 20:20:40 +00001712/*
1713 * We should define ftpr as a pointer to a function returning a pointer to
1714 * a function returning a pointer to a function ...
1715 * This is impossible, so we declare a pointer to a function returning a
Bram Moolenaar30d64132020-09-06 17:09:12 +02001716 * void pointer. This should work for all compilers.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001717 */
Bram Moolenaar30d64132020-09-06 17:09:12 +02001718typedef void (*(*fptr_T)(int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001720static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001722 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001723do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001725 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001727 return (fptr_T)NULL;
1728}
1729
1730 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001731do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001732{
1733 *d = MB_TOUPPER(c);
1734
1735 return (fptr_T)do_Upper;
1736}
1737
1738 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001739do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001740{
1741 *d = MB_TOLOWER(c);
1742
1743 return (fptr_T)NULL;
1744}
1745
1746 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001747do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001748{
1749 *d = MB_TOLOWER(c);
1750
1751 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001752}
1753
1754/*
1755 * regtilde(): Replace tildes in the pattern by the old pattern.
1756 *
1757 * Short explanation of the tilde: It stands for the previous replacement
1758 * pattern. If that previous pattern also contains a ~ we should go back a
1759 * step further... But we insert the previous pattern into the current one
1760 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001761 * This still does not handle the case where "magic" changes. So require the
1762 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 *
1764 * The tildes are parsed once before the first call to vim_regsub().
1765 */
1766 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001767regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768{
1769 char_u *newsub = source;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 char_u *p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001771
1772 for (p = newsub; *p; ++p)
1773 {
1774 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1775 {
1776 if (reg_prev_sub != NULL)
1777 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001778 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001779 // Avoid making the text longer than MAXCOL, it will cause
1780 // trouble at some point.
1781 size_t prevsublen = STRLEN(reg_prev_sub);
1782 size_t newsublen = STRLEN(newsub);
1783 if (prevsublen > MAXCOL || newsublen > MAXCOL
1784 || newsublen + prevsublen > MAXCOL)
1785 {
1786 emsg(_(e_resulting_text_too_long));
1787 break;
1788 }
1789
1790 char_u *tmpsub = alloc(newsublen + prevsublen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 if (tmpsub != NULL)
1792 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001793 // copy prefix
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001794 size_t prefixlen = p - newsub; // not including ~
1795 mch_memmove(tmpsub, newsub, prefixlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001796 // interpret tilde
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001797 mch_memmove(tmpsub + prefixlen, reg_prev_sub,
1798 prevsublen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001799 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001801 ++p; // back off backslash
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001802 STRCPY(tmpsub + prefixlen + prevsublen, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001804 if (newsub != source) // allocated newsub before
Bram Moolenaar071d4272004-06-13 20:20:40 +00001805 vim_free(newsub);
1806 newsub = tmpsub;
Bram Moolenaarab9a2d82023-05-09 21:15:30 +01001807 p = newsub + prefixlen + prevsublen;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001808 }
1809 }
1810 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001811 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001812 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001813 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001814 --p;
1815 }
1816 else
1817 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001818 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001820 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001821 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001822 }
1823 }
1824
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001825 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1826 // because recursive calls may make the returned string invalid.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 vim_free(reg_prev_sub);
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001828 reg_prev_sub = vim_strsave(newsub);
1829
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830 return newsub;
1831}
1832
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// State used for reg_submatch().  It is kept apart from "rex" because the
// substitution string can be an expression that contains a call to
// substitute() and submatch().  vim_regsub_both() fills it from the
// matching fields of "rex" before evaluating the expression.
typedef struct {
    regmatch_T	*sm_match;	// from rex.reg_match
    regmmatch_T	*sm_mmatch;	// from rex.reg_mmatch
    linenr_T	sm_firstlnum;	// from rex.reg_firstlnum
    linenr_T	sm_maxline;	// from rex.reg_maxline
    int		sm_line_lbr;	// from rex.reg_line_lbr
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1849
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001850#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001851
1852/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001853 * Put the submatches in "argv[argskip]" which is a list passed into
1854 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001855 */
1856 static int
zeertzjq48db5da2022-09-16 12:10:03 +01001857fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001858{
1859 listitem_T *li;
1860 int i;
1861 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001862 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001863
zeertzjqabd58d82022-09-16 16:06:32 +01001864 if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001865 // called function doesn't take a submatches argument
1866 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001867
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001868 // Relies on sl_list to be the first item in staticList10_T.
1869 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001870
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001871 // There are always 10 list items in staticList10_T.
1872 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001873 for (i = 0; i < 10; ++i)
1874 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001875 s = rsm.sm_match->startp[i];
1876 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001877 s = NULL;
1878 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001879 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001880 li->li_tv.v_type = VAR_STRING;
1881 li->li_tv.vval.v_string = s;
1882 li = li->li_next;
1883 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001884 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001885}
1886
1887 static void
1888clear_submatch_list(staticList10_T *sl)
1889{
1890 int i;
1891
1892 for (i = 0; i < 10; ++i)
1893 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1894}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001895#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001896
Bram Moolenaar071d4272004-06-13 20:20:40 +00001897/*
1898 * vim_regsub() - perform substitutions after a vim_regexec() or
1899 * vim_regexec_multi() match.
1900 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001901 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
dundargocc57b5bc2022-11-02 13:30:51 +00001902 * Otherwise nothing is copied, only compute the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001904 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1905 *
1906 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1907 * double them to keep them, and insert a backslash before a CR to avoid it
1908 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909 *
1910 * Note: The matched text must not change between the call of
1911 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1912 * references invalid!
1913 *
1914 * Returns the size of the replacement, including terminating NUL.
1915 */
1916 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001917vim_regsub(
1918 regmatch_T *rmp,
1919 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001920 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001921 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001922 int destlen,
1923 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001924{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001925 int result;
1926 regexec_T rex_save;
1927 int rex_in_use_save = rex_in_use;
1928
1929 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001930 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001931 rex_save = rex;
1932 rex_in_use = TRUE;
1933
1934 rex.reg_match = rmp;
1935 rex.reg_mmatch = NULL;
1936 rex.reg_maxline = 0;
1937 rex.reg_buf = curbuf;
1938 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001939 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001940
1941 rex_in_use = rex_in_use_save;
1942 if (rex_in_use)
1943 rex = rex_save;
1944
1945 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001946}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001947
1948 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001949vim_regsub_multi(
1950 regmmatch_T *rmp,
1951 linenr_T lnum,
1952 char_u *source,
1953 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001954 int destlen,
1955 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001956{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001957 int result;
1958 regexec_T rex_save;
1959 int rex_in_use_save = rex_in_use;
1960
1961 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001962 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001963 rex_save = rex;
1964 rex_in_use = TRUE;
1965
1966 rex.reg_match = NULL;
1967 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001968 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001969 rex.reg_firstlnum = lnum;
1970 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1971 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001972 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001973
1974 rex_in_use = rex_in_use_save;
1975 if (rex_in_use)
1976 rex = rex_save;
1977
1978 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001979}
1980
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4
// Result of evaluating the substitute expression, one slot per nesting
// level.  vim_regsub_both() stores it in the call without REGSUB_COPY and
// uses it in the following call with REGSUB_COPY.
static char_u   *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Apply VIM_CLEAR() to every slot of eval_result[].
 */
    void
free_resub_eval_result(void)
{
    int		level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
	VIM_CLEAR(eval_result[level]);
	++level;
    }
}
# endif
#endif
1997
Bram Moolenaar071d4272004-06-13 20:20:40 +00001998 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001999vim_regsub_both(
2000 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002001 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01002002 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002003 int destlen,
2004 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002005{
2006 char_u *src;
2007 char_u *dst;
2008 char_u *s;
2009 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002010 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002011 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002012 fptr_T func_all = (fptr_T)NULL;
2013 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002014 linenr_T clnum = 0; // init for GCC
2015 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002017 static int nesting = 0;
2018 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002019#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002020 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002022 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002023 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002024 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002025 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002026 return 0;
2027 }
2028 if (prog_magic_wrong())
2029 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002030#ifdef FEAT_EVAL
2031 if (nesting == MAX_REGSUB_NESTING)
2032 {
2033 emsg(_(e_substitute_nesting_too_deep));
2034 return 0;
2035 }
2036 nested = nesting;
2037#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002038 src = source;
2039 dst = dest;
2040
2041 /*
2042 * When the substitute part starts with "\=" evaluate it as an expression.
2043 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002044 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002045 {
2046#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002047 // To make sure that the length doesn't change between checking the
2048 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01002049 // resulting string is saved from the call with
2050 // "flags & REGSUB_COPY" == 0 to the call with
2051 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002052 if (copy)
2053 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002054 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002055 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002056 STRCPY(dest, eval_result[nested]);
2057 dst += STRLEN(eval_result[nested]);
2058 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002059 }
2060 }
2061 else
2062 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002063 int prev_can_f_submatch = can_f_submatch;
2064 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002065
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002066 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002068 // The expression may contain substitute(), which calls us
2069 // recursively. Make sure submatch() gets the text from the first
2070 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002071 if (can_f_submatch)
2072 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002073 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002074 rsm.sm_match = rex.reg_match;
2075 rsm.sm_mmatch = rex.reg_mmatch;
2076 rsm.sm_firstlnum = rex.reg_firstlnum;
2077 rsm.sm_maxline = rex.reg_maxline;
2078 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002079
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002080 // Although unlikely, it is possible that the expression invokes a
2081 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002082 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002083 ++nesting;
2084
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002085 if (expr != NULL)
2086 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002087 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002088 char_u buf[NUMBUFLEN];
2089 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002090 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002091 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002092
2093 rettv.v_type = VAR_STRING;
2094 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002095 argv[0].v_type = VAR_LIST;
2096 argv[0].vval.v_list = &matchList.sl_list;
2097 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002098 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002099 funcexe.fe_argv_func = fill_submatch_list;
2100 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002101 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002102 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002103 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002104 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002105 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002106 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002107 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002108 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002109
Bram Moolenaar6100d022016-10-02 16:51:57 +02002110 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002111 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002112 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002113 }
LemonBoyf3b48952022-05-05 13:53:03 +01002114 else if (expr->v_type == VAR_INSTR)
2115 {
2116 exe_typval_instr(expr, &rettv);
2117 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002118 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002119 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002120 clear_submatch_list(&matchList);
2121
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002122 if (rettv.v_type == VAR_UNKNOWN)
2123 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002124 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002125 else
2126 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002127 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2128 if (eval_result[nested] != NULL)
2129 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002130 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002131 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002132 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002133 else if (substitute_instr != NULL)
2134 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002135 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002136 else
Bram Moolenaara4e0b972022-10-01 19:43:52 +01002137 eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002138 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002139
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002140 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002141 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002142 int had_backslash = FALSE;
2143
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002144 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002146 // Change NL to CR, so that it becomes a line break,
2147 // unless called from vim_regexec_nl().
2148 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002149 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002150 *s = CAR;
2151 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002152 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002153 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002154 /* Change NL to CR here too, so that this works:
2155 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2156 * abc\
2157 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002158 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002159 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002160 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002161 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002162 had_backslash = TRUE;
2163 }
2164 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002165 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002166 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002167 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002168 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002169 if (s != NULL)
2170 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002171 vim_free(eval_result[nested]);
2172 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002173 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002174 }
2175
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002176 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002177 }
2178
Bram Moolenaar6100d022016-10-02 16:51:57 +02002179 can_f_submatch = prev_can_f_submatch;
2180 if (can_f_submatch)
2181 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002182 }
2183#endif
2184 }
2185 else
2186 while ((c = *src++) != NUL)
2187 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002188 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002189 no = 0;
2190 else if (c == '\\' && *src != NUL)
2191 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002192 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002193 {
2194 ++src;
2195 no = 0;
2196 }
2197 else if ('0' <= *src && *src <= '9')
2198 {
2199 no = *src++ - '0';
2200 }
2201 else if (vim_strchr((char_u *)"uUlLeE", *src))
2202 {
2203 switch (*src++)
2204 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002205 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002206 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002207 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002208 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002209 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002210 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002211 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002212 continue;
2213 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002214 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002215 continue;
2216 }
2217 }
2218 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002219 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002220 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002221 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2222 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002223 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002224 if (copy)
2225 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002226 if (dst + 3 > dest + destlen)
2227 {
2228 iemsg("vim_regsub_both(): not enough space");
2229 return 0;
2230 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002231 *dst++ = c;
2232 *dst++ = *src++;
2233 *dst++ = *src++;
2234 }
2235 else
2236 {
2237 dst += 3;
2238 src += 2;
2239 }
2240 continue;
2241 }
2242
Bram Moolenaar071d4272004-06-13 20:20:40 +00002243 if (c == '\\' && *src != NUL)
2244 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002245 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002246 switch (*src)
2247 {
2248 case 'r': c = CAR; ++src; break;
2249 case 'n': c = NL; ++src; break;
2250 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002251 // Oh no! \e already has meaning in subst pat :-(
2252 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002253 case 'b': c = Ctrl_H; ++src; break;
2254
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002255 // If "backslash" is TRUE the backslash will be removed
2256 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002257 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002258 {
2259 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002260 {
2261 if (dst + 1 > dest + destlen)
2262 {
2263 iemsg("vim_regsub_both(): not enough space");
2264 return 0;
2265 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002266 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002267 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002268 ++dst;
2269 }
2270 c = *src++;
2271 }
2272 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002273 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002274 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002275
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002276 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002277 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002278 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002279 func_one = (fptr_T)(func_one(&cc, c));
2280 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002281 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002282 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002283 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002284 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002285
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002286 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002287 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002288 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002289 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002290
Bram Moolenaar071d4272004-06-13 20:20:40 +00002291 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002292 {
2293 if (dst + charlen > dest + destlen)
2294 {
2295 iemsg("vim_regsub_both(): not enough space");
2296 return 0;
2297 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002298 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002299 }
2300 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002301 if (enc_utf8)
2302 {
2303 int clen = utf_ptr2len(src - 1);
2304
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002305 // If the character length is shorter than "totlen", there
2306 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002307 if (clen < totlen)
2308 {
2309 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002310 {
2311 if (dst + totlen - clen > dest + destlen)
2312 {
2313 iemsg("vim_regsub_both(): not enough space");
2314 return 0;
2315 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002316 mch_memmove(dst + 1, src - 1 + clen,
2317 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002318 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002319 dst += totlen - clen;
2320 }
2321 }
2322 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002323 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002324 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002325 {
2326 if (dst + 1 > dest + destlen)
2327 {
2328 iemsg("vim_regsub_both(): not enough space");
2329 return 0;
2330 }
2331 *dst = cc;
2332 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002333 dst++;
2334 }
2335 else
2336 {
2337 if (REG_MULTI)
2338 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002339 clnum = rex.reg_mmatch->startpos[no].lnum;
2340 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002341 s = NULL;
2342 else
2343 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002344 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2345 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2346 len = rex.reg_mmatch->endpos[no].col
2347 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002348 else
2349 len = (int)STRLEN(s);
2350 }
2351 }
2352 else
2353 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002354 s = rex.reg_match->startp[no];
2355 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002356 s = NULL;
2357 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002358 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002359 }
2360 if (s != NULL)
2361 {
2362 for (;;)
2363 {
2364 if (len == 0)
2365 {
2366 if (REG_MULTI)
2367 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002368 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002369 break;
2370 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002371 {
2372 if (dst + 1 > dest + destlen)
2373 {
2374 iemsg("vim_regsub_both(): not enough space");
2375 return 0;
2376 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002377 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002378 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002379 ++dst;
2380 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002381 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2382 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002383 else
2384 len = (int)STRLEN(s);
2385 }
2386 else
2387 break;
2388 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002389 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002390 {
2391 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002392 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002393 goto exit;
2394 }
2395 else
2396 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002397 if ((flags & REGSUB_BACKSLASH)
2398 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002399 {
2400 /*
2401 * Insert a backslash in front of a CR, otherwise
2402 * it will be replaced by a line break.
2403 * Number of backslashes will be halved later,
2404 * double them here.
2405 */
2406 if (copy)
2407 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002408 if (dst + 2 > dest + destlen)
2409 {
2410 iemsg("vim_regsub_both(): not enough space");
2411 return 0;
2412 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002413 dst[0] = '\\';
2414 dst[1] = *s;
2415 }
2416 dst += 2;
2417 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002418 else
2419 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002420 if (has_mbyte)
2421 c = mb_ptr2char(s);
2422 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002423 c = *s;
2424
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002425 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002426 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002427 func_one = (fptr_T)(func_one(&cc, c));
2428 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002429 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002430 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002431 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002432 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002433
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002434 if (has_mbyte)
2435 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002436 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002437 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002438
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002439 // Copy composing characters separately, one
2440 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002441 if (enc_utf8)
2442 l = utf_ptr2len(s) - 1;
2443 else
2444 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002445
2446 s += l;
2447 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002448 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002449 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002450 {
2451 if (dst + charlen > dest + destlen)
2452 {
2453 iemsg("vim_regsub_both(): not enough space");
2454 return 0;
2455 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002456 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002457 }
2458 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002459 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002460 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002461 {
2462 if (dst + 1 > dest + destlen)
2463 {
2464 iemsg("vim_regsub_both(): not enough space");
2465 return 0;
2466 }
2467 *dst = cc;
2468 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002469 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002470 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002471
Bram Moolenaar071d4272004-06-13 20:20:40 +00002472 ++s;
2473 --len;
2474 }
2475 }
2476 }
2477 no = -1;
2478 }
2479 }
2480 if (copy)
2481 *dst = NUL;
2482
2483exit:
2484 return (int)((dst - dest) + 1);
2485}
2486
2487#ifdef FEAT_EVAL
2488/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002489 * Call reg_getline() with the line numbers from the submatch. If a
2490 * substitute() was used the reg_maxline and other values have been
2491 * overwritten.
2492 */
2493 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002494reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002495{
2496 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002497 linenr_T save_first = rex.reg_firstlnum;
2498 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002499
Bram Moolenaar6100d022016-10-02 16:51:57 +02002500 rex.reg_firstlnum = rsm.sm_firstlnum;
2501 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002502
2503 s = reg_getline(lnum);
2504
Bram Moolenaar6100d022016-10-02 16:51:57 +02002505 rex.reg_firstlnum = save_first;
2506 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002507 return s;
2508}
2509
2510/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002511 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002512 * allocated memory.
2513 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2514 */
2515 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002516reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002517{
2518 char_u *retval = NULL;
2519 char_u *s;
2520 int len;
2521 int round;
2522 linenr_T lnum;
2523
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002524 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002525 return NULL;
2526
Bram Moolenaar6100d022016-10-02 16:51:57 +02002527 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002528 {
2529 /*
2530 * First round: compute the length and allocate memory.
2531 * Second round: copy the text.
2532 */
2533 for (round = 1; round <= 2; ++round)
2534 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002535 lnum = rsm.sm_mmatch->startpos[no].lnum;
2536 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002537 return NULL;
2538
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002539 s = reg_getline_submatch(lnum);
2540 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002541 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002542 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002543 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002544 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002545 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002546 len = rsm.sm_mmatch->endpos[no].col
2547 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002548 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002549 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002550 ++len;
2551 }
2552 else
2553 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002554 // Multiple lines: take start line from start col, middle
2555 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002556 len = (int)STRLEN(s);
2557 if (round == 2)
2558 {
2559 STRCPY(retval, s);
2560 retval[len] = '\n';
2561 }
2562 ++len;
2563 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002564 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002565 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002566 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002567 if (round == 2)
2568 STRCPY(retval + len, s);
2569 len += (int)STRLEN(s);
2570 if (round == 2)
2571 retval[len] = '\n';
2572 ++len;
2573 }
2574 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002575 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002576 rsm.sm_mmatch->endpos[no].col);
2577 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002578 if (round == 2)
2579 retval[len] = NUL;
2580 ++len;
2581 }
2582
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002583 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002584 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002585 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002586 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 return NULL;
2588 }
2589 }
2590 }
2591 else
2592 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002593 s = rsm.sm_match->startp[no];
2594 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002595 retval = NULL;
2596 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002597 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002598 }
2599
2600 return retval;
2601}
Bram Moolenaar41571762014-04-02 19:00:58 +02002602
2603/*
2604 * Used for the submatch() function with the optional non-zero argument: get
2605 * the list of strings from the n'th submatch in allocated memory with NULs
2606 * represented in NLs.
2607 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2608 * command, for a non-existing submatch and for any error.
2609 */
2610 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002611reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002612{
2613 char_u *s;
2614 linenr_T slnum;
2615 linenr_T elnum;
2616 colnr_T scol;
2617 colnr_T ecol;
2618 int i;
2619 list_T *list;
2620 int error = FALSE;
2621
2622 if (!can_f_submatch || no < 0)
2623 return NULL;
2624
Bram Moolenaar6100d022016-10-02 16:51:57 +02002625 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002626 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002627 slnum = rsm.sm_mmatch->startpos[no].lnum;
2628 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002629 if (slnum < 0 || elnum < 0)
2630 return NULL;
2631
Bram Moolenaar6100d022016-10-02 16:51:57 +02002632 scol = rsm.sm_mmatch->startpos[no].col;
2633 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002634
2635 list = list_alloc();
2636 if (list == NULL)
2637 return NULL;
2638
2639 s = reg_getline_submatch(slnum) + scol;
2640 if (slnum == elnum)
2641 {
2642 if (list_append_string(list, s, ecol - scol) == FAIL)
2643 error = TRUE;
2644 }
2645 else
2646 {
2647 if (list_append_string(list, s, -1) == FAIL)
2648 error = TRUE;
2649 for (i = 1; i < elnum - slnum; i++)
2650 {
2651 s = reg_getline_submatch(slnum + i);
2652 if (list_append_string(list, s, -1) == FAIL)
2653 error = TRUE;
2654 }
2655 s = reg_getline_submatch(elnum);
2656 if (list_append_string(list, s, ecol) == FAIL)
2657 error = TRUE;
2658 }
2659 }
2660 else
2661 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002662 s = rsm.sm_match->startp[no];
2663 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002664 return NULL;
2665 list = list_alloc();
2666 if (list == NULL)
2667 return NULL;
2668 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002669 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002670 error = TRUE;
2671 }
2672
2673 if (error)
2674 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002675 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002676 return NULL;
2677 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002678 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002679 return list;
2680}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002681#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002682
Bram Moolenaarf4140482020-02-15 23:06:45 +01002683/*
2684 * Initialize the values used for matching against multiple lines
2685 */
2686 static void
2687init_regexec_multi(
2688 regmmatch_T *rmp,
2689 win_T *win, // window in which to search or NULL
2690 buf_T *buf, // buffer in which to search
2691 linenr_T lnum) // nr of line to start looking for match
2692{
2693 rex.reg_match = NULL;
2694 rex.reg_mmatch = rmp;
2695 rex.reg_buf = buf;
2696 rex.reg_win = win;
2697 rex.reg_firstlnum = lnum;
2698 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2699 rex.reg_line_lbr = FALSE;
2700 rex.reg_ic = rmp->rmm_ic;
2701 rex.reg_icombine = FALSE;
2702 rex.reg_maxcol = rmp->rmm_maxcol;
2703}
2704
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002705#include "regexp_bt.c"
2706
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002707static regengine_T bt_regengine =
2708{
2709 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002710 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002711 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002712 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002713};
2714
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002715#include "regexp_nfa.c"
2716
2717static regengine_T nfa_regengine =
2718{
2719 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002720 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002721 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002722 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002723};
2724
// The regexp engine to use; needed for vim_regcomp(), which sets it on every
// call.  The value must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Names of the engines for debug messages, indexed by engine number.
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2736
2737/*
2738 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002739 * Returns the program in allocated memory.
2740 * Use vim_regfree() to free the memory.
2741 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002742 */
2743 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002744vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002745{
2746 regprog_T *prog = NULL;
2747 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002748 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002749
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002750 regexp_engine = p_re;
2751
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002752 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002753 if (STRNCMP(expr, "\\%#=", 4) == 0)
2754 {
2755 int newengine = expr[4] - '0';
2756
2757 if (newengine == AUTOMATIC_ENGINE
2758 || newengine == BACKTRACKING_ENGINE
2759 || newengine == NFA_ENGINE)
2760 {
2761 regexp_engine = expr[4] - '0';
2762 expr += 5;
2763#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002764 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002765 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002766#endif
2767 }
2768 else
2769 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002770 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002771 regexp_engine = AUTOMATIC_ENGINE;
2772 }
2773 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002774#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002775 bt_regengine.expr = expr;
2776 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002777#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002778 // reg_iswordc() uses rex.reg_buf
2779 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002780
2781 /*
2782 * First try the NFA engine, unless backtracking was requested.
2783 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002784 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002785 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002786 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002787 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002788 else
2789 prog = bt_regengine.regcomp(expr, re_flags);
2790
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002791 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002792 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002793 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002794#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002795 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002796 {
2797 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002798 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002799 if (f)
2800 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002801 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002802 fclose(f);
2803 }
2804 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002805 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002806 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002807 }
2808#endif
2809 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002810 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002811 * The NFA engine also fails for patterns that it can't handle well
2812 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002813 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002814 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002815 if (regexp_engine == AUTOMATIC_ENGINE
2816 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002817 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002818 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002819#ifdef FEAT_EVAL
2820 report_re_switch(expr);
2821#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002822 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002823 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002824 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002825
Bram Moolenaarfda37292014-11-05 14:27:36 +01002826 if (prog != NULL)
2827 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002828 // Store the info needed to call regcomp() again when the engine turns
2829 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002830 prog->re_engine = regexp_engine;
2831 prog->re_flags = re_flags;
2832 }
2833
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002834 return prog;
2835}
2836
2837/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002838 * Free a compiled regexp program, returned by vim_regcomp().
2839 */
2840 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002841vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002842{
2843 if (prog != NULL)
2844 prog->engine->regfree(prog);
2845}
2846
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory held by the regexp code in "reg_tofree", "reg_prev_sub",
 * "regstack" and "backpos".
 */
    void
free_regexp_stuff(void)
{
    // The allocated strings.
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);

    // The growarrays.
    ga_clear(&regstack);
    ga_clear(&backpos);
}
#endif
2857
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero, give a message that the backtracking
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2871
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog", which is set while "prog" is being
 * executed.  "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int in_use = prog->re_in_use;

    return in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002882
2883/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002884 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002885 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002886 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002887 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002888 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002889 *
2890 * Return TRUE if there is a match, FALSE if not.
2891 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002892 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002893vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002894 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002895 char_u *line, // string to match against
2896 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002897 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002898{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002899 int result;
2900 regexec_T rex_save;
2901 int rex_in_use_save = rex_in_use;
2902
Bram Moolenaar0270f382018-07-17 05:43:58 +02002903 // Cannot use the same prog recursively, it contains state.
2904 if (rmp->regprog->re_in_use)
2905 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002906 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002907 return FALSE;
2908 }
2909 rmp->regprog->re_in_use = TRUE;
2910
Bram Moolenaar6100d022016-10-02 16:51:57 +02002911 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002912 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002913 rex_save = rex;
2914 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002915
Bram Moolenaar6100d022016-10-02 16:51:57 +02002916 rex.reg_startp = NULL;
2917 rex.reg_endp = NULL;
2918 rex.reg_startpos = NULL;
2919 rex.reg_endpos = NULL;
2920
2921 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002922 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002923
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002924 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002925 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2926 && result == NFA_TOO_EXPENSIVE)
2927 {
2928 int save_p_re = p_re;
2929 int re_flags = rmp->regprog->re_flags;
2930 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2931
2932 p_re = BACKTRACKING_ENGINE;
2933 vim_regfree(rmp->regprog);
2934 if (pat != NULL)
2935 {
2936#ifdef FEAT_EVAL
2937 report_re_switch(pat);
2938#endif
2939 rmp->regprog = vim_regcomp(pat, re_flags);
2940 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002941 {
2942 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002943 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002944 rmp->regprog->re_in_use = FALSE;
2945 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002946 vim_free(pat);
2947 }
2948
2949 p_re = save_p_re;
2950 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002951
2952 rex_in_use = rex_in_use_save;
2953 if (rex_in_use)
2954 rex = rex_save;
2955
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002956 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002957}
2958
#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
/*
 * Match the program "*prog" against "line", starting at column "col".
 * "ignore_case" is stored as the "rm_ic" flag of the match.
 * Note: "*prog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_prog(
    regprog_T	**prog,
    int		ignore_case,
    char_u	*line,
    colnr_T	col)
{
    regmatch_T	rm;
    int		matched;

    rm.rm_ic = ignore_case;
    rm.regprog = *prog;
    matched = vim_regexec_string(&rm, line, col, FALSE);

    // Pass the possibly changed program back to the caller.
    *prog = rm.regprog;
    return matched;
}
#endif
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002981
2982/*
2983 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002984 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002985 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002986 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002987vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002988{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002989 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002990}
2991
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002992/*
2993 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002994 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002995 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002996 */
2997 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002998vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002999{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02003000 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003001}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003002
3003/*
3004 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003005 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
3006 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003007 * Uses curbuf for line count and 'iskeyword'.
3008 *
3009 * Return zero if there is no match. Return number of lines contained in the
3010 * match otherwise.
3011 */
3012 long
Bram Moolenaar05540972016-01-30 20:31:25 +01003013vim_regexec_multi(
3014 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003015 win_T *win, // window in which to search or NULL
3016 buf_T *buf, // buffer in which to search
3017 linenr_T lnum, // nr of line to start looking for match
3018 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003019 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003020{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003021 int result;
3022 regexec_T rex_save;
3023 int rex_in_use_save = rex_in_use;
3024
Bram Moolenaar0270f382018-07-17 05:43:58 +02003025 // Cannot use the same prog recursively, it contains state.
3026 if (rmp->regprog->re_in_use)
3027 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00003028 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02003029 return FALSE;
3030 }
3031 rmp->regprog->re_in_use = TRUE;
3032
Bram Moolenaar6100d022016-10-02 16:51:57 +02003033 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003034 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02003035 rex_save = rex;
3036 rex_in_use = TRUE;
3037
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003038 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003039 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003040 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003041
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003042 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003043 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3044 && result == NFA_TOO_EXPENSIVE)
3045 {
3046 int save_p_re = p_re;
3047 int re_flags = rmp->regprog->re_flags;
3048 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3049
3050 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003051 if (pat != NULL)
3052 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003053 regprog_T *prev_prog = rmp->regprog;
3054
Bram Moolenaarfda37292014-11-05 14:27:36 +01003055#ifdef FEAT_EVAL
3056 report_re_switch(pat);
3057#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003058#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003059 // checking for \z misuse was already done when compiling for NFA,
3060 // allow all here
3061 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003062#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003063 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003064#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003065 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003066#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003067 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003068 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003069 // Somehow compiling the pattern failed now, put back the
3070 // previous one to avoid "regprog" becoming NULL.
3071 rmp->regprog = prev_prog;
3072 }
3073 else
3074 {
3075 vim_regfree(prev_prog);
3076
Bram Moolenaar41499802018-07-18 06:02:09 +02003077 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003078 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003079 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003080 rmp->regprog->re_in_use = FALSE;
3081 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003082 vim_free(pat);
3083 }
3084 p_re = save_p_re;
3085 }
3086
Bram Moolenaar6100d022016-10-02 16:51:57 +02003087 rex_in_use = rex_in_use_save;
3088 if (rex_in_use)
3089 rex = rex_save;
3090
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003091 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003092}