blob: 1a5cfd07cbd73a025a3c861b7b88e8d3f69c6071 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
Bram Moolenaar155f2d12022-06-20 13:38:33 +010024static sig_atomic_t dummy_timeout_flag = 0;
25static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
Bram Moolenaar071d4272004-06-13 20:20:40 +000028/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000029 * Magic characters have a special meaning, they don't match literally.
30 * Magic characters are negative. This separates them from literal characters
31 * (possibly multi-byte). Only ASCII characters can be Magic.
32 */
33#define Magic(x) ((int)(x) - 256)
34#define un_Magic(x) ((x) + 256)
35#define is_Magic(x) ((x) < 0)
36
/*
 * Return the literal counterpart of "x": a Magic (negative) value is mapped
 * back with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Flip the magicness of "x": a Magic value becomes its literal character and
 * a literal character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
54 void
55init_regexp_timeout(long msec)
56{
57 timeout_flag = start_timeout(msec);
58}
59
60 void
61disable_regexp_timeout(void)
62{
63 stop_timeout();
Bram Moolenaar1f30caf2022-06-19 14:36:35 +010064 timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010065}
66#endif
67
Bram Moolenaar071d4272004-06-13 20:20:40 +000068/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020069 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +000070 * number; the start node begins in the second byte. It's used to catch the
71 * most severe mutilation of the program by the caller.
72 */
73
74#define REGMAGIC 0234
75
76/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000077 * Utility definitions.
78 */
79#define UCHARAT(p) ((int)*(char_u *)(p))
80
Bram Moolenaar63d9e732019-12-05 21:10:38 +010081// Used for an error (down from) vim_regcomp(): give the error message, set
82// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010083#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
84#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
85#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
86#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +010087#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010088#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +000089#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000090
Bram Moolenaar95f09602016-11-10 20:01:45 +010091
Bram Moolenaar071d4272004-06-13 20:20:40 +000092#define MAX_LIMIT (32767L << 16L)
93
Bram Moolenaar071d4272004-06-13 20:20:40 +000094#define NOT_MULTI 0
95#define MULTI_ONE 1
96#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020097
98// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +010099#define RA_FAIL 1 // something failed, abort
100#define RA_CONT 2 // continue in inner loop
101#define RA_BREAK 3 // break inner loop
102#define RA_MATCH 4 // successful match
103#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200104
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105/*
106 * Return NOT_MULTI if c is not a "multi" operator.
107 * Return MULTI_ONE if c is a single "multi" operator.
108 * Return MULTI_MULT if c is a multi "multi" operator.
109 */
110 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100111re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112{
113 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
114 return MULTI_ONE;
115 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
116 return MULTI_MULT;
117 return NOT_MULTI;
118}
119
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000120static char_u *reg_prev_sub = NULL;
121
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n	- New line (NL).
 *  \r	- Carriage Return (CR).
 *  \t	- Tab (TAB).
 *  \e	- Escape (ESC).
 *  \b	- Backspace (Ctrl_H).
 *  \d  - Character code in decimal, eg \d123
 *  \o	- Character code in octal, eg \o120
 *  \x	- Character code in hex, eg \x4a
 *  \u	- Multibyte character code, eg \u20ac
 *  \U	- Long multibyte character code, eg \U12345678
 */
138static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000139static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140
Bram Moolenaar071d4272004-06-13 20:20:40 +0000141/*
142 * Translate '\x' to its control character, except "\n", which is Magic.
143 */
144 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100145backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000146{
147 switch (c)
148 {
149 case 'r': return CAR;
150 case 't': return TAB;
151 case 'e': return ESC;
152 case 'b': return BS;
153 }
154 return c;
155}
156
157/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000158 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000159 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
160 * recognized. Otherwise "pp" is advanced to after the item.
161 */
162 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100163get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000164{
165 static const char *(class_names[]) =
166 {
167 "alnum:]",
168#define CLASS_ALNUM 0
169 "alpha:]",
170#define CLASS_ALPHA 1
171 "blank:]",
172#define CLASS_BLANK 2
173 "cntrl:]",
174#define CLASS_CNTRL 3
175 "digit:]",
176#define CLASS_DIGIT 4
177 "graph:]",
178#define CLASS_GRAPH 5
179 "lower:]",
180#define CLASS_LOWER 6
181 "print:]",
182#define CLASS_PRINT 7
183 "punct:]",
184#define CLASS_PUNCT 8
185 "space:]",
186#define CLASS_SPACE 9
187 "upper:]",
188#define CLASS_UPPER 10
189 "xdigit:]",
190#define CLASS_XDIGIT 11
191 "tab:]",
192#define CLASS_TAB 12
193 "return:]",
194#define CLASS_RETURN 13
195 "backspace:]",
196#define CLASS_BACKSPACE 14
197 "escape:]",
198#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100199 "ident:]",
200#define CLASS_IDENT 16
201 "keyword:]",
202#define CLASS_KEYWORD 17
203 "fname:]",
204#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000205 };
206#define CLASS_NONE 99
207 int i;
208
209 if ((*pp)[1] == ':')
210 {
K.Takataeeec2542021-06-02 13:28:16 +0200211 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000212 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
213 {
214 *pp += STRLEN(class_names[i]) + 2;
215 return i;
216 }
217 }
218 return CLASS_NONE;
219}
220
221/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 * Specific version of character class functions.
223 * Using a table to keep this fast.
224 */
225static short class_tab[256];
226
227#define RI_DIGIT 0x01
228#define RI_HEX 0x02
229#define RI_OCTAL 0x04
230#define RI_WORD 0x08
231#define RI_HEAD 0x10
232#define RI_ALPHA 0x20
233#define RI_LOWER 0x40
234#define RI_UPPER 0x80
235#define RI_WHITE 0x100
236
237 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100238init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239{
240 int i;
241 static int done = FALSE;
242
243 if (done)
244 return;
245
246 for (i = 0; i < 256; ++i)
247 {
248 if (i >= '0' && i <= '7')
249 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
250 else if (i >= '8' && i <= '9')
251 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
252 else if (i >= 'a' && i <= 'f')
253 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
256 else if (i >= 'A' && i <= 'F')
257 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000259 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
260 else if (i == '_')
261 class_tab[i] = RI_WORD + RI_HEAD;
262 else
263 class_tab[i] = 0;
264 }
265 class_tab[' '] |= RI_WHITE;
266 class_tab['\t'] |= RI_WHITE;
267 done = TRUE;
268}
269
// Test whether character "c" has the given RI_ flag set in class_tab[], which
// is filled by init_class_tab().
// The range check is done on the unsigned value, so that negative values
// (Magic characters are negative) are rejected just like values above 0xff
// and class_tab[] is never indexed out of bounds.
// Note that "c" is evaluated more than once.
#define ri_digit(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((unsigned)(c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000279
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100280// flags for regflags
281#define RF_ICASE 1 // ignore case
282#define RF_NOICASE 2 // don't ignore case
283#define RF_HASNL 4 // can match a NL
284#define RF_ICOMBINE 8 // ignore combining characters
285#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286
287/*
288 * Global work variables for vim_regcomp().
289 */
290
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100291static char_u *regparse; // Input-scan pointer.
292static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100293static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100295static int regnzpar; // \z() count.
296static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100298static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100300static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100303static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100305static int reg_string; // matching with a string instead of a buffer
306 // line
307static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000308
309/*
310 * META contains all characters that may be magic, except '^' and '$'.
311 */
312
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100313// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314static char_u META_flags[] = {
315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100317// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100319// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100321// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100323// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100325// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100327// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
329};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100331static int curchr; // currently parsed character
332// Previous character. Note: prevchr is sometimes -1 when we are not at the
333// start, eg in /[ ^I]^ the pattern was never found even if it existed,
334// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200335static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100336static int prevprevchr; // previous-previous character
337static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000338
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100339// arguments for reg()
340#define REG_NOPAREN 0 // toplevel reg()
341#define REG_PAREN 1 // \(\)
342#define REG_ZPAREN 2 // \z(\)
343#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200345typedef struct
346{
347 char_u *regparse;
348 int prevchr_len;
349 int curchr;
350 int prevchr;
351 int prevprevchr;
352 int nextchr;
353 int at_start;
354 int prev_at_start;
355 int regnpar;
356} parse_state_T;
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100359static int getchr(void);
360static void skipchr_keepstart(void);
361static int peekchr(void);
362static void skipchr(void);
363static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100364static long gethexchrs(int maxinputlen);
365static long getoctchrs(void);
366static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100367static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100368static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200369static int cstrncmp(char_u *s1, char_u *s2, int *n);
370static char_u *cstrchr(char_u *, int);
371static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100372static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100373#ifdef FEAT_EVAL
374static void report_re_switch(char_u *pat);
375#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000376
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200377static regengine_T bt_regengine;
378static regengine_T nfa_regengine;
379
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380/*
381 * Return TRUE if compiled regular expression "prog" can match a line break.
382 */
383 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100384re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385{
386 return (prog->regflags & RF_HASNL);
387}
388
389/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000390 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
391 * Returns a character representing the class. Zero means that no item was
392 * recognized. Otherwise "pp" is advanced to after the item.
393 */
394 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100395get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000396{
397 int c;
398 int l = 1;
399 char_u *p = *pp;
400
Bram Moolenaar985079c2019-02-16 17:07:47 +0100401 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000402 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000403 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000404 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000405 if (p[l + 2] == '=' && p[l + 3] == ']')
406 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000407 if (has_mbyte)
408 c = mb_ptr2char(p + 2);
409 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000410 c = p[2];
411 *pp += l + 4;
412 return c;
413 }
414 }
415 return 0;
416}
417
418/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000419 * Check for a collating element "[.a.]". "pp" points to the '['.
420 * Returns a character. Zero means that no item was recognized. Otherwise
421 * "pp" is advanced to after the item.
422 * Currently only single characters are recognized!
423 */
424 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100425get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000426{
427 int c;
428 int l = 1;
429 char_u *p = *pp;
430
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100431 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000432 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000433 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000434 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000435 if (p[l + 2] == '.' && p[l + 3] == ']')
436 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000437 if (has_mbyte)
438 c = mb_ptr2char(p + 2);
439 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000440 c = p[2];
441 *pp += l + 4;
442 return c;
443 }
444 }
445 return 0;
446}
447
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100448static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
449static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200450
451 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100452get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200453{
454 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
455 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
456}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000457
458/*
459 * Skip over a "[]" range.
460 * "p" must point to the character after the '['.
461 * The returned pointer is on the matching ']', or the terminating NUL.
462 */
463 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100464skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000466 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100468 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 ++p;
470 if (*p == ']' || *p == '-')
471 ++p;
472 while (*p != NUL && *p != ']')
473 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000474 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000475 p += l;
476 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000477 if (*p == '-')
478 {
479 ++p;
480 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100481 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000482 }
483 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200484 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200486 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000487 p += 2;
488 else if (*p == '[')
489 {
490 if (get_char_class(&p) == CLASS_NONE
491 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200492 && get_coll_element(&p) == 0
493 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100494 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000495 }
496 else
497 ++p;
498 }
499
500 return p;
501}
502
503/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200505 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Take care of characters with a backslash in front of it.
507 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 */
509 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100510skip_regexp(
511 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200512 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200513 int magic)
514{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100515 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200516}
517
518/*
519 * Call skip_regexp() and when the delimiter does not match give an error and
520 * return NULL.
521 */
522 char_u *
523skip_regexp_err(
524 char_u *startp,
525 int delim,
526 int magic)
527{
528 char_u *p = skip_regexp(startp, delim, magic);
529
530 if (*p != delim)
531 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000532 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200533 return NULL;
534 }
535 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200536}
537
538/*
539 * skip_regexp() with extra arguments:
540 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
541 * expression and change "\?" to "?". If "*newp" is not NULL the expression
542 * is changed in-place.
543 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100544 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200545 */
546 char_u *
547skip_regexp_ex(
548 char_u *startp,
549 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100550 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200551 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100552 int *dropped,
553 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000554{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100555 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000556 char_u *p = startp;
557
558 if (magic)
559 mymagic = MAGIC_ON;
560 else
561 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200562 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100564 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000565 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100566 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000567 break;
568 if ((p[0] == '[' && mymagic >= MAGIC_ON)
569 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
570 {
571 p = skip_anyof(p + 1);
572 if (p[0] == NUL)
573 break;
574 }
575 else if (p[0] == '\\' && p[1] != NUL)
576 {
577 if (dirc == '?' && newp != NULL && p[1] == '?')
578 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100579 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580 if (*newp == NULL)
581 {
582 *newp = vim_strsave(startp);
583 if (*newp != NULL)
584 p = *newp + (p - startp);
585 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200586 if (dropped != NULL)
587 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000589 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590 else
591 ++p;
592 }
593 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100594 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 if (*p == 'v')
596 mymagic = MAGIC_ALL;
597 else if (*p == 'V')
598 mymagic = MAGIC_NONE;
599 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000600 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100601 if (magic_val != NULL)
602 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000603 return p;
604}
605
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200606/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200607 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200608 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100609static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200610static int at_start; // True when on the first character
611static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100612
Bram Moolenaar071d4272004-06-13 20:20:40 +0000613/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200614 * Start parsing at "str".
615 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000616 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100617initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618{
619 regparse = str;
620 prevchr_len = 0;
621 curchr = prevprevchr = prevchr = nextchr = -1;
622 at_start = TRUE;
623 prev_at_start = FALSE;
624}
625
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200626/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200627 * Save the current parse state, so that it can be restored and parsing
628 * starts in the same state again.
629 */
630 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100631save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200632{
633 ps->regparse = regparse;
634 ps->prevchr_len = prevchr_len;
635 ps->curchr = curchr;
636 ps->prevchr = prevchr;
637 ps->prevprevchr = prevprevchr;
638 ps->nextchr = nextchr;
639 ps->at_start = at_start;
640 ps->prev_at_start = prev_at_start;
641 ps->regnpar = regnpar;
642}
643
644/*
645 * Restore a previously saved parse state.
646 */
647 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100648restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200649{
650 regparse = ps->regparse;
651 prevchr_len = ps->prevchr_len;
652 curchr = ps->curchr;
653 prevchr = ps->prevchr;
654 prevprevchr = ps->prevprevchr;
655 nextchr = ps->nextchr;
656 at_start = ps->at_start;
657 prev_at_start = ps->prev_at_start;
658 regnpar = ps->regnpar;
659}
660
661
662/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200663 * Get the next character without advancing.
664 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100666peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000668 static int after_slash = FALSE;
669
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670 if (curchr == -1)
671 {
672 switch (curchr = regparse[0])
673 {
674 case '.':
675 case '[':
676 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100677 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000678 if (reg_magic >= MAGIC_ON)
679 curchr = Magic(curchr);
680 break;
681 case '(':
682 case ')':
683 case '{':
684 case '%':
685 case '+':
686 case '=':
687 case '?':
688 case '@':
689 case '!':
690 case '&':
691 case '|':
692 case '<':
693 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100694 case '#': // future ext.
695 case '"': // future ext.
696 case '\'': // future ext.
697 case ',': // future ext.
698 case '-': // future ext.
699 case ':': // future ext.
700 case ';': // future ext.
701 case '`': // future ext.
702 case '/': // Can't be used in / command
703 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704 if (reg_magic == MAGIC_ALL)
705 curchr = Magic(curchr);
706 break;
707 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100708 // * is not magic as the very first character, eg "?*ptr", when
709 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
710 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000711 if (reg_magic >= MAGIC_ON
712 && !at_start
713 && !(prev_at_start && prevchr == Magic('^'))
714 && (after_slash
715 || (prevchr != Magic('(')
716 && prevchr != Magic('&')
717 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000718 curchr = Magic('*');
719 break;
720 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100721 // '^' is only magic as the very first character and if it's after
722 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000723 if (reg_magic >= MAGIC_OFF
724 && (at_start
725 || reg_magic == MAGIC_ALL
726 || prevchr == Magic('(')
727 || prevchr == Magic('|')
728 || prevchr == Magic('&')
729 || prevchr == Magic('n')
730 || (no_Magic(prevchr) == '('
731 && prevprevchr == Magic('%'))))
732 {
733 curchr = Magic('^');
734 at_start = TRUE;
735 prev_at_start = FALSE;
736 }
737 break;
738 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100739 // '$' is only magic as the very last char and if it's in front of
740 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000741 if (reg_magic >= MAGIC_OFF)
742 {
743 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200744 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000745
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100746 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200748 || p[1] == 'm' || p[1] == 'M'
749 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
750 {
751 if (p[1] == 'v')
752 is_magic_all = TRUE;
753 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
754 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000755 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200756 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000757 if (p[0] == NUL
758 || (p[0] == '\\'
759 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
760 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200761 || (is_magic_all
762 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000763 || reg_magic == MAGIC_ALL)
764 curchr = Magic('$');
765 }
766 break;
767 case '\\':
768 {
769 int c = regparse[1];
770
771 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100772 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000773 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774 {
775 /*
776 * META contains everything that may be magic sometimes,
777 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200778 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000779 * magicness. Therefore, \ is so meta-magic that it is
780 * not in META.
781 */
782 curchr = -1;
783 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100784 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000786 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000787 peekchr();
788 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000789 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 curchr = toggle_Magic(curchr);
791 }
792 else if (vim_strchr(REGEXP_ABBR, c))
793 {
794 /*
795 * Handle abbreviations, like "\t" for TAB -- webb
796 */
797 curchr = backslash_trans(c);
798 }
799 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
800 curchr = toggle_Magic(c);
801 else
802 {
803 /*
804 * Next character can never be (made) magic?
805 * Then backslashing it won't do anything.
806 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 if (has_mbyte)
808 curchr = (*mb_ptr2char)(regparse + 1);
809 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810 curchr = c;
811 }
812 break;
813 }
814
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815 default:
816 if (has_mbyte)
817 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000818 }
819 }
820
821 return curchr;
822}
823
824/*
825 * Eat one lexed character. Do this in a way that we can undo it.
826 */
827 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100828skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000829{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100830 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 if (*regparse == '\\')
832 prevchr_len = 1;
833 else
834 prevchr_len = 0;
835 if (regparse[prevchr_len] != NUL)
836 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000837 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100838 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000839 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000840 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000841 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000843 ++prevchr_len;
844 }
845 regparse += prevchr_len;
846 prev_at_start = at_start;
847 at_start = FALSE;
848 prevprevchr = prevchr;
849 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100850 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 nextchr = -1;
852}
853
854/*
855 * Skip a character while keeping the value of prev_at_start for at_start.
856 * prevchr and prevprevchr are also kept.
857 */
858 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100859skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
861 int as = prev_at_start;
862 int pr = prevchr;
863 int prpr = prevprevchr;
864
865 skipchr();
866 at_start = as;
867 prevchr = pr;
868 prevprevchr = prpr;
869}
870
/*
 * Return the next character of the pattern and consume it.
 * The character is obtained with peekchr(), which knows about magic and
 * such, and then eaten with skipchr().
 */
    static int
getchr(void)
{
    int	    c;

    c = peekchr();
    skipchr();
    return c;
}
883
884/*
885 * put character back. Works only once!
886 */
887 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100888ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
890 nextchr = curchr;
891 curchr = prevchr;
892 prevchr = prevprevchr;
893 at_start = prev_at_start;
894 prev_at_start = FALSE;
895
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100896 // Backup regparse, so that it's at the same position as before the
897 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898 regparse -= prevchr_len;
899}
900
901/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000902 * Get and return the value of the hex string at the current position.
903 * Return -1 if there is no valid hex number.
904 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000905 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000906 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000907 * The parameter controls the maximum number of input characters. This will be
908 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
909 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100910 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100911gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000912{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100913 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000914 int c;
915 int i;
916
917 for (i = 0; i < maxinputlen; ++i)
918 {
919 c = regparse[0];
920 if (!vim_isxdigit(c))
921 break;
922 nr <<= 4;
923 nr |= hex2nr(c);
924 ++regparse;
925 }
926
927 if (i == 0)
928 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100929 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000930}
931
932/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200933 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000934 * current position. Return -1 for invalid. Consumes all digits.
935 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100936 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100937getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000938{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100939 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000940 int c;
941 int i;
942
943 for (i = 0; ; ++i)
944 {
945 c = regparse[0];
946 if (c < '0' || c > '9')
947 break;
948 nr *= 10;
949 nr += c - '0';
950 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100951 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000952 }
953
954 if (i == 0)
955 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100956 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000957}
958
959/*
960 * get and return the value of the octal string immediately after the current
961 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
962 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
963 * treat 8 or 9 as recognised characters. Position is updated:
964 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000965 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000966 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100967 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100968getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000969{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100970 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000971 int c;
972 int i;
973
974 for (i = 0; i < 3 && nr < 040; ++i)
975 {
976 c = regparse[0];
977 if (c < '0' || c > '7')
978 break;
979 nr <<= 3;
980 nr |= hex2nr(c);
981 ++regparse;
982 }
983
984 if (i == 0)
985 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100986 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000987}
988
989/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 * read_limits - Read two integers to be taken as a minimum and maximum.
991 * If the first character is '-', then the range is reversed.
992 * Should end with 'end'. If minval is missing, zero is default, if maxval is
993 * missing, a very big number is the default.
994 */
995 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100996read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997{
998 int reverse = FALSE;
999 char_u *first_char;
1000 long tmp;
1001
1002 if (*regparse == '-')
1003 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001004 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 regparse++;
1006 reverse = TRUE;
1007 }
1008 first_char = regparse;
1009 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001010 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011 {
1012 if (vim_isdigit(*++regparse))
1013 *maxval = getdigits(&regparse);
1014 else
1015 *maxval = MAX_LIMIT;
1016 }
1017 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001018 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001019 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001020 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001021 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001022 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001023 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001024 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001025 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001026
1027 /*
1028 * Reverse the range if there was a '-', or make sure it is in the right
1029 * order otherwise.
1030 */
1031 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1032 {
1033 tmp = *minval;
1034 *minval = *maxval;
1035 *maxval = tmp;
1036 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001037 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001038 return OK;
1039}
1040
1041/*
1042 * vim_regexec and friends
1043 */
1044
1045/*
1046 * Global work variables for vim_regexec().
1047 */
1048
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001049static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001051static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001053static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001054static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055
1056/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1058 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001059 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001061static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062static unsigned reg_tofreelen;
1063
1064/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001065 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001066 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001067 * done:
1068 * single-line multi-line
1069 * reg_match &regmatch_T NULL
1070 * reg_mmatch NULL &regmmatch_T
1071 * reg_startp reg_match->startp <invalid>
1072 * reg_endp reg_match->endp <invalid>
1073 * reg_startpos <invalid> reg_mmatch->startpos
1074 * reg_endpos <invalid> reg_mmatch->endpos
1075 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001076 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077 * reg_firstlnum <invalid> first line in which to search
1078 * reg_maxline 0 last line nr
1079 * reg_line_lbr FALSE or TRUE FALSE
1080 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001081typedef struct {
1082 regmatch_T *reg_match;
1083 regmmatch_T *reg_mmatch;
1084 char_u **reg_startp;
1085 char_u **reg_endp;
1086 lpos_T *reg_startpos;
1087 lpos_T *reg_endpos;
1088 win_T *reg_win;
1089 buf_T *reg_buf;
1090 linenr_T reg_firstlnum;
1091 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001092 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001093
Bram Moolenaar0270f382018-07-17 05:43:58 +02001094 // The current match-position is stord in these variables:
1095 linenr_T lnum; // line number, relative to first line
1096 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001097 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001098
1099 int need_clear_subexpr; // subexpressions still need to be cleared
1100#ifdef FEAT_SYN_HL
1101 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1102 // cleared
1103#endif
1104
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001105 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1106 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1107 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001108 int reg_ic;
1109
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001110 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1111 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001112 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001113
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001114 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1115 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001116 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001117
1118 // State for the NFA engine regexec.
1119 int nfa_has_zend; // NFA regexp \ze operator encountered.
1120 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1121 int nfa_nsubexpr; // Number of sub expressions actually being used
1122 // during execution. 1 if only the whole match
1123 // (subexpr 0) is used.
1124 // listid is global, so that it increases on recursive calls to
1125 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1126 // all the states.
1127 int nfa_listid;
1128 int nfa_alt_listid;
1129
1130#ifdef FEAT_SYN_HL
1131 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1132#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001133} regexec_T;
1134
1135static regexec_T rex;
1136static int rex_in_use = FALSE;
1137
Bram Moolenaar071d4272004-06-13 20:20:40 +00001138/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001139 * Return TRUE if character 'c' is included in 'iskeyword' option for
1140 * "reg_buf" buffer.
1141 */
1142 static int
1143reg_iswordc(int c)
1144{
1145 return vim_iswordc_buf(c, rex.reg_buf);
1146}
1147
1148/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001149 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1150 */
1151 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001152reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001153{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001154 // when looking behind for a match/no-match lnum is negative. But we
1155 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001158 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001159 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001160 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001161 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162}
1163
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches: pointers to where a match begins and
// ends, and the same as positions.
static char_u	*reg_startzp[NSUBEXP];	    // beginning of match, pointer
static char_u	*reg_endzp[NSUBEXP];	    // end of match, pointer
static lpos_T	reg_startzpos[NSUBEXP];	    // beginning of match, position
static lpos_T	reg_endzpos[NSUBEXP];	    // end of match, position
#endif

// TRUE when a multi-line regexp is used, thus "rex.reg_match" is not set.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001173
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001175/*
1176 * Create a new extmatch and mark it as referenced once.
1177 */
1178 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001179make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001180{
1181 reg_extmatch_T *em;
1182
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001183 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184 if (em != NULL)
1185 em->refcnt = 1;
1186 return em;
1187}
1188
1189/*
1190 * Add a reference to an extmatch.
1191 */
1192 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001193ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194{
1195 if (em != NULL)
1196 em->refcnt++;
1197 return em;
1198}
1199
1200/*
1201 * Remove a reference to an extmatch. If there are no references left, free
1202 * the info.
1203 */
1204 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001205unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206{
1207 int i;
1208
1209 if (em != NULL && --em->refcnt <= 0)
1210 {
1211 for (i = 0; i < NSUBEXP; ++i)
1212 vim_free(em->matches[i]);
1213 vim_free(em);
1214 }
1215}
1216#endif
1217
1218/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001219 * Get class of previous character.
1220 */
1221 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001222reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001223{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001224 if (rex.input > rex.line)
1225 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001226 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001227 return -1;
1228}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001229
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001230/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001231 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001232 */
1233 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001234reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001235{
1236 pos_T top, bot;
1237 linenr_T lnum;
1238 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001239 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001240 int mode;
1241 colnr_T start, end;
1242 colnr_T start2, end2;
1243 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001244 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001246 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001247 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001248 return FALSE;
1249
1250 if (VIsual_active)
1251 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001252 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001253 {
1254 top = VIsual;
1255 bot = wp->w_cursor;
1256 }
1257 else
1258 {
1259 top = wp->w_cursor;
1260 bot = VIsual;
1261 }
1262 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001263 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001264 }
1265 else
1266 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001267 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001268 {
1269 top = curbuf->b_visual.vi_start;
1270 bot = curbuf->b_visual.vi_end;
1271 }
1272 else
1273 {
1274 top = curbuf->b_visual.vi_end;
1275 bot = curbuf->b_visual.vi_start;
1276 }
1277 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001278 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001279 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001280 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001281 if (lnum < top.lnum || lnum > bot.lnum)
1282 return FALSE;
1283
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001284 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001285 if (mode == 'v')
1286 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001287 if ((lnum == top.lnum && col < top.col)
1288 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1289 return FALSE;
1290 }
1291 else if (mode == Ctrl_V)
1292 {
1293 getvvcol(wp, &top, &start, NULL, &end);
1294 getvvcol(wp, &bot, &start2, NULL, &end2);
1295 if (start2 < start)
1296 start = start2;
1297 if (end2 > end)
1298 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001299 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001300 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001301
1302 // getvvcol() flushes rex.line, need to get it again
1303 rex.line = reg_getline(rex.lnum);
1304 rex.input = rex.line + col;
1305
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001306 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001307 if (cols < start || cols > end - (*p_sel == 'e'))
1308 return FALSE;
1309 }
1310 return TRUE;
1311}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001312
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313/*
1314 * Check the regexp program for its magic number.
1315 * Return TRUE if it's wrong.
1316 */
1317 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001318prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001320 regprog_T *prog;
1321
Bram Moolenaar6100d022016-10-02 16:51:57 +02001322 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001323 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001324 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001325 return FALSE;
1326
1327 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001328 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001329 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 return TRUE;
1331 }
1332 return FALSE;
1333}
1334
1335/*
1336 * Cleanup the subexpressions, if this wasn't done yet.
1337 * This construction is used to clear the subexpressions only when they are
1338 * used (to increase speed).
1339 */
1340 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001341cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001343 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001344 {
1345 if (REG_MULTI)
1346 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001347 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001348 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1349 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001350 }
1351 else
1352 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001353 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1354 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001356 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357 }
1358}
1359
1360#ifdef FEAT_SYN_HL
1361 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001362cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001364 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001365 {
1366 if (REG_MULTI)
1367 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001368 // Use 0xff to set lnum to -1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1370 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1371 }
1372 else
1373 {
1374 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1375 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1376 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001377 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001378 }
1379}
1380#endif
1381
1382/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001383 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 */
1385 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001386reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001388 rex.line = reg_getline(++rex.lnum);
1389 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 fast_breakcheck();
1391}
1392
1393/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001394 * Check whether a backreference matches.
1395 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001396 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1397 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001398 */
1399 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001400match_with_backref(
1401 linenr_T start_lnum,
1402 colnr_T start_col,
1403 linenr_T end_lnum,
1404 colnr_T end_col,
1405 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001406{
1407 linenr_T clnum = start_lnum;
1408 colnr_T ccol = start_col;
1409 int len;
1410 char_u *p;
1411
1412 if (bytelen != NULL)
1413 *bytelen = 0;
1414 for (;;)
1415 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001416 // Since getting one line may invalidate the other, need to make copy.
1417 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001418 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001419 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001420 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001421 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1422 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001423 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001424 vim_free(reg_tofree);
1425 reg_tofree = alloc(len);
1426 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001427 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001428 reg_tofreelen = len;
1429 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001430 STRCPY(reg_tofree, rex.line);
1431 rex.input = reg_tofree + (rex.input - rex.line);
1432 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001433 }
1434
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001435 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001436 p = reg_getline(clnum);
1437 if (clnum == end_lnum)
1438 len = end_col - ccol;
1439 else
1440 len = (int)STRLEN(p + ccol);
1441
Bram Moolenaar0270f382018-07-17 05:43:58 +02001442 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001443 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001444 if (bytelen != NULL)
1445 *bytelen += len;
1446 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001447 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001448 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001449 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001450
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001451 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001452 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001453 if (bytelen != NULL)
1454 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001455 ++clnum;
1456 ccol = 0;
1457 if (got_int)
1458 return RA_FAIL;
1459 }
1460
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001461 // found a match! Note that rex.line may now point to a copy of the line,
1462 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001463 return RA_MATCH;
1464}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001465
Bram Moolenaarfb031402014-09-09 17:18:49 +02001466/*
1467 * Used in a place where no * or \+ can follow.
1468 */
1469 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001470re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001471{
1472 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001473 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001474 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001475 rc_did_emsg = TRUE;
1476 return FAIL;
1477 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001478 return OK;
1479}
1480
// The decomposition of one character: up to three characters, unused ones
// are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for the characters 0xfb20 - 0xfb4f, indexed by the character
// minus 0xfb20.  An entry marked UNUSED maps the character to itself.
// The table is only read, thus it is const.
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		// 0xfb20	alt ayin
    {0x5d0,0,0},		// 0xfb21	alt alef
    {0x5d3,0,0},		// 0xfb22	alt dalet
    {0x5d4,0,0},		// 0xfb23	alt he
    {0x5db,0,0},		// 0xfb24	alt kaf
    {0x5dc,0,0},		// 0xfb25	alt lamed
    {0x5dd,0,0},		// 0xfb26	alt mem-sofit
    {0x5e8,0,0},		// 0xfb27	alt resh
    {0x5ea,0,0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc, 0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "*c1", "*c2" and "*c3".  Positions that are not used are set to zero.
 * A character outside of the range 0xfb20 - 0xfb4f is not decomposed:
 * "*c1" is set to "c" and "*c2" and "*c3" are set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
	// Use the table entry in place, no need to make a copy of it.
	const decomp_T	*d = &decomp_table[c - 0xfb20];

	*c1 = d->a;
	*c2 = d->b;
	*c3 = d->c;
    }
    else
    {
	*c1 = c;
	*c2 = 0;
	*c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001558
1559/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001560 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001561 * Return 0 if strings match, non-zero otherwise.
1562 * Correct the length "*n" when composing characters are ignored.
1563 */
1564 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001565cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566{
1567 int result;
1568
Bram Moolenaar6100d022016-10-02 16:51:57 +02001569 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570 result = STRNCMP(s1, s2, *n);
1571 else
1572 result = MB_STRNICMP(s1, s2, *n);
1573
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001574 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001575 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001576 {
1577 char_u *str1, *str2;
1578 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001579 int junk;
1580
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001581 // we have to handle the strcmp ourselves, since it is necessary to
1582 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001583 str1 = s1;
1584 str2 = s2;
1585 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001586 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001587 {
1588 c1 = mb_ptr2char_adv(&str1);
1589 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001591 // Decompose the character if necessary, into 'base' characters.
1592 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001593 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001594 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001595 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001596 mb_decompose(c1, &c11, &junk, &junk);
1597 mb_decompose(c2, &c12, &junk, &junk);
1598 c1 = c11;
1599 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001600 if (c11 != c12
1601 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 break;
1603 }
1604 }
1605 result = c2 - c1;
1606 if (result == 0)
1607 *n = (int)(str2 - s2);
1608 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609
1610 return result;
1611}
1612
1613/*
1614 * cstrchr: This function is used a lot for simple searches, keep it fast!
1615 */
1616 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001617cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618{
1619 char_u *p;
1620 int cc;
1621
Bram Moolenaara12a1612019-01-24 16:39:02 +01001622 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 return vim_strchr(s, c);
1624
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001625 // tolower() and toupper() can be slow, comparing twice should be a lot
1626 // faster (esp. when using MS Visual C++!).
1627 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 if (enc_utf8 && c > 0x80)
1629 cc = utf_fold(c);
1630 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001631 if (MB_ISUPPER(c))
1632 cc = MB_TOLOWER(c);
1633 else if (MB_ISLOWER(c))
1634 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635 else
1636 return vim_strchr(s, c);
1637
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 if (has_mbyte)
1639 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001640 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001641 {
1642 if (enc_utf8 && c > 0x80)
1643 {
1644 if (utf_fold(utf_ptr2char(p)) == cc)
1645 return p;
1646 }
1647 else if (*p == c || *p == cc)
1648 return p;
1649 }
1650 }
1651 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001652 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001653 for (p = s; *p != NUL; ++p)
1654 if (*p == c || *p == cc)
1655 return p;
1656
1657 return NULL;
1658}
1659
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001660////////////////////////////////////////////////////////////////
1661// regsub stuff //
1662////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001663
/*
 * We should define fptr as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * void pointer.  This should work for all compilers.
 *
 * A function of this type stores the converted character in the "int *"
 * argument and returns the function to use for the next character, cast to
 * fptr_T, or NULL.
 */
typedef void (*(*fptr_T)(int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001671
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001672static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001673
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001674 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001675do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001676{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001677 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001678
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001679 return (fptr_T)NULL;
1680}
1681
1682 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001683do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001684{
1685 *d = MB_TOUPPER(c);
1686
1687 return (fptr_T)do_Upper;
1688}
1689
1690 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001691do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001692{
1693 *d = MB_TOLOWER(c);
1694
1695 return (fptr_T)NULL;
1696}
1697
1698 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001699do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001700{
1701 *d = MB_TOLOWER(c);
1702
1703 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001704}
1705
1706/*
1707 * regtilde(): Replace tildes in the pattern by the old pattern.
1708 *
1709 * Short explanation of the tilde: It stands for the previous replacement
1710 * pattern. If that previous pattern also contains a ~ we should go back a
1711 * step further... But we insert the previous pattern into the current one
1712 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001713 * This still does not handle the case where "magic" changes. So require the
1714 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001715 *
1716 * The tildes are parsed once before the first call to vim_regsub().
1717 */
1718 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001719regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001720{
1721 char_u *newsub = source;
1722 char_u *tmpsub;
1723 char_u *p;
1724 int len;
1725 int prevlen;
1726
1727 for (p = newsub; *p; ++p)
1728 {
1729 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1730 {
1731 if (reg_prev_sub != NULL)
1732 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001733 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001735 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001736 if (tmpsub != NULL)
1737 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001738 // copy prefix
1739 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001740 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001741 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001743 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001745 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 STRCPY(tmpsub + len + prevlen, p + 1);
1747
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001748 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749 vim_free(newsub);
1750 newsub = tmpsub;
1751 p = newsub + len + prevlen;
1752 }
1753 }
1754 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001755 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001757 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758 --p;
1759 }
1760 else
1761 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001762 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001764 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001765 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 }
1767 }
1768
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001769 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1770 // because recursive calls may make the returned string invalid.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001771 vim_free(reg_prev_sub);
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001772 reg_prev_sub = vim_strsave(newsub);
1773
Bram Moolenaar071d4272004-06-13 20:20:40 +00001774 return newsub;
1775}
1776
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// These pointers are used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// vim_regsub_both() fills the fields from the matching "rex" fields before
// evaluating the expression, and restores the previous values afterwards
// when it was called recursively.
typedef struct {
    regmatch_T	*sm_match;	// set from rex.reg_match
    regmmatch_T	*sm_mmatch;	// set from rex.reg_mmatch
    linenr_T	sm_firstlnum;	// set from rex.reg_firstlnum
    linenr_T	sm_maxline;	// set from rex.reg_maxline
    int		sm_line_lbr;	// set from rex.reg_line_lbr
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1793
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001794#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001795
1796/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001797 * Put the submatches in "argv[argskip]" which is a list passed into
1798 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001799 */
1800 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001801fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001802{
1803 listitem_T *li;
1804 int i;
1805 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001806 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001807
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001808 if (argcount == argskip)
1809 // called function doesn't take a submatches argument
1810 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001811
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001812 // Relies on sl_list to be the first item in staticList10_T.
1813 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001814
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001815 // There are always 10 list items in staticList10_T.
1816 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001817 for (i = 0; i < 10; ++i)
1818 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001819 s = rsm.sm_match->startp[i];
1820 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001821 s = NULL;
1822 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001823 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001824 li->li_tv.v_type = VAR_STRING;
1825 li->li_tv.vval.v_string = s;
1826 li = li->li_next;
1827 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001828 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001829}
1830
1831 static void
1832clear_submatch_list(staticList10_T *sl)
1833{
1834 int i;
1835
1836 for (i = 0; i < 10; ++i)
1837 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1838}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001839#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001840
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841/*
1842 * vim_regsub() - perform substitutions after a vim_regexec() or
1843 * vim_regexec_multi() match.
1844 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001845 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1846 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001847 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001848 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1849 *
1850 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1851 * double them to keep them, and insert a backslash before a CR to avoid it
1852 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001853 *
1854 * Note: The matched text must not change between the call of
1855 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1856 * references invalid!
1857 *
1858 * Returns the size of the replacement, including terminating NUL.
1859 */
1860 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001861vim_regsub(
1862 regmatch_T *rmp,
1863 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001864 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001865 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001866 int destlen,
1867 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001868{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001869 int result;
1870 regexec_T rex_save;
1871 int rex_in_use_save = rex_in_use;
1872
1873 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001874 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001875 rex_save = rex;
1876 rex_in_use = TRUE;
1877
1878 rex.reg_match = rmp;
1879 rex.reg_mmatch = NULL;
1880 rex.reg_maxline = 0;
1881 rex.reg_buf = curbuf;
1882 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001883 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001884
1885 rex_in_use = rex_in_use_save;
1886 if (rex_in_use)
1887 rex = rex_save;
1888
1889 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001890}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001891
1892 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001893vim_regsub_multi(
1894 regmmatch_T *rmp,
1895 linenr_T lnum,
1896 char_u *source,
1897 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001898 int destlen,
1899 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001900{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001901 int result;
1902 regexec_T rex_save;
1903 int rex_in_use_save = rex_in_use;
1904
1905 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001906 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001907 rex_save = rex;
1908 rex_in_use = TRUE;
1909
1910 rex.reg_match = NULL;
1911 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001912 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001913 rex.reg_firstlnum = lnum;
1914 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1915 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001916 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001917
1918 rex_in_use = rex_in_use_save;
1919 if (rex_in_use)
1920 rex = rex_save;
1921
1922 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923}
1924
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4
// Result of evaluating the substitute expression, one entry for each nesting
// level.  vim_regsub_both() stores it here when called without REGSUB_COPY
// and uses it when called with REGSUB_COPY.
static char_u   *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Apply VIM_CLEAR() to every entry of eval_result[].
 */
    void
free_resub_eval_result(void)
{
    int	    level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
	VIM_CLEAR(eval_result[level]);
	++level;
    }
}
# endif
#endif
1941
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001943vim_regsub_both(
1944 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001945 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001946 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001947 int destlen,
1948 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001949{
1950 char_u *src;
1951 char_u *dst;
1952 char_u *s;
1953 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001954 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001955 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001956 fptr_T func_all = (fptr_T)NULL;
1957 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001958 linenr_T clnum = 0; // init for GCC
1959 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001960#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001961 static int nesting = 0;
1962 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001963#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001964 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001965
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001966 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001967 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001968 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001969 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001970 return 0;
1971 }
1972 if (prog_magic_wrong())
1973 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001974#ifdef FEAT_EVAL
1975 if (nesting == MAX_REGSUB_NESTING)
1976 {
1977 emsg(_(e_substitute_nesting_too_deep));
1978 return 0;
1979 }
1980 nested = nesting;
1981#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001982 src = source;
1983 dst = dest;
1984
1985 /*
1986 * When the substitute part starts with "\=" evaluate it as an expression.
1987 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001988 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989 {
1990#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001991 // To make sure that the length doesn't change between checking the
1992 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01001993 // resulting string is saved from the call with
1994 // "flags & REGSUB_COPY" == 0 to the call with
1995 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001996 if (copy)
1997 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001998 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001999 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002000 STRCPY(dest, eval_result[nested]);
2001 dst += STRLEN(eval_result[nested]);
2002 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003 }
2004 }
2005 else
2006 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002007 int prev_can_f_submatch = can_f_submatch;
2008 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002009
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002010 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002011
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002012 // The expression may contain substitute(), which calls us
2013 // recursively. Make sure submatch() gets the text from the first
2014 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002015 if (can_f_submatch)
2016 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002017 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002018 rsm.sm_match = rex.reg_match;
2019 rsm.sm_mmatch = rex.reg_mmatch;
2020 rsm.sm_firstlnum = rex.reg_firstlnum;
2021 rsm.sm_maxline = rex.reg_maxline;
2022 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002023
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002024 // Although unlikely, it is possible that the expression invokes a
2025 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002026 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002027 ++nesting;
2028
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002029 if (expr != NULL)
2030 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002031 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002032 char_u buf[NUMBUFLEN];
2033 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002034 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002035 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002036
2037 rettv.v_type = VAR_STRING;
2038 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002039 argv[0].v_type = VAR_LIST;
2040 argv[0].vval.v_list = &matchList.sl_list;
2041 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002042 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002043 funcexe.fe_argv_func = fill_submatch_list;
2044 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002045 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002046 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002047 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002048 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002049 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002050 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002051 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002052 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002053
Bram Moolenaar6100d022016-10-02 16:51:57 +02002054 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002055 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002056 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002057 }
LemonBoyf3b48952022-05-05 13:53:03 +01002058 else if (expr->v_type == VAR_INSTR)
2059 {
2060 exe_typval_instr(expr, &rettv);
2061 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002062 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002063 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002064 clear_submatch_list(&matchList);
2065
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002066 if (rettv.v_type == VAR_UNKNOWN)
2067 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002068 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002069 else
2070 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002071 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2072 if (eval_result[nested] != NULL)
2073 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002074 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002075 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002076 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002077 else if (substitute_instr != NULL)
2078 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002079 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002080 else
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002081 eval_result[nested] = eval_to_string(source + 2, TRUE);
2082 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002083
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002084 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002085 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002086 int had_backslash = FALSE;
2087
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002088 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002089 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002090 // Change NL to CR, so that it becomes a line break,
2091 // unless called from vim_regexec_nl().
2092 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002093 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002094 *s = CAR;
2095 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002096 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002097 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002098 /* Change NL to CR here too, so that this works:
2099 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2100 * abc\
2101 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002102 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002103 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002104 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002105 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002106 had_backslash = TRUE;
2107 }
2108 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002109 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002110 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002111 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002112 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002113 if (s != NULL)
2114 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002115 vim_free(eval_result[nested]);
2116 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002117 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002118 }
2119
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002120 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002121 }
2122
Bram Moolenaar6100d022016-10-02 16:51:57 +02002123 can_f_submatch = prev_can_f_submatch;
2124 if (can_f_submatch)
2125 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126 }
2127#endif
2128 }
2129 else
2130 while ((c = *src++) != NUL)
2131 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002132 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002133 no = 0;
2134 else if (c == '\\' && *src != NUL)
2135 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002136 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002137 {
2138 ++src;
2139 no = 0;
2140 }
2141 else if ('0' <= *src && *src <= '9')
2142 {
2143 no = *src++ - '0';
2144 }
2145 else if (vim_strchr((char_u *)"uUlLeE", *src))
2146 {
2147 switch (*src++)
2148 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002149 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002150 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002151 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002152 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002153 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002154 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002155 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002156 continue;
2157 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002158 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002159 continue;
2160 }
2161 }
2162 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002163 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002164 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002165 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2166 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002167 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002168 if (copy)
2169 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002170 if (dst + 3 > dest + destlen)
2171 {
2172 iemsg("vim_regsub_both(): not enough space");
2173 return 0;
2174 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002175 *dst++ = c;
2176 *dst++ = *src++;
2177 *dst++ = *src++;
2178 }
2179 else
2180 {
2181 dst += 3;
2182 src += 2;
2183 }
2184 continue;
2185 }
2186
Bram Moolenaar071d4272004-06-13 20:20:40 +00002187 if (c == '\\' && *src != NUL)
2188 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002189 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002190 switch (*src)
2191 {
2192 case 'r': c = CAR; ++src; break;
2193 case 'n': c = NL; ++src; break;
2194 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002195 // Oh no! \e already has meaning in subst pat :-(
2196 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002197 case 'b': c = Ctrl_H; ++src; break;
2198
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002199 // If "backslash" is TRUE the backslash will be removed
2200 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002201 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002202 {
2203 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002204 {
2205 if (dst + 1 > dest + destlen)
2206 {
2207 iemsg("vim_regsub_both(): not enough space");
2208 return 0;
2209 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002210 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002211 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002212 ++dst;
2213 }
2214 c = *src++;
2215 }
2216 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002217 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002218 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002219
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002220 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002221 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002222 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002223 func_one = (fptr_T)(func_one(&cc, c));
2224 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002225 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002226 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002227 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002228 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002229
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002230 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002231 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002232 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002233 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002234
Bram Moolenaar071d4272004-06-13 20:20:40 +00002235 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002236 {
2237 if (dst + charlen > dest + destlen)
2238 {
2239 iemsg("vim_regsub_both(): not enough space");
2240 return 0;
2241 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002242 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002243 }
2244 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002245 if (enc_utf8)
2246 {
2247 int clen = utf_ptr2len(src - 1);
2248
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002249 // If the character length is shorter than "totlen", there
2250 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002251 if (clen < totlen)
2252 {
2253 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002254 {
2255 if (dst + totlen - clen > dest + destlen)
2256 {
2257 iemsg("vim_regsub_both(): not enough space");
2258 return 0;
2259 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002260 mch_memmove(dst + 1, src - 1 + clen,
2261 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002262 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002263 dst += totlen - clen;
2264 }
2265 }
2266 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002267 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002268 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002269 {
2270 if (dst + 1 > dest + destlen)
2271 {
2272 iemsg("vim_regsub_both(): not enough space");
2273 return 0;
2274 }
2275 *dst = cc;
2276 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002277 dst++;
2278 }
2279 else
2280 {
2281 if (REG_MULTI)
2282 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002283 clnum = rex.reg_mmatch->startpos[no].lnum;
2284 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002285 s = NULL;
2286 else
2287 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002288 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2289 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2290 len = rex.reg_mmatch->endpos[no].col
2291 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002292 else
2293 len = (int)STRLEN(s);
2294 }
2295 }
2296 else
2297 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002298 s = rex.reg_match->startp[no];
2299 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002300 s = NULL;
2301 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002302 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 }
2304 if (s != NULL)
2305 {
2306 for (;;)
2307 {
2308 if (len == 0)
2309 {
2310 if (REG_MULTI)
2311 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002312 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002313 break;
2314 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002315 {
2316 if (dst + 1 > dest + destlen)
2317 {
2318 iemsg("vim_regsub_both(): not enough space");
2319 return 0;
2320 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002321 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002322 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002323 ++dst;
2324 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002325 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2326 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002327 else
2328 len = (int)STRLEN(s);
2329 }
2330 else
2331 break;
2332 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002333 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002334 {
2335 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002336 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 goto exit;
2338 }
2339 else
2340 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002341 if ((flags & REGSUB_BACKSLASH)
2342 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002343 {
2344 /*
2345 * Insert a backslash in front of a CR, otherwise
2346 * it will be replaced by a line break.
2347 * Number of backslashes will be halved later,
2348 * double them here.
2349 */
2350 if (copy)
2351 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002352 if (dst + 2 > dest + destlen)
2353 {
2354 iemsg("vim_regsub_both(): not enough space");
2355 return 0;
2356 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002357 dst[0] = '\\';
2358 dst[1] = *s;
2359 }
2360 dst += 2;
2361 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002362 else
2363 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002364 if (has_mbyte)
2365 c = mb_ptr2char(s);
2366 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002367 c = *s;
2368
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002369 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002370 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002371 func_one = (fptr_T)(func_one(&cc, c));
2372 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002373 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002374 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002375 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002376 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002377
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002378 if (has_mbyte)
2379 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002380 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002381 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002382
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002383 // Copy composing characters separately, one
2384 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002385 if (enc_utf8)
2386 l = utf_ptr2len(s) - 1;
2387 else
2388 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002389
2390 s += l;
2391 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002392 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002393 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002394 {
2395 if (dst + charlen > dest + destlen)
2396 {
2397 iemsg("vim_regsub_both(): not enough space");
2398 return 0;
2399 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002400 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002401 }
2402 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002403 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002404 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002405 {
2406 if (dst + 1 > dest + destlen)
2407 {
2408 iemsg("vim_regsub_both(): not enough space");
2409 return 0;
2410 }
2411 *dst = cc;
2412 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002413 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002414 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002415
Bram Moolenaar071d4272004-06-13 20:20:40 +00002416 ++s;
2417 --len;
2418 }
2419 }
2420 }
2421 no = -1;
2422 }
2423 }
2424 if (copy)
2425 *dst = NUL;
2426
2427exit:
2428 return (int)((dst - dest) + 1);
2429}
2430
2431#ifdef FEAT_EVAL
2432/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002433 * Call reg_getline() with the line numbers from the submatch. If a
2434 * substitute() was used the reg_maxline and other values have been
2435 * overwritten.
2436 */
2437 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002438reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002439{
2440 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002441 linenr_T save_first = rex.reg_firstlnum;
2442 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002443
Bram Moolenaar6100d022016-10-02 16:51:57 +02002444 rex.reg_firstlnum = rsm.sm_firstlnum;
2445 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002446
2447 s = reg_getline(lnum);
2448
Bram Moolenaar6100d022016-10-02 16:51:57 +02002449 rex.reg_firstlnum = save_first;
2450 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002451 return s;
2452}
2453
2454/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002455 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002456 * allocated memory.
2457 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2458 */
2459 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002460reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002461{
2462 char_u *retval = NULL;
2463 char_u *s;
2464 int len;
2465 int round;
2466 linenr_T lnum;
2467
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002468 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002469 return NULL;
2470
Bram Moolenaar6100d022016-10-02 16:51:57 +02002471 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002472 {
2473 /*
2474 * First round: compute the length and allocate memory.
2475 * Second round: copy the text.
2476 */
2477 for (round = 1; round <= 2; ++round)
2478 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002479 lnum = rsm.sm_mmatch->startpos[no].lnum;
2480 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002481 return NULL;
2482
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002483 s = reg_getline_submatch(lnum);
2484 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002485 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002486 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002487 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002489 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002490 len = rsm.sm_mmatch->endpos[no].col
2491 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002492 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002493 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002494 ++len;
2495 }
2496 else
2497 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002498 // Multiple lines: take start line from start col, middle
2499 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002500 len = (int)STRLEN(s);
2501 if (round == 2)
2502 {
2503 STRCPY(retval, s);
2504 retval[len] = '\n';
2505 }
2506 ++len;
2507 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002508 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002509 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002510 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002511 if (round == 2)
2512 STRCPY(retval + len, s);
2513 len += (int)STRLEN(s);
2514 if (round == 2)
2515 retval[len] = '\n';
2516 ++len;
2517 }
2518 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002519 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002520 rsm.sm_mmatch->endpos[no].col);
2521 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002522 if (round == 2)
2523 retval[len] = NUL;
2524 ++len;
2525 }
2526
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002527 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002528 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002529 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002530 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002531 return NULL;
2532 }
2533 }
2534 }
2535 else
2536 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002537 s = rsm.sm_match->startp[no];
2538 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002539 retval = NULL;
2540 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002541 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002542 }
2543
2544 return retval;
2545}
Bram Moolenaar41571762014-04-02 19:00:58 +02002546
2547/*
2548 * Used for the submatch() function with the optional non-zero argument: get
2549 * the list of strings from the n'th submatch in allocated memory with NULs
2550 * represented in NLs.
2551 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2552 * command, for a non-existing submatch and for any error.
2553 */
2554 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002555reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002556{
2557 char_u *s;
2558 linenr_T slnum;
2559 linenr_T elnum;
2560 colnr_T scol;
2561 colnr_T ecol;
2562 int i;
2563 list_T *list;
2564 int error = FALSE;
2565
2566 if (!can_f_submatch || no < 0)
2567 return NULL;
2568
Bram Moolenaar6100d022016-10-02 16:51:57 +02002569 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002570 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002571 slnum = rsm.sm_mmatch->startpos[no].lnum;
2572 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002573 if (slnum < 0 || elnum < 0)
2574 return NULL;
2575
Bram Moolenaar6100d022016-10-02 16:51:57 +02002576 scol = rsm.sm_mmatch->startpos[no].col;
2577 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002578
2579 list = list_alloc();
2580 if (list == NULL)
2581 return NULL;
2582
2583 s = reg_getline_submatch(slnum) + scol;
2584 if (slnum == elnum)
2585 {
2586 if (list_append_string(list, s, ecol - scol) == FAIL)
2587 error = TRUE;
2588 }
2589 else
2590 {
2591 if (list_append_string(list, s, -1) == FAIL)
2592 error = TRUE;
2593 for (i = 1; i < elnum - slnum; i++)
2594 {
2595 s = reg_getline_submatch(slnum + i);
2596 if (list_append_string(list, s, -1) == FAIL)
2597 error = TRUE;
2598 }
2599 s = reg_getline_submatch(elnum);
2600 if (list_append_string(list, s, ecol) == FAIL)
2601 error = TRUE;
2602 }
2603 }
2604 else
2605 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002606 s = rsm.sm_match->startp[no];
2607 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002608 return NULL;
2609 list = list_alloc();
2610 if (list == NULL)
2611 return NULL;
2612 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002613 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002614 error = TRUE;
2615 }
2616
2617 if (error)
2618 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002619 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002620 return NULL;
2621 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002622 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002623 return list;
2624}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002625#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002626
Bram Moolenaarf4140482020-02-15 23:06:45 +01002627/*
2628 * Initialize the values used for matching against multiple lines
2629 */
2630 static void
2631init_regexec_multi(
2632 regmmatch_T *rmp,
2633 win_T *win, // window in which to search or NULL
2634 buf_T *buf, // buffer in which to search
2635 linenr_T lnum) // nr of line to start looking for match
2636{
2637 rex.reg_match = NULL;
2638 rex.reg_mmatch = rmp;
2639 rex.reg_buf = buf;
2640 rex.reg_win = win;
2641 rex.reg_firstlnum = lnum;
2642 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2643 rex.reg_line_lbr = FALSE;
2644 rex.reg_ic = rmp->rmm_ic;
2645 rex.reg_icombine = FALSE;
2646 rex.reg_maxcol = rmp->rmm_maxcol;
2647}
2648
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002649#include "regexp_bt.c"
2650
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002651static regengine_T bt_regengine =
2652{
2653 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002654 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002655 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002656 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002657};
2658
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002659#include "regexp_nfa.c"
2660
2661static regengine_T nfa_regengine =
2662{
2663 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002664 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002665 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002666 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002667};
2668
// Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
// 'regexpengine' or from a "\%#=" prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for debugging messages, indexed by the engine number.
static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
                    "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                };
#endif
2680
2681/*
2682 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002683 * Returns the program in allocated memory.
2684 * Use vim_regfree() to free the memory.
2685 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002686 */
2687 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002688vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002689{
2690 regprog_T *prog = NULL;
2691 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002692 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002693
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002694 regexp_engine = p_re;
2695
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002696 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002697 if (STRNCMP(expr, "\\%#=", 4) == 0)
2698 {
2699 int newengine = expr[4] - '0';
2700
2701 if (newengine == AUTOMATIC_ENGINE
2702 || newengine == BACKTRACKING_ENGINE
2703 || newengine == NFA_ENGINE)
2704 {
2705 regexp_engine = expr[4] - '0';
2706 expr += 5;
2707#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002708 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002709 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002710#endif
2711 }
2712 else
2713 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002714 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002715 regexp_engine = AUTOMATIC_ENGINE;
2716 }
2717 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002718#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002719 bt_regengine.expr = expr;
2720 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002721#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002722 // reg_iswordc() uses rex.reg_buf
2723 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002724
2725 /*
2726 * First try the NFA engine, unless backtracking was requested.
2727 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002728 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002729 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002730 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002731 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002732 else
2733 prog = bt_regengine.regcomp(expr, re_flags);
2734
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002735 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002736 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002737 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002738#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002739 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002740 {
2741 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002742 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002743 if (f)
2744 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002745 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002746 fclose(f);
2747 }
2748 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002749 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002750 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002751 }
2752#endif
2753 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002754 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002755 * The NFA engine also fails for patterns that it can't handle well
2756 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002757 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002758 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002759 if (regexp_engine == AUTOMATIC_ENGINE
2760 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002761 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002762 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002763#ifdef FEAT_EVAL
2764 report_re_switch(expr);
2765#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002766 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002767 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002768 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002769
Bram Moolenaarfda37292014-11-05 14:27:36 +01002770 if (prog != NULL)
2771 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002772 // Store the info needed to call regcomp() again when the engine turns
2773 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002774 prog->re_engine = regexp_engine;
2775 prog->re_flags = re_flags;
2776 }
2777
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002778 return prog;
2779}
2780
2781/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002782 * Free a compiled regexp program, returned by vim_regcomp().
2783 */
2784 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002785vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002786{
2787 if (prog != NULL)
2788 prog->engine->regfree(prog);
2789}
2790
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002791#if defined(EXITFREE) || defined(PROTO)
2792 void
2793free_regexp_stuff(void)
2794{
2795 ga_clear(&regstack);
2796 ga_clear(&backpos);
2797 vim_free(reg_tofree);
2798 vim_free(reg_prev_sub);
2799}
2800#endif
2801
Bram Moolenaarfda37292014-11-05 14:27:36 +01002802#ifdef FEAT_EVAL
Bram Moolenaarfda37292014-11-05 14:27:36 +01002803 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002804report_re_switch(char_u *pat)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002805{
2806 if (p_verbose > 0)
2807 {
2808 verbose_enter();
Bram Moolenaar32526b32019-01-19 17:43:09 +01002809 msg_puts(_("Switching to backtracking RE engine for pattern: "));
2810 msg_puts((char *)pat);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002811 verbose_leave();
2812 }
2813}
2814#endif
2815
Bram Moolenaar651fca82021-11-29 20:39:38 +00002816#if defined(FEAT_X11) || defined(PROTO)
Bram Moolenaar473de612013-06-08 18:19:48 +02002817/*
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002818 * Return whether "prog" is currently being executed.
2819 */
2820 int
2821regprog_in_use(regprog_T *prog)
2822{
2823 return prog->re_in_use;
2824}
Bram Moolenaar113e1072019-01-20 15:30:40 +01002825#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002826
2827/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002828 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002829 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002830 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002831 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002832 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002833 *
2834 * Return TRUE if there is a match, FALSE if not.
2835 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002836 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002837vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002838 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002839 char_u *line, // string to match against
2840 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002841 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002842{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002843 int result;
2844 regexec_T rex_save;
2845 int rex_in_use_save = rex_in_use;
2846
Bram Moolenaar0270f382018-07-17 05:43:58 +02002847 // Cannot use the same prog recursively, it contains state.
2848 if (rmp->regprog->re_in_use)
2849 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002850 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002851 return FALSE;
2852 }
2853 rmp->regprog->re_in_use = TRUE;
2854
Bram Moolenaar6100d022016-10-02 16:51:57 +02002855 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002856 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002857 rex_save = rex;
2858 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002859
Bram Moolenaar6100d022016-10-02 16:51:57 +02002860 rex.reg_startp = NULL;
2861 rex.reg_endp = NULL;
2862 rex.reg_startpos = NULL;
2863 rex.reg_endpos = NULL;
2864
2865 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002866 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002867
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002868 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002869 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2870 && result == NFA_TOO_EXPENSIVE)
2871 {
2872 int save_p_re = p_re;
2873 int re_flags = rmp->regprog->re_flags;
2874 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2875
2876 p_re = BACKTRACKING_ENGINE;
2877 vim_regfree(rmp->regprog);
2878 if (pat != NULL)
2879 {
2880#ifdef FEAT_EVAL
2881 report_re_switch(pat);
2882#endif
2883 rmp->regprog = vim_regcomp(pat, re_flags);
2884 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002885 {
2886 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002887 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002888 rmp->regprog->re_in_use = FALSE;
2889 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002890 vim_free(pat);
2891 }
2892
2893 p_re = save_p_re;
2894 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002895
2896 rex_in_use = rex_in_use_save;
2897 if (rex_in_use)
2898 rex = rex_save;
2899
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002900 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002901}
2902
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002903/*
2904 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002905 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002906 */
2907 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002908vim_regexec_prog(
2909 regprog_T **prog,
2910 int ignore_case,
2911 char_u *line,
2912 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002913{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002914 int r;
2915 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002916
2917 regmatch.regprog = *prog;
2918 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002919 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002920 *prog = regmatch.regprog;
2921 return r;
2922}
2923
2924/*
2925 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002926 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002927 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002928 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002929vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002930{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002931 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002932}
2933
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002934/*
2935 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002936 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002937 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002938 */
2939 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002940vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002941{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002942 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002943}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002944
2945/*
2946 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002947 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2948 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002949 * Uses curbuf for line count and 'iskeyword'.
2950 *
2951 * Return zero if there is no match. Return number of lines contained in the
2952 * match otherwise.
2953 */
2954 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002955vim_regexec_multi(
2956 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002957 win_T *win, // window in which to search or NULL
2958 buf_T *buf, // buffer in which to search
2959 linenr_T lnum, // nr of line to start looking for match
2960 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002961 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002962{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002963 int result;
2964 regexec_T rex_save;
2965 int rex_in_use_save = rex_in_use;
2966
Bram Moolenaar0270f382018-07-17 05:43:58 +02002967 // Cannot use the same prog recursively, it contains state.
2968 if (rmp->regprog->re_in_use)
2969 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002970 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002971 return FALSE;
2972 }
2973 rmp->regprog->re_in_use = TRUE;
2974
Bram Moolenaar6100d022016-10-02 16:51:57 +02002975 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002976 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002977 rex_save = rex;
2978 rex_in_use = TRUE;
2979
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002980 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002981 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002982 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002983
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002984 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002985 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2986 && result == NFA_TOO_EXPENSIVE)
2987 {
2988 int save_p_re = p_re;
2989 int re_flags = rmp->regprog->re_flags;
2990 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2991
2992 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002993 if (pat != NULL)
2994 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002995 regprog_T *prev_prog = rmp->regprog;
2996
Bram Moolenaarfda37292014-11-05 14:27:36 +01002997#ifdef FEAT_EVAL
2998 report_re_switch(pat);
2999#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003000#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003001 // checking for \z misuse was already done when compiling for NFA,
3002 // allow all here
3003 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003004#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003005 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003006#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003007 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003008#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003009 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003010 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003011 // Somehow compiling the pattern failed now, put back the
3012 // previous one to avoid "regprog" becoming NULL.
3013 rmp->regprog = prev_prog;
3014 }
3015 else
3016 {
3017 vim_regfree(prev_prog);
3018
Bram Moolenaar41499802018-07-18 06:02:09 +02003019 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003020 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003021 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003022 rmp->regprog->re_in_use = FALSE;
3023 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003024 vim_free(pat);
3025 }
3026 p_re = save_p_re;
3027 }
3028
Bram Moolenaar6100d022016-10-02 16:51:57 +02003029 rex_in_use = rex_in_use_save;
3030 if (rex_in_use)
3031 rex = rex_save;
3032
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003033 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003034}