/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
Bram Moolenaar155f2d12022-06-20 13:38:33 +010024static sig_atomic_t dummy_timeout_flag = 0;
25static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
/*
 * Magic characters carry a special meaning: they do not match literally.
 * They are stored as negative numbers, which keeps them apart from literal
 * characters (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)        ((int)(x) - 256)
#define un_Magic(x)     ((x) + 256)
#define is_Magic(x)     ((x) < 0)

/*
 * Return the literal form of "x": a Magic character is converted back with
 * un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magicness of "x": a Magic character becomes its literal
 * counterpart and a literal character becomes Magic.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
Bram Moolenaar0f618382022-08-26 21:33:04 +010054static int timeout_nesting = 0;
55
56/*
57 * Start a timer that will cause the regexp to abort after "msec".
58 * This doesn't work well recursively. In case it happens anyway, the first
59 * set timeout will prevail, nested ones are ignored.
60 * The caller must make sure there is a matching disable_regexp_timeout() call!
61 */
Paul Ollis65745772022-06-05 16:55:54 +010062 void
63init_regexp_timeout(long msec)
64{
Bram Moolenaar0f618382022-08-26 21:33:04 +010065 if (timeout_nesting == 0)
66 timeout_flag = start_timeout(msec);
67 ++timeout_nesting;
Paul Ollis65745772022-06-05 16:55:54 +010068}
69
70 void
71disable_regexp_timeout(void)
72{
Bram Moolenaar0f618382022-08-26 21:33:04 +010073 if (timeout_nesting == 0)
74 iemsg("disable_regexp_timeout() called without active timer");
75 else if (--timeout_nesting == 0)
76 {
77 stop_timeout();
78 timeout_flag = &dummy_timeout_flag;
79 }
Paul Ollis65745772022-06-05 16:55:54 +010080}
81#endif
82
Bram Moolenaar071d4272004-06-13 20:20:40 +000083/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020084 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +000085 * number; the start node begins in the second byte. It's used to catch the
86 * most severe mutilation of the program by the caller.
87 */
88
89#define REGMAGIC 0234
90
91/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000092 * Utility definitions.
93 */
94#define UCHARAT(p) ((int)*(char_u *)(p))
95
Bram Moolenaar63d9e732019-12-05 21:10:38 +010096// Used for an error (down from) vim_regcomp(): give the error message, set
97// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010098#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
99#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
100#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
101#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +0100102#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100103#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +0000104#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105
Bram Moolenaar95f09602016-11-10 20:01:45 +0100106
Bram Moolenaar071d4272004-06-13 20:20:40 +0000107#define MAX_LIMIT (32767L << 16L)
108
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109#define NOT_MULTI 0
110#define MULTI_ONE 1
111#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200112
113// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100114#define RA_FAIL 1 // something failed, abort
115#define RA_CONT 2 // continue in inner loop
116#define RA_BREAK 3 // break inner loop
117#define RA_MATCH 4 // successful match
118#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200119
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120/*
121 * Return NOT_MULTI if c is not a "multi" operator.
122 * Return MULTI_ONE if c is a single "multi" operator.
123 * Return MULTI_MULT if c is a multi "multi" operator.
124 */
125 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100126re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000127{
128 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
129 return MULTI_ONE;
130 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
131 return MULTI_MULT;
132 return NOT_MULTI;
133}
134
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000135static char_u *reg_prev_sub = NULL;
136
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n  - New line (NL).
 *  \r  - Carriage Return (CR).
 *  \t  - Tab (TAB).
 *  \e  - Escape (ESC).
 *  \b  - Backspace (Ctrl_H).
 *  \d  - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
 *  \x  - Character code in hex, eg \x4a
 *  \u  - Multibyte character code, eg \u20ac
 *  \U  - Long multibyte character code, eg \U12345678
 */
153static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000154static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000155
Bram Moolenaar071d4272004-06-13 20:20:40 +0000156/*
157 * Translate '\x' to its control character, except "\n", which is Magic.
158 */
159 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100160backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000161{
162 switch (c)
163 {
164 case 'r': return CAR;
165 case 't': return TAB;
166 case 'e': return ESC;
167 case 'b': return BS;
168 }
169 return c;
170}
171
172/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000173 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000174 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
175 * recognized. Otherwise "pp" is advanced to after the item.
176 */
177 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100178get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000179{
180 static const char *(class_names[]) =
181 {
182 "alnum:]",
183#define CLASS_ALNUM 0
184 "alpha:]",
185#define CLASS_ALPHA 1
186 "blank:]",
187#define CLASS_BLANK 2
188 "cntrl:]",
189#define CLASS_CNTRL 3
190 "digit:]",
191#define CLASS_DIGIT 4
192 "graph:]",
193#define CLASS_GRAPH 5
194 "lower:]",
195#define CLASS_LOWER 6
196 "print:]",
197#define CLASS_PRINT 7
198 "punct:]",
199#define CLASS_PUNCT 8
200 "space:]",
201#define CLASS_SPACE 9
202 "upper:]",
203#define CLASS_UPPER 10
204 "xdigit:]",
205#define CLASS_XDIGIT 11
206 "tab:]",
207#define CLASS_TAB 12
208 "return:]",
209#define CLASS_RETURN 13
210 "backspace:]",
211#define CLASS_BACKSPACE 14
212 "escape:]",
213#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100214 "ident:]",
215#define CLASS_IDENT 16
216 "keyword:]",
217#define CLASS_KEYWORD 17
218 "fname:]",
219#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000220 };
221#define CLASS_NONE 99
222 int i;
223
224 if ((*pp)[1] == ':')
225 {
K.Takataeeec2542021-06-02 13:28:16 +0200226 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000227 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
228 {
229 *pp += STRLEN(class_names[i]) + 2;
230 return i;
231 }
232 }
233 return CLASS_NONE;
234}
235
236/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000237 * Specific version of character class functions.
238 * Using a table to keep this fast.
239 */
240static short class_tab[256];
241
242#define RI_DIGIT 0x01
243#define RI_HEX 0x02
244#define RI_OCTAL 0x04
245#define RI_WORD 0x08
246#define RI_HEAD 0x10
247#define RI_ALPHA 0x20
248#define RI_LOWER 0x40
249#define RI_UPPER 0x80
250#define RI_WHITE 0x100
251
252 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100253init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254{
255 int i;
256 static int done = FALSE;
257
258 if (done)
259 return;
260
261 for (i = 0; i < 256; ++i)
262 {
263 if (i >= '0' && i <= '7')
264 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
265 else if (i >= '8' && i <= '9')
266 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
267 else if (i >= 'a' && i <= 'f')
268 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000269 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000270 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
271 else if (i >= 'A' && i <= 'F')
272 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000273 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000274 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
275 else if (i == '_')
276 class_tab[i] = RI_WORD + RI_HEAD;
277 else
278 class_tab[i] = 0;
279 }
280 class_tab[' '] |= RI_WHITE;
281 class_tab['\t'] |= RI_WHITE;
282 done = TRUE;
283}
284
kylo252ae6f1d82022-02-16 19:24:07 +0000285#define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
286#define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX))
287#define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
288#define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD))
289#define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD))
290#define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
291#define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER))
292#define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER))
293#define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100295// flags for regflags
296#define RF_ICASE 1 // ignore case
297#define RF_NOICASE 2 // don't ignore case
298#define RF_HASNL 4 // can match a NL
299#define RF_ICOMBINE 8 // ignore combining characters
300#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301
302/*
303 * Global work variables for vim_regcomp().
304 */
305
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100306static char_u *regparse; // Input-scan pointer.
307static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100308static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000309#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100310static int regnzpar; // \z() count.
311static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000312#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100313static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100315static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000316#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000317
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100318static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000319
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100320static int reg_string; // matching with a string instead of a buffer
321 // line
322static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323
324/*
325 * META contains all characters that may be magic, except '^' and '$'.
326 */
327
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100328// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000329static char_u META_flags[] = {
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100332// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000333 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100334// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000335 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100336// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000337 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100338// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000339 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100340// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000341 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100342// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
344};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100346static int curchr; // currently parsed character
347// Previous character. Note: prevchr is sometimes -1 when we are not at the
348// start, eg in /[ ^I]^ the pattern was never found even if it existed,
349// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200350static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100351static int prevprevchr; // previous-previous character
352static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100354// arguments for reg()
355#define REG_NOPAREN 0 // toplevel reg()
356#define REG_PAREN 1 // \(\)
357#define REG_ZPAREN 2 // \z(\)
358#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000359
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200360typedef struct
361{
362 char_u *regparse;
363 int prevchr_len;
364 int curchr;
365 int prevchr;
366 int prevprevchr;
367 int nextchr;
368 int at_start;
369 int prev_at_start;
370 int regnpar;
371} parse_state_T;
372
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100373static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100374static int getchr(void);
375static void skipchr_keepstart(void);
376static int peekchr(void);
377static void skipchr(void);
378static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100379static long gethexchrs(int maxinputlen);
380static long getoctchrs(void);
381static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100382static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100383static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200384static int cstrncmp(char_u *s1, char_u *s2, int *n);
385static char_u *cstrchr(char_u *, int);
386static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100387static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100388#ifdef FEAT_EVAL
389static void report_re_switch(char_u *pat);
390#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000391
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200392static regengine_T bt_regengine;
393static regengine_T nfa_regengine;
394
Bram Moolenaar071d4272004-06-13 20:20:40 +0000395/*
396 * Return TRUE if compiled regular expression "prog" can match a line break.
397 */
398 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100399re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000400{
401 return (prog->regflags & RF_HASNL);
402}
403
404/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000405 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
406 * Returns a character representing the class. Zero means that no item was
407 * recognized. Otherwise "pp" is advanced to after the item.
408 */
409 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100410get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000411{
412 int c;
413 int l = 1;
414 char_u *p = *pp;
415
Bram Moolenaar985079c2019-02-16 17:07:47 +0100416 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000417 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000418 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000419 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000420 if (p[l + 2] == '=' && p[l + 3] == ']')
421 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000422 if (has_mbyte)
423 c = mb_ptr2char(p + 2);
424 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000425 c = p[2];
426 *pp += l + 4;
427 return c;
428 }
429 }
430 return 0;
431}
432
433/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000434 * Check for a collating element "[.a.]". "pp" points to the '['.
435 * Returns a character. Zero means that no item was recognized. Otherwise
436 * "pp" is advanced to after the item.
437 * Currently only single characters are recognized!
438 */
439 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100440get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000441{
442 int c;
443 int l = 1;
444 char_u *p = *pp;
445
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100446 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000447 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000448 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000449 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450 if (p[l + 2] == '.' && p[l + 3] == ']')
451 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000452 if (has_mbyte)
453 c = mb_ptr2char(p + 2);
454 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000455 c = p[2];
456 *pp += l + 4;
457 return c;
458 }
459 }
460 return 0;
461}
462
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100463static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
464static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200465
466 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100467get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200468{
469 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
470 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
471}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000472
473/*
474 * Skip over a "[]" range.
475 * "p" must point to the character after the '['.
476 * The returned pointer is on the matching ']', or the terminating NUL.
477 */
478 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100479skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000480{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000481 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000482
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100483 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000484 ++p;
485 if (*p == ']' || *p == '-')
486 ++p;
487 while (*p != NUL && *p != ']')
488 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000489 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000490 p += l;
491 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000492 if (*p == '-')
493 {
494 ++p;
495 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100496 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000497 }
498 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200499 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000500 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200501 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000502 p += 2;
503 else if (*p == '[')
504 {
505 if (get_char_class(&p) == CLASS_NONE
506 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200507 && get_coll_element(&p) == 0
508 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100509 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000510 }
511 else
512 ++p;
513 }
514
515 return p;
516}
517
518/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000519 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200520 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000521 * Take care of characters with a backslash in front of it.
522 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000523 */
524 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100525skip_regexp(
526 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200527 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200528 int magic)
529{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100530 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200531}
532
533/*
534 * Call skip_regexp() and when the delimiter does not match give an error and
535 * return NULL.
536 */
537 char_u *
538skip_regexp_err(
539 char_u *startp,
540 int delim,
541 int magic)
542{
543 char_u *p = skip_regexp(startp, delim, magic);
544
545 if (*p != delim)
546 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000547 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200548 return NULL;
549 }
550 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200551}
552
553/*
554 * skip_regexp() with extra arguments:
555 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
556 * expression and change "\?" to "?". If "*newp" is not NULL the expression
557 * is changed in-place.
558 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100559 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200560 */
561 char_u *
562skip_regexp_ex(
563 char_u *startp,
564 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100565 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200566 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100567 int *dropped,
568 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000569{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100570 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571 char_u *p = startp;
572
573 if (magic)
574 mymagic = MAGIC_ON;
575 else
576 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200577 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000578
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100579 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100581 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000582 break;
583 if ((p[0] == '[' && mymagic >= MAGIC_ON)
584 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
585 {
586 p = skip_anyof(p + 1);
587 if (p[0] == NUL)
588 break;
589 }
590 else if (p[0] == '\\' && p[1] != NUL)
591 {
592 if (dirc == '?' && newp != NULL && p[1] == '?')
593 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100594 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 if (*newp == NULL)
596 {
597 *newp = vim_strsave(startp);
598 if (*newp != NULL)
599 p = *newp + (p - startp);
600 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200601 if (dropped != NULL)
602 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000603 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000604 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000605 else
606 ++p;
607 }
608 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100609 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610 if (*p == 'v')
611 mymagic = MAGIC_ALL;
612 else if (*p == 'V')
613 mymagic = MAGIC_NONE;
614 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100616 if (magic_val != NULL)
617 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618 return p;
619}
620
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200621/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200622 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200623 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100624static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200625static int at_start; // True when on the first character
626static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100627
Bram Moolenaar071d4272004-06-13 20:20:40 +0000628/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200629 * Start parsing at "str".
630 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000631 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100632initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000633{
634 regparse = str;
635 prevchr_len = 0;
636 curchr = prevprevchr = prevchr = nextchr = -1;
637 at_start = TRUE;
638 prev_at_start = FALSE;
639}
640
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200641/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200642 * Save the current parse state, so that it can be restored and parsing
643 * starts in the same state again.
644 */
645 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100646save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200647{
648 ps->regparse = regparse;
649 ps->prevchr_len = prevchr_len;
650 ps->curchr = curchr;
651 ps->prevchr = prevchr;
652 ps->prevprevchr = prevprevchr;
653 ps->nextchr = nextchr;
654 ps->at_start = at_start;
655 ps->prev_at_start = prev_at_start;
656 ps->regnpar = regnpar;
657}
658
659/*
660 * Restore a previously saved parse state.
661 */
662 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100663restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200664{
665 regparse = ps->regparse;
666 prevchr_len = ps->prevchr_len;
667 curchr = ps->curchr;
668 prevchr = ps->prevchr;
669 prevprevchr = ps->prevprevchr;
670 nextchr = ps->nextchr;
671 at_start = ps->at_start;
672 prev_at_start = ps->prev_at_start;
673 regnpar = ps->regnpar;
674}
675
676
677/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200678 * Get the next character without advancing.
679 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100681peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000682{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000683 static int after_slash = FALSE;
684
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685 if (curchr == -1)
686 {
687 switch (curchr = regparse[0])
688 {
689 case '.':
690 case '[':
691 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100692 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000693 if (reg_magic >= MAGIC_ON)
694 curchr = Magic(curchr);
695 break;
696 case '(':
697 case ')':
698 case '{':
699 case '%':
700 case '+':
701 case '=':
702 case '?':
703 case '@':
704 case '!':
705 case '&':
706 case '|':
707 case '<':
708 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100709 case '#': // future ext.
710 case '"': // future ext.
711 case '\'': // future ext.
712 case ',': // future ext.
713 case '-': // future ext.
714 case ':': // future ext.
715 case ';': // future ext.
716 case '`': // future ext.
717 case '/': // Can't be used in / command
718 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000719 if (reg_magic == MAGIC_ALL)
720 curchr = Magic(curchr);
721 break;
722 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100723 // * is not magic as the very first character, eg "?*ptr", when
724 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
725 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000726 if (reg_magic >= MAGIC_ON
727 && !at_start
728 && !(prev_at_start && prevchr == Magic('^'))
729 && (after_slash
730 || (prevchr != Magic('(')
731 && prevchr != Magic('&')
732 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733 curchr = Magic('*');
734 break;
735 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100736 // '^' is only magic as the very first character and if it's after
737 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000738 if (reg_magic >= MAGIC_OFF
739 && (at_start
740 || reg_magic == MAGIC_ALL
741 || prevchr == Magic('(')
742 || prevchr == Magic('|')
743 || prevchr == Magic('&')
744 || prevchr == Magic('n')
745 || (no_Magic(prevchr) == '('
746 && prevprevchr == Magic('%'))))
747 {
748 curchr = Magic('^');
749 at_start = TRUE;
750 prev_at_start = FALSE;
751 }
752 break;
753 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100754 // '$' is only magic as the very last char and if it's in front of
755 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000756 if (reg_magic >= MAGIC_OFF)
757 {
758 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200759 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000760
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100761 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000762 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200763 || p[1] == 'm' || p[1] == 'M'
764 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
765 {
766 if (p[1] == 'v')
767 is_magic_all = TRUE;
768 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
769 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000770 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200771 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000772 if (p[0] == NUL
773 || (p[0] == '\\'
774 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
775 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200776 || (is_magic_all
777 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000778 || reg_magic == MAGIC_ALL)
779 curchr = Magic('$');
780 }
781 break;
782 case '\\':
783 {
784 int c = regparse[1];
785
786 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100787 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000788 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 {
790 /*
791 * META contains everything that may be magic sometimes,
792 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200793 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000794 * magicness. Therefore, \ is so meta-magic that it is
795 * not in META.
796 */
797 curchr = -1;
798 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100799 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000800 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000801 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000802 peekchr();
803 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000804 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000805 curchr = toggle_Magic(curchr);
806 }
807 else if (vim_strchr(REGEXP_ABBR, c))
808 {
809 /*
810 * Handle abbreviations, like "\t" for TAB -- webb
811 */
812 curchr = backslash_trans(c);
813 }
814 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
815 curchr = toggle_Magic(c);
816 else
817 {
818 /*
819 * Next character can never be (made) magic?
820 * Then backslashing it won't do anything.
821 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 if (has_mbyte)
823 curchr = (*mb_ptr2char)(regparse + 1);
824 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000825 curchr = c;
826 }
827 break;
828 }
829
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 default:
831 if (has_mbyte)
832 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000833 }
834 }
835
836 return curchr;
837}
838
839/*
840 * Eat one lexed character. Do this in a way that we can undo it.
841 */
842 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100843skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000844{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100845 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000846 if (*regparse == '\\')
847 prevchr_len = 1;
848 else
849 prevchr_len = 0;
850 if (regparse[prevchr_len] != NUL)
851 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000852 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100853 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000854 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000855 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000856 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000857 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858 ++prevchr_len;
859 }
860 regparse += prevchr_len;
861 prev_at_start = at_start;
862 at_start = FALSE;
863 prevprevchr = prevchr;
864 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100865 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000866 nextchr = -1;
867}
868
869/*
870 * Skip a character while keeping the value of prev_at_start for at_start.
871 * prevchr and prevprevchr are also kept.
872 */
873 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100874skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000875{
876 int as = prev_at_start;
877 int pr = prevchr;
878 int prpr = prevprevchr;
879
880 skipchr();
881 at_start = as;
882 prevchr = pr;
883 prevprevchr = prpr;
884}
885
/*
 * Fetch the next lexed character from the pattern and consume it.
 * Magic-ness is resolved by peekchr(); skipchr() then advances past the
 * character.
 */
    static int
getchr(void)
{
    int	    lexed = peekchr();

    skipchr();
    return lexed;
}
898
899/*
900 * put character back. Works only once!
901 */
902 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100903ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000904{
905 nextchr = curchr;
906 curchr = prevchr;
907 prevchr = prevprevchr;
908 at_start = prev_at_start;
909 prev_at_start = FALSE;
910
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100911 // Backup regparse, so that it's at the same position as before the
912 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000913 regparse -= prevchr_len;
914}
915
916/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000917 * Get and return the value of the hex string at the current position.
918 * Return -1 if there is no valid hex number.
919 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000920 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000921 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000922 * The parameter controls the maximum number of input characters. This will be
923 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
924 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100925 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100926gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000927{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100928 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000929 int c;
930 int i;
931
932 for (i = 0; i < maxinputlen; ++i)
933 {
934 c = regparse[0];
935 if (!vim_isxdigit(c))
936 break;
937 nr <<= 4;
938 nr |= hex2nr(c);
939 ++regparse;
940 }
941
942 if (i == 0)
943 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100944 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000945}
946
947/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200948 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000949 * current position. Return -1 for invalid. Consumes all digits.
950 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100951 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100952getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000953{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100954 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000955 int c;
956 int i;
957
958 for (i = 0; ; ++i)
959 {
960 c = regparse[0];
961 if (c < '0' || c > '9')
962 break;
963 nr *= 10;
964 nr += c - '0';
965 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100966 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000967 }
968
969 if (i == 0)
970 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100971 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000972}
973
974/*
975 * get and return the value of the octal string immediately after the current
976 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
977 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
978 * treat 8 or 9 as recognised characters. Position is updated:
979 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000980 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000981 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100982 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100983getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000984{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100985 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000986 int c;
987 int i;
988
989 for (i = 0; i < 3 && nr < 040; ++i)
990 {
991 c = regparse[0];
992 if (c < '0' || c > '7')
993 break;
994 nr <<= 3;
995 nr |= hex2nr(c);
996 ++regparse;
997 }
998
999 if (i == 0)
1000 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001001 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001002}
1003
1004/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 * read_limits - Read two integers to be taken as a minimum and maximum.
1006 * If the first character is '-', then the range is reversed.
1007 * Should end with 'end'. If minval is missing, zero is default, if maxval is
1008 * missing, a very big number is the default.
1009 */
1010 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001011read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001012{
1013 int reverse = FALSE;
1014 char_u *first_char;
1015 long tmp;
1016
1017 if (*regparse == '-')
1018 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001019 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001020 regparse++;
1021 reverse = TRUE;
1022 }
1023 first_char = regparse;
1024 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001025 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001026 {
1027 if (vim_isdigit(*++regparse))
1028 *maxval = getdigits(&regparse);
1029 else
1030 *maxval = MAX_LIMIT;
1031 }
1032 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001033 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001034 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001035 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001036 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001037 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001038 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001039 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001040 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001041
1042 /*
1043 * Reverse the range if there was a '-', or make sure it is in the right
1044 * order otherwise.
1045 */
1046 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1047 {
1048 tmp = *minval;
1049 *minval = *maxval;
1050 *maxval = tmp;
1051 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001052 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001053 return OK;
1054}
1055
1056/*
1057 * vim_regexec and friends
1058 */
1059
1060/*
1061 * Global work variables for vim_regexec().
1062 */
1063
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001064static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001065#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001066static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001067#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001068static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001069static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001070
1071/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001072 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1073 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001074 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001075 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001076static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077static unsigned reg_tofreelen;
1078
1079/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001080 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001081 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001082 * done:
1083 * single-line multi-line
1084 * reg_match &regmatch_T NULL
1085 * reg_mmatch NULL &regmmatch_T
1086 * reg_startp reg_match->startp <invalid>
1087 * reg_endp reg_match->endp <invalid>
1088 * reg_startpos <invalid> reg_mmatch->startpos
1089 * reg_endpos <invalid> reg_mmatch->endpos
1090 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001091 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001092 * reg_firstlnum <invalid> first line in which to search
1093 * reg_maxline 0 last line nr
1094 * reg_line_lbr FALSE or TRUE FALSE
1095 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001096typedef struct {
1097 regmatch_T *reg_match;
1098 regmmatch_T *reg_mmatch;
1099 char_u **reg_startp;
1100 char_u **reg_endp;
1101 lpos_T *reg_startpos;
1102 lpos_T *reg_endpos;
1103 win_T *reg_win;
1104 buf_T *reg_buf;
1105 linenr_T reg_firstlnum;
1106 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001107 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001108
Bram Moolenaar0270f382018-07-17 05:43:58 +02001109 // The current match-position is stord in these variables:
1110 linenr_T lnum; // line number, relative to first line
1111 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001112 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001113
1114 int need_clear_subexpr; // subexpressions still need to be cleared
1115#ifdef FEAT_SYN_HL
1116 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1117 // cleared
1118#endif
1119
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001120 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1121 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1122 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001123 int reg_ic;
1124
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001125 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1126 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001127 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001128
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001129 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1130 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001131 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001132
1133 // State for the NFA engine regexec.
1134 int nfa_has_zend; // NFA regexp \ze operator encountered.
1135 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1136 int nfa_nsubexpr; // Number of sub expressions actually being used
1137 // during execution. 1 if only the whole match
1138 // (subexpr 0) is used.
1139 // listid is global, so that it increases on recursive calls to
1140 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1141 // all the states.
1142 int nfa_listid;
1143 int nfa_alt_listid;
1144
1145#ifdef FEAT_SYN_HL
1146 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1147#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001148} regexec_T;
1149
1150static regexec_T rex;
1151static int rex_in_use = FALSE;
1152
Bram Moolenaar071d4272004-06-13 20:20:40 +00001153/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001154 * Return TRUE if character 'c' is included in 'iskeyword' option for
1155 * "reg_buf" buffer.
1156 */
1157 static int
1158reg_iswordc(int c)
1159{
1160 return vim_iswordc_buf(c, rex.reg_buf);
1161}
1162
1163/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001164 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1165 */
1166 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001167reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001168{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001169 // when looking behind for a match/no-match lnum is negative. But we
1170 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001171 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001172 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001173 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001174 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001175 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001176 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001177}
1178
#ifdef FEAT_SYN_HL
// Workspace that marks where \z(...\) matches begin and end: as pointers and
// as line/column positions.
static char_u	*reg_startzp[NSUBEXP];	    // beginning of \z(...\) matches
static char_u	*reg_endzp[NSUBEXP];	    // end of \z(...\) matches
static lpos_T	reg_startzpos[NSUBEXP];	    // idem, beginning pos
static lpos_T	reg_endzpos[NSUBEXP];	    // idem, end pos
#endif

// TRUE if using multi-line regexp.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001188
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001190/*
1191 * Create a new extmatch and mark it as referenced once.
1192 */
1193 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001194make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001195{
1196 reg_extmatch_T *em;
1197
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001198 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199 if (em != NULL)
1200 em->refcnt = 1;
1201 return em;
1202}
1203
1204/*
1205 * Add a reference to an extmatch.
1206 */
1207 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001208ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001209{
1210 if (em != NULL)
1211 em->refcnt++;
1212 return em;
1213}
1214
1215/*
1216 * Remove a reference to an extmatch. If there are no references left, free
1217 * the info.
1218 */
1219 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001220unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001221{
1222 int i;
1223
1224 if (em != NULL && --em->refcnt <= 0)
1225 {
1226 for (i = 0; i < NSUBEXP; ++i)
1227 vim_free(em->matches[i]);
1228 vim_free(em);
1229 }
1230}
1231#endif
1232
1233/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 * Get class of previous character.
1235 */
1236 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001237reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001239 if (rex.input > rex.line)
1240 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001241 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001242 return -1;
1243}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001244
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001246 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001247 */
1248 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001249reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001250{
1251 pos_T top, bot;
1252 linenr_T lnum;
1253 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001254 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001255 int mode;
1256 colnr_T start, end;
1257 colnr_T start2, end2;
1258 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001259 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001260
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001261 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001262 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001263 return FALSE;
1264
1265 if (VIsual_active)
1266 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001267 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001268 {
1269 top = VIsual;
1270 bot = wp->w_cursor;
1271 }
1272 else
1273 {
1274 top = wp->w_cursor;
1275 bot = VIsual;
1276 }
1277 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001278 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001279 }
1280 else
1281 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001282 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001283 {
1284 top = curbuf->b_visual.vi_start;
1285 bot = curbuf->b_visual.vi_end;
1286 }
1287 else
1288 {
1289 top = curbuf->b_visual.vi_end;
1290 bot = curbuf->b_visual.vi_start;
1291 }
1292 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001293 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001294 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001295 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001296 if (lnum < top.lnum || lnum > bot.lnum)
1297 return FALSE;
1298
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001299 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001300 if (mode == 'v')
1301 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001302 if ((lnum == top.lnum && col < top.col)
1303 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1304 return FALSE;
1305 }
1306 else if (mode == Ctrl_V)
1307 {
1308 getvvcol(wp, &top, &start, NULL, &end);
1309 getvvcol(wp, &bot, &start2, NULL, &end2);
1310 if (start2 < start)
1311 start = start2;
1312 if (end2 > end)
1313 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001314 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001315 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001316
1317 // getvvcol() flushes rex.line, need to get it again
1318 rex.line = reg_getline(rex.lnum);
1319 rex.input = rex.line + col;
1320
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001321 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001322 if (cols < start || cols > end - (*p_sel == 'e'))
1323 return FALSE;
1324 }
1325 return TRUE;
1326}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001327
Bram Moolenaar071d4272004-06-13 20:20:40 +00001328/*
1329 * Check the regexp program for its magic number.
1330 * Return TRUE if it's wrong.
1331 */
1332 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001333prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001335 regprog_T *prog;
1336
Bram Moolenaar6100d022016-10-02 16:51:57 +02001337 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001338 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001339 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001340 return FALSE;
1341
1342 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001343 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001344 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001345 return TRUE;
1346 }
1347 return FALSE;
1348}
1349
1350/*
1351 * Cleanup the subexpressions, if this wasn't done yet.
1352 * This construction is used to clear the subexpressions only when they are
1353 * used (to increase speed).
1354 */
1355 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001356cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001358 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359 {
1360 if (REG_MULTI)
1361 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001362 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001363 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1364 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001365 }
1366 else
1367 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001368 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1369 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001371 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 }
1373}
1374
1375#ifdef FEAT_SYN_HL
1376 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001377cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001378{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001379 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001380 {
1381 if (REG_MULTI)
1382 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001383 // Use 0xff to set lnum to -1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1385 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1386 }
1387 else
1388 {
1389 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1390 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1391 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001392 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001393 }
1394}
1395#endif
1396
1397/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001398 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001399 */
1400 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001401reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001402{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001403 rex.line = reg_getline(++rex.lnum);
1404 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001405 fast_breakcheck();
1406}
1407
1408/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001409 * Check whether a backreference matches.
1410 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001411 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1412 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001413 */
1414 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001415match_with_backref(
1416 linenr_T start_lnum,
1417 colnr_T start_col,
1418 linenr_T end_lnum,
1419 colnr_T end_col,
1420 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001421{
1422 linenr_T clnum = start_lnum;
1423 colnr_T ccol = start_col;
1424 int len;
1425 char_u *p;
1426
1427 if (bytelen != NULL)
1428 *bytelen = 0;
1429 for (;;)
1430 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001431 // Since getting one line may invalidate the other, need to make copy.
1432 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001433 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001434 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001435 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001436 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1437 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001438 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001439 vim_free(reg_tofree);
1440 reg_tofree = alloc(len);
1441 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001442 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001443 reg_tofreelen = len;
1444 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001445 STRCPY(reg_tofree, rex.line);
1446 rex.input = reg_tofree + (rex.input - rex.line);
1447 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001448 }
1449
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001450 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001451 p = reg_getline(clnum);
1452 if (clnum == end_lnum)
1453 len = end_col - ccol;
1454 else
1455 len = (int)STRLEN(p + ccol);
1456
Bram Moolenaar0270f382018-07-17 05:43:58 +02001457 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001458 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001459 if (bytelen != NULL)
1460 *bytelen += len;
1461 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001462 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001463 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001464 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001465
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001466 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001467 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001468 if (bytelen != NULL)
1469 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001470 ++clnum;
1471 ccol = 0;
1472 if (got_int)
1473 return RA_FAIL;
1474 }
1475
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001476 // found a match! Note that rex.line may now point to a copy of the line,
1477 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001478 return RA_MATCH;
1479}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001480
Bram Moolenaarfb031402014-09-09 17:18:49 +02001481/*
1482 * Used in a place where no * or \+ can follow.
1483 */
1484 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001485re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001486{
1487 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001488 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001489 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001490 rc_did_emsg = TRUE;
1491 return FAIL;
1492 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001493 return OK;
1494}
1495
// One decomposition: up to three code points, unused slots are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for the code points 0xfb20 - 0xfb4f, indexed by
// (code point - 0xfb20).
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		// 0xfb20	alt ayin
    {0x5d0,0,0},		// 0xfb21	alt alef
    {0x5d3,0,0},		// 0xfb22	alt dalet
    {0x5d4,0,0},		// 0xfb23	alt he
    {0x5db,0,0},		// 0xfb24	alt kaf
    {0x5dc,0,0},		// 0xfb25	alt lamed
    {0x5dd,0,0},		// 0xfb26	alt mem-sofit
    {0x5e8,0,0},		// 0xfb27	alt resh
    {0x5ea,0,0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc,0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into up to three code points, stored in "*c1",
 * "*c2" and "*c3".
 * For 0xfb20 - 0xfb4f the values come from decomp_table[].  Any other
 * character is returned unchanged in "*c1", with "*c2" and "*c3" set to
 * zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    decomp_T	*entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
	// Outside the table: the character stands for itself.
	*c1 = c;
	*c2 = 0;
	*c3 = 0;
	return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001573
1574/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001575 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001576 * Return 0 if strings match, non-zero otherwise.
1577 * Correct the length "*n" when composing characters are ignored.
1578 */
1579 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001580cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001581{
1582 int result;
1583
Bram Moolenaar6100d022016-10-02 16:51:57 +02001584 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001585 result = STRNCMP(s1, s2, *n);
1586 else
1587 result = MB_STRNICMP(s1, s2, *n);
1588
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001589 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001590 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001591 {
1592 char_u *str1, *str2;
1593 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001594 int junk;
1595
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001596 // we have to handle the strcmp ourselves, since it is necessary to
1597 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001598 str1 = s1;
1599 str2 = s2;
1600 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001601 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 {
1603 c1 = mb_ptr2char_adv(&str1);
1604 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001605
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001606 // Decompose the character if necessary, into 'base' characters.
1607 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001608 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001610 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001611 mb_decompose(c1, &c11, &junk, &junk);
1612 mb_decompose(c2, &c12, &junk, &junk);
1613 c1 = c11;
1614 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001615 if (c11 != c12
1616 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001617 break;
1618 }
1619 }
1620 result = c2 - c1;
1621 if (result == 0)
1622 *n = (int)(str2 - s2);
1623 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001624
1625 return result;
1626}
1627
1628/*
1629 * cstrchr: This function is used a lot for simple searches, keep it fast!
1630 */
1631 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001632cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001633{
1634 char_u *p;
1635 int cc;
1636
Bram Moolenaara12a1612019-01-24 16:39:02 +01001637 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 return vim_strchr(s, c);
1639
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001640 // tolower() and toupper() can be slow, comparing twice should be a lot
1641 // faster (esp. when using MS Visual C++!).
1642 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001643 if (enc_utf8 && c > 0x80)
1644 cc = utf_fold(c);
1645 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001646 if (MB_ISUPPER(c))
1647 cc = MB_TOLOWER(c);
1648 else if (MB_ISLOWER(c))
1649 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001650 else
1651 return vim_strchr(s, c);
1652
Bram Moolenaar071d4272004-06-13 20:20:40 +00001653 if (has_mbyte)
1654 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001655 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001656 {
1657 if (enc_utf8 && c > 0x80)
1658 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001659 int uc = utf_ptr2char(p);
1660
1661 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1662 // not 0xff.
1663 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001664 return p;
1665 }
1666 else if (*p == c || *p == cc)
1667 return p;
1668 }
1669 }
1670 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001671 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672 for (p = s; *p != NUL; ++p)
1673 if (*p == c || *p == cc)
1674 return p;
1675
1676 return NULL;
1677}
1678
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001679////////////////////////////////////////////////////////////////
1680// regsub stuff //
1681////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001682
/*
 * fptr_T should be a pointer to a function returning a pointer to a function
 * returning a pointer to a function ...
 * Such a recursive type cannot be written down, so it is declared as a
 * pointer to a function that takes (int *, int) and returns a void pointer.
 * This should work for all compilers.
 */
typedef void *(*fptr_T)(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001690
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001691static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001692
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001693 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001694do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001695{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001696 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001697
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001698 return (fptr_T)NULL;
1699}
1700
1701 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001702do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001703{
1704 *d = MB_TOUPPER(c);
1705
1706 return (fptr_T)do_Upper;
1707}
1708
1709 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001710do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001711{
1712 *d = MB_TOLOWER(c);
1713
1714 return (fptr_T)NULL;
1715}
1716
1717 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001718do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001719{
1720 *d = MB_TOLOWER(c);
1721
1722 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001723}
1724
1725/*
1726 * regtilde(): Replace tildes in the pattern by the old pattern.
1727 *
1728 * Short explanation of the tilde: It stands for the previous replacement
1729 * pattern. If that previous pattern also contains a ~ we should go back a
1730 * step further... But we insert the previous pattern into the current one
1731 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001732 * This still does not handle the case where "magic" changes. So require the
1733 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734 *
1735 * The tildes are parsed once before the first call to vim_regsub().
1736 */
1737 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001738regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739{
1740 char_u *newsub = source;
1741 char_u *tmpsub;
1742 char_u *p;
1743 int len;
1744 int prevlen;
1745
1746 for (p = newsub; *p; ++p)
1747 {
1748 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1749 {
1750 if (reg_prev_sub != NULL)
1751 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001752 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001754 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755 if (tmpsub != NULL)
1756 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001757 // copy prefix
1758 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001759 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001760 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001761 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001762 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001764 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 STRCPY(tmpsub + len + prevlen, p + 1);
1766
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001767 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768 vim_free(newsub);
1769 newsub = tmpsub;
1770 p = newsub + len + prevlen;
1771 }
1772 }
1773 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001774 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001775 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001776 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777 --p;
1778 }
1779 else
1780 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001781 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001782 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001783 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001784 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785 }
1786 }
1787
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001788 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1789 // because recursive calls may make the returned string invalid.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001790 vim_free(reg_prev_sub);
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001791 reg_prev_sub = vim_strsave(newsub);
1792
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793 return newsub;
1794}
1795
#ifdef FEAT_EVAL
// TRUE when submatch() can be used
static int can_f_submatch = FALSE;

// Match state kept for reg_submatch().  Needed for when the substitution
// string is an expression that contains a call to substitute() and
// submatch().  The fields are filled from the "rex" fields of the same name
// before the expression is evaluated.
typedef struct {
    regmatch_T  *sm_match;      // from rex.reg_match
    regmmatch_T *sm_mmatch;     // from rex.reg_mmatch
    linenr_T    sm_firstlnum;   // from rex.reg_firstlnum
    linenr_T    sm_maxline;     // from rex.reg_maxline
    int         sm_line_lbr;    // from rex.reg_line_lbr
} regsubmatch_T;

// can only be used when can_f_submatch is TRUE
static regsubmatch_T rsm;
#endif
1812
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001813#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001814
1815/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001816 * Put the submatches in "argv[argskip]" which is a list passed into
1817 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001818 */
1819 static int
zeertzjq48db5da2022-09-16 12:10:03 +01001820fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001821{
1822 listitem_T *li;
1823 int i;
1824 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001825 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001826
zeertzjq48db5da2022-09-16 12:10:03 +01001827 if (!fp->uf_varargs && fp->uf_args.ga_len <= argskip)
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001828 // called function doesn't take a submatches argument
1829 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001830
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001831 // Relies on sl_list to be the first item in staticList10_T.
1832 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001833
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001834 // There are always 10 list items in staticList10_T.
1835 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001836 for (i = 0; i < 10; ++i)
1837 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001838 s = rsm.sm_match->startp[i];
1839 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001840 s = NULL;
1841 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001842 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001843 li->li_tv.v_type = VAR_STRING;
1844 li->li_tv.vval.v_string = s;
1845 li = li->li_next;
1846 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001847 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001848}
1849
1850 static void
1851clear_submatch_list(staticList10_T *sl)
1852{
1853 int i;
1854
1855 for (i = 0; i < 10; ++i)
1856 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1857}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001858#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001859
Bram Moolenaar071d4272004-06-13 20:20:40 +00001860/*
1861 * vim_regsub() - perform substitutions after a vim_regexec() or
1862 * vim_regexec_multi() match.
1863 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001864 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1865 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001866 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001867 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1868 *
1869 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1870 * double them to keep them, and insert a backslash before a CR to avoid it
1871 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001872 *
1873 * Note: The matched text must not change between the call of
1874 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1875 * references invalid!
1876 *
1877 * Returns the size of the replacement, including terminating NUL.
1878 */
1879 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001880vim_regsub(
1881 regmatch_T *rmp,
1882 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001883 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001884 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001885 int destlen,
1886 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001888 int result;
1889 regexec_T rex_save;
1890 int rex_in_use_save = rex_in_use;
1891
1892 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001893 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001894 rex_save = rex;
1895 rex_in_use = TRUE;
1896
1897 rex.reg_match = rmp;
1898 rex.reg_mmatch = NULL;
1899 rex.reg_maxline = 0;
1900 rex.reg_buf = curbuf;
1901 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001902 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001903
1904 rex_in_use = rex_in_use_save;
1905 if (rex_in_use)
1906 rex = rex_save;
1907
1908 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001910
1911 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001912vim_regsub_multi(
1913 regmmatch_T *rmp,
1914 linenr_T lnum,
1915 char_u *source,
1916 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001917 int destlen,
1918 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001920 int result;
1921 regexec_T rex_save;
1922 int rex_in_use_save = rex_in_use;
1923
1924 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001925 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001926 rex_save = rex;
1927 rex_in_use = TRUE;
1928
1929 rex.reg_match = NULL;
1930 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001931 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001932 rex.reg_firstlnum = lnum;
1933 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1934 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001935 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001936
1937 rex_in_use = rex_in_use_save;
1938 if (rex_in_use)
1939 rex = rex_save;
1940
1941 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942}
1943
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4

// Result of evaluating the substitute expression, one entry per nesting
// level.  Kept from the call that computes the length to the call that
// copies the text.
static char_u *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Free every saved expression result and reset the entries to NULL.
 */
    void
free_resub_eval_result(void)
{
    int         level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
        VIM_CLEAR(eval_result[level]);
        ++level;
    }
}
# endif
#endif
1960
Bram Moolenaar071d4272004-06-13 20:20:40 +00001961 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001962vim_regsub_both(
1963 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001964 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001965 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001966 int destlen,
1967 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001968{
1969 char_u *src;
1970 char_u *dst;
1971 char_u *s;
1972 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001973 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001974 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001975 fptr_T func_all = (fptr_T)NULL;
1976 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001977 linenr_T clnum = 0; // init for GCC
1978 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001979#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001980 static int nesting = 0;
1981 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001982#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001983 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001984
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001985 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001986 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001987 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001988 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989 return 0;
1990 }
1991 if (prog_magic_wrong())
1992 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001993#ifdef FEAT_EVAL
1994 if (nesting == MAX_REGSUB_NESTING)
1995 {
1996 emsg(_(e_substitute_nesting_too_deep));
1997 return 0;
1998 }
1999 nested = nesting;
2000#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001 src = source;
2002 dst = dest;
2003
2004 /*
2005 * When the substitute part starts with "\=" evaluate it as an expression.
2006 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002007 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002008 {
2009#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002010 // To make sure that the length doesn't change between checking the
2011 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01002012 // resulting string is saved from the call with
2013 // "flags & REGSUB_COPY" == 0 to the call with
2014 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002015 if (copy)
2016 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002017 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002018 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002019 STRCPY(dest, eval_result[nested]);
2020 dst += STRLEN(eval_result[nested]);
2021 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002022 }
2023 }
2024 else
2025 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002026 int prev_can_f_submatch = can_f_submatch;
2027 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002029 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002031 // The expression may contain substitute(), which calls us
2032 // recursively. Make sure submatch() gets the text from the first
2033 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002034 if (can_f_submatch)
2035 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002036 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002037 rsm.sm_match = rex.reg_match;
2038 rsm.sm_mmatch = rex.reg_mmatch;
2039 rsm.sm_firstlnum = rex.reg_firstlnum;
2040 rsm.sm_maxline = rex.reg_maxline;
2041 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002042
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002043 // Although unlikely, it is possible that the expression invokes a
2044 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002045 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002046 ++nesting;
2047
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002048 if (expr != NULL)
2049 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002050 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002051 char_u buf[NUMBUFLEN];
2052 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002053 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002054 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002055
2056 rettv.v_type = VAR_STRING;
2057 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002058 argv[0].v_type = VAR_LIST;
2059 argv[0].vval.v_list = &matchList.sl_list;
2060 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002061 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002062 funcexe.fe_argv_func = fill_submatch_list;
2063 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002064 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002065 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002066 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002067 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002068 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002069 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002070 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002071 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002072
Bram Moolenaar6100d022016-10-02 16:51:57 +02002073 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002074 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002075 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002076 }
LemonBoyf3b48952022-05-05 13:53:03 +01002077 else if (expr->v_type == VAR_INSTR)
2078 {
2079 exe_typval_instr(expr, &rettv);
2080 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002081 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002082 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002083 clear_submatch_list(&matchList);
2084
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002085 if (rettv.v_type == VAR_UNKNOWN)
2086 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002087 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002088 else
2089 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002090 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2091 if (eval_result[nested] != NULL)
2092 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002093 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002094 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002095 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002096 else if (substitute_instr != NULL)
2097 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002098 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002099 else
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002100 eval_result[nested] = eval_to_string(source + 2, TRUE);
2101 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002102
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002103 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002105 int had_backslash = FALSE;
2106
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002107 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002108 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002109 // Change NL to CR, so that it becomes a line break,
2110 // unless called from vim_regexec_nl().
2111 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002112 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002113 *s = CAR;
2114 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002115 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002116 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002117 /* Change NL to CR here too, so that this works:
2118 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2119 * abc\
2120 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002121 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002122 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002123 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002124 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002125 had_backslash = TRUE;
2126 }
2127 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002128 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002129 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002130 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002131 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002132 if (s != NULL)
2133 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002134 vim_free(eval_result[nested]);
2135 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002136 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002137 }
2138
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002139 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002140 }
2141
Bram Moolenaar6100d022016-10-02 16:51:57 +02002142 can_f_submatch = prev_can_f_submatch;
2143 if (can_f_submatch)
2144 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145 }
2146#endif
2147 }
2148 else
2149 while ((c = *src++) != NUL)
2150 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002151 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002152 no = 0;
2153 else if (c == '\\' && *src != NUL)
2154 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002155 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002156 {
2157 ++src;
2158 no = 0;
2159 }
2160 else if ('0' <= *src && *src <= '9')
2161 {
2162 no = *src++ - '0';
2163 }
2164 else if (vim_strchr((char_u *)"uUlLeE", *src))
2165 {
2166 switch (*src++)
2167 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002168 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002169 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002170 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002171 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002172 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002173 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002174 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002175 continue;
2176 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002177 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002178 continue;
2179 }
2180 }
2181 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002182 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002183 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002184 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2185 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002186 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002187 if (copy)
2188 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002189 if (dst + 3 > dest + destlen)
2190 {
2191 iemsg("vim_regsub_both(): not enough space");
2192 return 0;
2193 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002194 *dst++ = c;
2195 *dst++ = *src++;
2196 *dst++ = *src++;
2197 }
2198 else
2199 {
2200 dst += 3;
2201 src += 2;
2202 }
2203 continue;
2204 }
2205
Bram Moolenaar071d4272004-06-13 20:20:40 +00002206 if (c == '\\' && *src != NUL)
2207 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002208 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002209 switch (*src)
2210 {
2211 case 'r': c = CAR; ++src; break;
2212 case 'n': c = NL; ++src; break;
2213 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002214 // Oh no! \e already has meaning in subst pat :-(
2215 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002216 case 'b': c = Ctrl_H; ++src; break;
2217
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002218 // If "backslash" is TRUE the backslash will be removed
2219 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002220 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002221 {
2222 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002223 {
2224 if (dst + 1 > dest + destlen)
2225 {
2226 iemsg("vim_regsub_both(): not enough space");
2227 return 0;
2228 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002229 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002230 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002231 ++dst;
2232 }
2233 c = *src++;
2234 }
2235 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002236 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002237 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002238
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002239 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002240 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002241 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002242 func_one = (fptr_T)(func_one(&cc, c));
2243 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002244 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002245 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002246 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002247 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002248
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002249 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002250 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002251 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002252 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002253
Bram Moolenaar071d4272004-06-13 20:20:40 +00002254 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002255 {
2256 if (dst + charlen > dest + destlen)
2257 {
2258 iemsg("vim_regsub_both(): not enough space");
2259 return 0;
2260 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002261 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002262 }
2263 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002264 if (enc_utf8)
2265 {
2266 int clen = utf_ptr2len(src - 1);
2267
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002268 // If the character length is shorter than "totlen", there
2269 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002270 if (clen < totlen)
2271 {
2272 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002273 {
2274 if (dst + totlen - clen > dest + destlen)
2275 {
2276 iemsg("vim_regsub_both(): not enough space");
2277 return 0;
2278 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002279 mch_memmove(dst + 1, src - 1 + clen,
2280 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002281 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002282 dst += totlen - clen;
2283 }
2284 }
2285 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002286 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002287 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002288 {
2289 if (dst + 1 > dest + destlen)
2290 {
2291 iemsg("vim_regsub_both(): not enough space");
2292 return 0;
2293 }
2294 *dst = cc;
2295 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002296 dst++;
2297 }
2298 else
2299 {
2300 if (REG_MULTI)
2301 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002302 clnum = rex.reg_mmatch->startpos[no].lnum;
2303 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002304 s = NULL;
2305 else
2306 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002307 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2308 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2309 len = rex.reg_mmatch->endpos[no].col
2310 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002311 else
2312 len = (int)STRLEN(s);
2313 }
2314 }
2315 else
2316 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002317 s = rex.reg_match->startp[no];
2318 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002319 s = NULL;
2320 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002321 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002322 }
2323 if (s != NULL)
2324 {
2325 for (;;)
2326 {
2327 if (len == 0)
2328 {
2329 if (REG_MULTI)
2330 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002331 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002332 break;
2333 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002334 {
2335 if (dst + 1 > dest + destlen)
2336 {
2337 iemsg("vim_regsub_both(): not enough space");
2338 return 0;
2339 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002340 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002341 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002342 ++dst;
2343 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002344 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2345 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002346 else
2347 len = (int)STRLEN(s);
2348 }
2349 else
2350 break;
2351 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002352 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002353 {
2354 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002355 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002356 goto exit;
2357 }
2358 else
2359 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002360 if ((flags & REGSUB_BACKSLASH)
2361 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002362 {
2363 /*
2364 * Insert a backslash in front of a CR, otherwise
2365 * it will be replaced by a line break.
2366 * Number of backslashes will be halved later,
2367 * double them here.
2368 */
2369 if (copy)
2370 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002371 if (dst + 2 > dest + destlen)
2372 {
2373 iemsg("vim_regsub_both(): not enough space");
2374 return 0;
2375 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002376 dst[0] = '\\';
2377 dst[1] = *s;
2378 }
2379 dst += 2;
2380 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002381 else
2382 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002383 if (has_mbyte)
2384 c = mb_ptr2char(s);
2385 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002386 c = *s;
2387
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002388 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002389 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002390 func_one = (fptr_T)(func_one(&cc, c));
2391 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002392 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002393 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002394 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002395 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002396
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002397 if (has_mbyte)
2398 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002399 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002400 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002401
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002402 // Copy composing characters separately, one
2403 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002404 if (enc_utf8)
2405 l = utf_ptr2len(s) - 1;
2406 else
2407 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002408
2409 s += l;
2410 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002411 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002412 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002413 {
2414 if (dst + charlen > dest + destlen)
2415 {
2416 iemsg("vim_regsub_both(): not enough space");
2417 return 0;
2418 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002419 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002420 }
2421 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002422 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002423 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002424 {
2425 if (dst + 1 > dest + destlen)
2426 {
2427 iemsg("vim_regsub_both(): not enough space");
2428 return 0;
2429 }
2430 *dst = cc;
2431 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002432 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002433 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002434
Bram Moolenaar071d4272004-06-13 20:20:40 +00002435 ++s;
2436 --len;
2437 }
2438 }
2439 }
2440 no = -1;
2441 }
2442 }
2443 if (copy)
2444 *dst = NUL;
2445
2446exit:
2447 return (int)((dst - dest) + 1);
2448}
2449
2450#ifdef FEAT_EVAL
2451/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002452 * Call reg_getline() with the line numbers from the submatch. If a
2453 * substitute() was used the reg_maxline and other values have been
2454 * overwritten.
2455 */
2456 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002457reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002458{
2459 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002460 linenr_T save_first = rex.reg_firstlnum;
2461 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002462
Bram Moolenaar6100d022016-10-02 16:51:57 +02002463 rex.reg_firstlnum = rsm.sm_firstlnum;
2464 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002465
2466 s = reg_getline(lnum);
2467
Bram Moolenaar6100d022016-10-02 16:51:57 +02002468 rex.reg_firstlnum = save_first;
2469 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002470 return s;
2471}
2472
2473/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002474 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 * allocated memory.
2476 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2477 */
2478 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002479reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002480{
2481 char_u *retval = NULL;
2482 char_u *s;
2483 int len;
2484 int round;
2485 linenr_T lnum;
2486
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002487 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 return NULL;
2489
Bram Moolenaar6100d022016-10-02 16:51:57 +02002490 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002491 {
2492 /*
2493 * First round: compute the length and allocate memory.
2494 * Second round: copy the text.
2495 */
2496 for (round = 1; round <= 2; ++round)
2497 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002498 lnum = rsm.sm_mmatch->startpos[no].lnum;
2499 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002500 return NULL;
2501
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002502 s = reg_getline_submatch(lnum);
2503 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002505 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002506 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002508 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002509 len = rsm.sm_mmatch->endpos[no].col
2510 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002511 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002512 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002513 ++len;
2514 }
2515 else
2516 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002517 // Multiple lines: take start line from start col, middle
2518 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002519 len = (int)STRLEN(s);
2520 if (round == 2)
2521 {
2522 STRCPY(retval, s);
2523 retval[len] = '\n';
2524 }
2525 ++len;
2526 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002527 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002528 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002529 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002530 if (round == 2)
2531 STRCPY(retval + len, s);
2532 len += (int)STRLEN(s);
2533 if (round == 2)
2534 retval[len] = '\n';
2535 ++len;
2536 }
2537 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002538 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002539 rsm.sm_mmatch->endpos[no].col);
2540 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002541 if (round == 2)
2542 retval[len] = NUL;
2543 ++len;
2544 }
2545
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002546 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002547 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002548 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002549 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002550 return NULL;
2551 }
2552 }
2553 }
2554 else
2555 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002556 s = rsm.sm_match->startp[no];
2557 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002558 retval = NULL;
2559 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002560 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002561 }
2562
2563 return retval;
2564}
Bram Moolenaar41571762014-04-02 19:00:58 +02002565
2566/*
2567 * Used for the submatch() function with the optional non-zero argument: get
2568 * the list of strings from the n'th submatch in allocated memory with NULs
2569 * represented in NLs.
2570 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2571 * command, for a non-existing submatch and for any error.
2572 */
2573 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002574reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002575{
2576 char_u *s;
2577 linenr_T slnum;
2578 linenr_T elnum;
2579 colnr_T scol;
2580 colnr_T ecol;
2581 int i;
2582 list_T *list;
2583 int error = FALSE;
2584
2585 if (!can_f_submatch || no < 0)
2586 return NULL;
2587
Bram Moolenaar6100d022016-10-02 16:51:57 +02002588 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002589 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002590 slnum = rsm.sm_mmatch->startpos[no].lnum;
2591 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002592 if (slnum < 0 || elnum < 0)
2593 return NULL;
2594
Bram Moolenaar6100d022016-10-02 16:51:57 +02002595 scol = rsm.sm_mmatch->startpos[no].col;
2596 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002597
2598 list = list_alloc();
2599 if (list == NULL)
2600 return NULL;
2601
2602 s = reg_getline_submatch(slnum) + scol;
2603 if (slnum == elnum)
2604 {
2605 if (list_append_string(list, s, ecol - scol) == FAIL)
2606 error = TRUE;
2607 }
2608 else
2609 {
2610 if (list_append_string(list, s, -1) == FAIL)
2611 error = TRUE;
2612 for (i = 1; i < elnum - slnum; i++)
2613 {
2614 s = reg_getline_submatch(slnum + i);
2615 if (list_append_string(list, s, -1) == FAIL)
2616 error = TRUE;
2617 }
2618 s = reg_getline_submatch(elnum);
2619 if (list_append_string(list, s, ecol) == FAIL)
2620 error = TRUE;
2621 }
2622 }
2623 else
2624 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002625 s = rsm.sm_match->startp[no];
2626 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002627 return NULL;
2628 list = list_alloc();
2629 if (list == NULL)
2630 return NULL;
2631 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002632 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002633 error = TRUE;
2634 }
2635
2636 if (error)
2637 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002638 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002639 return NULL;
2640 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002641 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002642 return list;
2643}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002644#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002645
Bram Moolenaarf4140482020-02-15 23:06:45 +01002646/*
2647 * Initialize the values used for matching against multiple lines
2648 */
2649 static void
2650init_regexec_multi(
2651 regmmatch_T *rmp,
2652 win_T *win, // window in which to search or NULL
2653 buf_T *buf, // buffer in which to search
2654 linenr_T lnum) // nr of line to start looking for match
2655{
2656 rex.reg_match = NULL;
2657 rex.reg_mmatch = rmp;
2658 rex.reg_buf = buf;
2659 rex.reg_win = win;
2660 rex.reg_firstlnum = lnum;
2661 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2662 rex.reg_line_lbr = FALSE;
2663 rex.reg_ic = rmp->rmm_ic;
2664 rex.reg_icombine = FALSE;
2665 rex.reg_maxcol = rmp->rmm_maxcol;
2666}
2667
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002668#include "regexp_bt.c"
2669
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002670static regengine_T bt_regengine =
2671{
2672 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002673 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002674 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002675 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002676};
2677
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002678#include "regexp_nfa.c"
2679
2680static regengine_T nfa_regengine =
2681{
2682 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002683 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002684 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002685 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002686};
2687
// The regexp engine to use, needed for vim_regcomp().  It is set there from
// "p_re" or from a "\%#=" prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for debug messages, indexed by the engine number.
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2699
2700/*
2701 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002702 * Returns the program in allocated memory.
2703 * Use vim_regfree() to free the memory.
2704 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002705 */
2706 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002707vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002708{
2709 regprog_T *prog = NULL;
2710 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002711 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002712
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002713 regexp_engine = p_re;
2714
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002715 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002716 if (STRNCMP(expr, "\\%#=", 4) == 0)
2717 {
2718 int newengine = expr[4] - '0';
2719
2720 if (newengine == AUTOMATIC_ENGINE
2721 || newengine == BACKTRACKING_ENGINE
2722 || newengine == NFA_ENGINE)
2723 {
2724 regexp_engine = expr[4] - '0';
2725 expr += 5;
2726#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002727 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002728 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002729#endif
2730 }
2731 else
2732 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002733 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002734 regexp_engine = AUTOMATIC_ENGINE;
2735 }
2736 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002737#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002738 bt_regengine.expr = expr;
2739 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002740#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002741 // reg_iswordc() uses rex.reg_buf
2742 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002743
2744 /*
2745 * First try the NFA engine, unless backtracking was requested.
2746 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002747 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002748 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002749 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002750 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002751 else
2752 prog = bt_regengine.regcomp(expr, re_flags);
2753
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002754 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002755 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002756 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002757#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002758 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002759 {
2760 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002761 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002762 if (f)
2763 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002764 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002765 fclose(f);
2766 }
2767 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002768 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002769 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002770 }
2771#endif
2772 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002773 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002774 * The NFA engine also fails for patterns that it can't handle well
2775 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002776 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002777 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002778 if (regexp_engine == AUTOMATIC_ENGINE
2779 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002780 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002781 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002782#ifdef FEAT_EVAL
2783 report_re_switch(expr);
2784#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002785 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002786 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002787 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002788
Bram Moolenaarfda37292014-11-05 14:27:36 +01002789 if (prog != NULL)
2790 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002791 // Store the info needed to call regcomp() again when the engine turns
2792 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002793 prog->re_engine = regexp_engine;
2794 prog->re_flags = re_flags;
2795 }
2796
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002797 return prog;
2798}
2799
2800/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002801 * Free a compiled regexp program, returned by vim_regcomp().
2802 */
2803 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002804vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002805{
2806 if (prog != NULL)
2807 prog->engine->regfree(prog);
2808}
2809
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the file-level data kept for regexp handling: the "reg_tofree" and
 * "reg_prev_sub" pointers and the "regstack" and "backpos" growarrays.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
2820
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero, give a message that the backtracking
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2834
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return whether "prog" is currently being executed, which is the value of
 * its "re_in_use" flag.  "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int	    in_use = prog->re_in_use;

    return in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002845
2846/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002847 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002848 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002849 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002850 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002851 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002852 *
2853 * Return TRUE if there is a match, FALSE if not.
2854 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002855 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002856vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002857 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002858 char_u *line, // string to match against
2859 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002860 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002861{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002862 int result;
2863 regexec_T rex_save;
2864 int rex_in_use_save = rex_in_use;
2865
Bram Moolenaar0270f382018-07-17 05:43:58 +02002866 // Cannot use the same prog recursively, it contains state.
2867 if (rmp->regprog->re_in_use)
2868 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002869 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002870 return FALSE;
2871 }
2872 rmp->regprog->re_in_use = TRUE;
2873
Bram Moolenaar6100d022016-10-02 16:51:57 +02002874 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002875 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002876 rex_save = rex;
2877 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002878
Bram Moolenaar6100d022016-10-02 16:51:57 +02002879 rex.reg_startp = NULL;
2880 rex.reg_endp = NULL;
2881 rex.reg_startpos = NULL;
2882 rex.reg_endpos = NULL;
2883
2884 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002885 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002886
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002887 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002888 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2889 && result == NFA_TOO_EXPENSIVE)
2890 {
2891 int save_p_re = p_re;
2892 int re_flags = rmp->regprog->re_flags;
2893 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2894
2895 p_re = BACKTRACKING_ENGINE;
2896 vim_regfree(rmp->regprog);
2897 if (pat != NULL)
2898 {
2899#ifdef FEAT_EVAL
2900 report_re_switch(pat);
2901#endif
2902 rmp->regprog = vim_regcomp(pat, re_flags);
2903 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002904 {
2905 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002906 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002907 rmp->regprog->re_in_use = FALSE;
2908 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002909 vim_free(pat);
2910 }
2911
2912 p_re = save_p_re;
2913 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002914
2915 rex_in_use = rex_in_use_save;
2916 if (rex_in_use)
2917 rex = rex_save;
2918
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002919 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002920}
2921
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002922/*
2923 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002924 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002925 */
2926 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002927vim_regexec_prog(
2928 regprog_T **prog,
2929 int ignore_case,
2930 char_u *line,
2931 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002932{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002933 int r;
2934 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002935
2936 regmatch.regprog = *prog;
2937 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002938 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002939 *prog = regmatch.regprog;
2940 return r;
2941}
2942
2943/*
2944 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002945 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002946 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002947 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002948vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002949{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002950 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002951}
2952
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002953/*
2954 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002955 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002956 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002957 */
2958 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002959vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002960{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002961 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002962}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002963
2964/*
2965 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002966 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2967 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002968 * Uses curbuf for line count and 'iskeyword'.
2969 *
2970 * Return zero if there is no match. Return number of lines contained in the
2971 * match otherwise.
2972 */
2973 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002974vim_regexec_multi(
2975 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002976 win_T *win, // window in which to search or NULL
2977 buf_T *buf, // buffer in which to search
2978 linenr_T lnum, // nr of line to start looking for match
2979 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002980 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002981{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002982 int result;
2983 regexec_T rex_save;
2984 int rex_in_use_save = rex_in_use;
2985
Bram Moolenaar0270f382018-07-17 05:43:58 +02002986 // Cannot use the same prog recursively, it contains state.
2987 if (rmp->regprog->re_in_use)
2988 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002989 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002990 return FALSE;
2991 }
2992 rmp->regprog->re_in_use = TRUE;
2993
Bram Moolenaar6100d022016-10-02 16:51:57 +02002994 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002995 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002996 rex_save = rex;
2997 rex_in_use = TRUE;
2998
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002999 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003000 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003001 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003002
Bram Moolenaar63d9e732019-12-05 21:10:38 +01003003 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01003004 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
3005 && result == NFA_TOO_EXPENSIVE)
3006 {
3007 int save_p_re = p_re;
3008 int re_flags = rmp->regprog->re_flags;
3009 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
3010
3011 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003012 if (pat != NULL)
3013 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003014 regprog_T *prev_prog = rmp->regprog;
3015
Bram Moolenaarfda37292014-11-05 14:27:36 +01003016#ifdef FEAT_EVAL
3017 report_re_switch(pat);
3018#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003019#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003020 // checking for \z misuse was already done when compiling for NFA,
3021 // allow all here
3022 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003023#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003024 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003025#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003026 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003027#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003028 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003029 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003030 // Somehow compiling the pattern failed now, put back the
3031 // previous one to avoid "regprog" becoming NULL.
3032 rmp->regprog = prev_prog;
3033 }
3034 else
3035 {
3036 vim_regfree(prev_prog);
3037
Bram Moolenaar41499802018-07-18 06:02:09 +02003038 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003039 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003040 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003041 rmp->regprog->re_in_use = FALSE;
3042 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003043 vim_free(pat);
3044 }
3045 p_re = save_p_re;
3046 }
3047
Bram Moolenaar6100d022016-10-02 16:51:57 +02003048 rex_in_use = rex_in_use_save;
3049 if (rex_in_use)
3050 rex = rex_save;
3051
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003052 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003053}