/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 */

Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
#ifdef FEAT_RELTIME
// "timeout_flag" initially points at "dummy_timeout_flag", which is zero, so
// that the pointer can always be dereferenced.
static int dummy_timeout_flag = 0;
static const int *timeout_flag = &dummy_timeout_flag;
#endif

/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)

/*
 * Return the literal form of "x": when "x" is Magic the 256 offset is
 * removed, otherwise "x" is returned unchanged.
 */
    static int
no_Magic(int x)
{
    if (is_Magic(x))
	return un_Magic(x);
    return x;
}

/*
 * Flip the magicness of "x": a Magic value becomes its literal character, a
 * literal character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    if (is_Magic(x))
	return un_Magic(x);
    return Magic(x);
}

#ifdef FEAT_RELTIME
/*
 * Call start_timeout() with "msec" and remember the flag pointer it returns
 * in "timeout_flag".
 */
    void
init_regexp_timeout(long msec)
{
    timeout_flag = start_timeout(msec);
}

/*
 * Call stop_timeout().  "timeout_flag" itself is left as it is.
 */
    void
disable_regexp_timeout(void)
{
    stop_timeout();
}
#endif

/*
 * The first byte of the BT regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 */

#define REGMAGIC	0234

/*
 * Utility definitions.
 */
#define UCHARAT(p)	((int)*(char_u *)(p))

// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL
#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)


#define MAX_LIMIT	(32767L << 16L)

// return values for re_multi_type()
#define NOT_MULTI	0
#define MULTI_ONE	1
#define MULTI_MULT	2

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match

Bram Moolenaar071d4272004-06-13 20:20:40 +0000104/*
105 * Return NOT_MULTI if c is not a "multi" operator.
106 * Return MULTI_ONE if c is a single "multi" operator.
107 * Return MULTI_MULT if c is a multi "multi" operator.
108 */
109 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100110re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000111{
112 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
113 return MULTI_ONE;
114 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
115 return MULTI_MULT;
116 return NOT_MULTI;
117}
118
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000119static char_u *reg_prev_sub = NULL;
120
Bram Moolenaar071d4272004-06-13 20:20:40 +0000121/*
122 * REGEXP_INRANGE contains all characters which are always special in a []
123 * range after '\'.
124 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
125 * These are:
126 * \n - New line (NL).
127 * \r - Carriage Return (CR).
128 * \t - Tab (TAB).
129 * \e - Escape (ESC).
130 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000131 * \d - Character code in decimal, eg \d123
132 * \o - Character code in octal, eg \o80
133 * \x - Character code in hex, eg \x4a
134 * \u - Multibyte character code, eg \u20ac
135 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000136 */
137static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000138static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000139
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140/*
141 * Translate '\x' to its control character, except "\n", which is Magic.
142 */
143 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100144backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145{
146 switch (c)
147 {
148 case 'r': return CAR;
149 case 't': return TAB;
150 case 'e': return ESC;
151 case 'b': return BS;
152 }
153 return c;
154}
155
156/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000157 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000158 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
159 * recognized. Otherwise "pp" is advanced to after the item.
160 */
161 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100162get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000163{
164 static const char *(class_names[]) =
165 {
166 "alnum:]",
167#define CLASS_ALNUM 0
168 "alpha:]",
169#define CLASS_ALPHA 1
170 "blank:]",
171#define CLASS_BLANK 2
172 "cntrl:]",
173#define CLASS_CNTRL 3
174 "digit:]",
175#define CLASS_DIGIT 4
176 "graph:]",
177#define CLASS_GRAPH 5
178 "lower:]",
179#define CLASS_LOWER 6
180 "print:]",
181#define CLASS_PRINT 7
182 "punct:]",
183#define CLASS_PUNCT 8
184 "space:]",
185#define CLASS_SPACE 9
186 "upper:]",
187#define CLASS_UPPER 10
188 "xdigit:]",
189#define CLASS_XDIGIT 11
190 "tab:]",
191#define CLASS_TAB 12
192 "return:]",
193#define CLASS_RETURN 13
194 "backspace:]",
195#define CLASS_BACKSPACE 14
196 "escape:]",
197#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100198 "ident:]",
199#define CLASS_IDENT 16
200 "keyword:]",
201#define CLASS_KEYWORD 17
202 "fname:]",
203#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000204 };
205#define CLASS_NONE 99
206 int i;
207
208 if ((*pp)[1] == ':')
209 {
K.Takataeeec2542021-06-02 13:28:16 +0200210 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000211 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
212 {
213 *pp += STRLEN(class_names[i]) + 2;
214 return i;
215 }
216 }
217 return CLASS_NONE;
218}
219
220/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000221 * Specific version of character class functions.
222 * Using a table to keep this fast.
223 */
224static short class_tab[256];
225
226#define RI_DIGIT 0x01
227#define RI_HEX 0x02
228#define RI_OCTAL 0x04
229#define RI_WORD 0x08
230#define RI_HEAD 0x10
231#define RI_ALPHA 0x20
232#define RI_LOWER 0x40
233#define RI_UPPER 0x80
234#define RI_WHITE 0x100
235
236 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100237init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238{
239 int i;
240 static int done = FALSE;
241
242 if (done)
243 return;
244
245 for (i = 0; i < 256; ++i)
246 {
247 if (i >= '0' && i <= '7')
248 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
249 else if (i >= '8' && i <= '9')
250 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
251 else if (i >= 'a' && i <= 'f')
252 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000253 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
255 else if (i >= 'A' && i <= 'F')
256 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000257 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
259 else if (i == '_')
260 class_tab[i] = RI_WORD + RI_HEAD;
261 else
262 class_tab[i] = 0;
263 }
264 class_tab[' '] |= RI_WHITE;
265 class_tab['\t'] |= RI_WHITE;
266 done = TRUE;
267}
268
// Character class tests using class_tab[].  Values of 0x100 and above are
// never in a class.  NOTE: "c" is evaluated twice and must not be negative.
#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))

// flags for regflags
#define RF_ICASE    1	// ignore case
#define RF_NOICASE  2	// don't ignore case
#define RF_HASNL    4	// can match a NL
#define RF_ICOMBINE 8	// ignore combining characters
#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"

286/*
287 * Global work variables for vim_regcomp().
288 */
289
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100290static char_u *regparse; // Input-scan pointer.
291static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100292static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000293#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100294static int regnzpar; // \z() count.
295static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100297static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100299static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100302static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000303
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100304static int reg_string; // matching with a string instead of a buffer
305 // line
306static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000307
308/*
309 * META contains all characters that may be magic, except '^' and '$'.
310 */
311
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100312// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000313static char_u META_flags[] = {
314 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100316// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000317 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100318// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000319 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100320// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000321 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100322// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000323 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100324// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000325 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100326// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000327 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
328};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000329
static int	curchr;		// currently parsed character
// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;
static int	prevprevchr;	// previous-previous character
static int	nextchr;	// used for ungetchr()

// arguments for reg()
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)

Bram Moolenaar3737fc12013-06-01 14:42:56 +0200344typedef struct
345{
346 char_u *regparse;
347 int prevchr_len;
348 int curchr;
349 int prevchr;
350 int prevprevchr;
351 int nextchr;
352 int at_start;
353 int prev_at_start;
354 int regnpar;
355} parse_state_T;
356
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100357static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static int getchr(void);
359static void skipchr_keepstart(void);
360static int peekchr(void);
361static void skipchr(void);
362static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100363static long gethexchrs(int maxinputlen);
364static long getoctchrs(void);
365static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100366static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100367static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200368static int cstrncmp(char_u *s1, char_u *s2, int *n);
369static char_u *cstrchr(char_u *, int);
370static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100371static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100372#ifdef FEAT_EVAL
373static void report_re_switch(char_u *pat);
374#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200376static regengine_T bt_regengine;
377static regengine_T nfa_regengine;
378
Bram Moolenaar071d4272004-06-13 20:20:40 +0000379/*
380 * Return TRUE if compiled regular expression "prog" can match a line break.
381 */
382 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100383re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384{
385 return (prog->regflags & RF_HASNL);
386}
387
388/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000389 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
390 * Returns a character representing the class. Zero means that no item was
391 * recognized. Otherwise "pp" is advanced to after the item.
392 */
393 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100394get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000395{
396 int c;
397 int l = 1;
398 char_u *p = *pp;
399
Bram Moolenaar985079c2019-02-16 17:07:47 +0100400 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000401 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000402 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000403 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000404 if (p[l + 2] == '=' && p[l + 3] == ']')
405 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000406 if (has_mbyte)
407 c = mb_ptr2char(p + 2);
408 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000409 c = p[2];
410 *pp += l + 4;
411 return c;
412 }
413 }
414 return 0;
415}
416
417/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000418 * Check for a collating element "[.a.]". "pp" points to the '['.
419 * Returns a character. Zero means that no item was recognized. Otherwise
420 * "pp" is advanced to after the item.
421 * Currently only single characters are recognized!
422 */
423 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100424get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000425{
426 int c;
427 int l = 1;
428 char_u *p = *pp;
429
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100430 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000431 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000432 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000433 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000434 if (p[l + 2] == '.' && p[l + 3] == ']')
435 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000436 if (has_mbyte)
437 c = mb_ptr2char(p + 2);
438 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000439 c = p[2];
440 *pp += l + 4;
441 return c;
442 }
443 }
444 return 0;
445}
446
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100447static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
448static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200449
450 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100451get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200452{
453 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
454 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
455}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000456
457/*
458 * Skip over a "[]" range.
459 * "p" must point to the character after the '['.
460 * The returned pointer is on the matching ']', or the terminating NUL.
461 */
462 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100463skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000464{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000466
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100467 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000468 ++p;
469 if (*p == ']' || *p == '-')
470 ++p;
471 while (*p != NUL && *p != ']')
472 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000473 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000474 p += l;
475 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000476 if (*p == '-')
477 {
478 ++p;
479 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100480 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000481 }
482 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200483 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000484 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200485 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000486 p += 2;
487 else if (*p == '[')
488 {
489 if (get_char_class(&p) == CLASS_NONE
490 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200491 && get_coll_element(&p) == 0
492 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100493 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000494 }
495 else
496 ++p;
497 }
498
499 return p;
500}
501
502/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000503 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200504 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000505 * Take care of characters with a backslash in front of it.
506 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000507 */
508 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100509skip_regexp(
510 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200511 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200512 int magic)
513{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100514 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200515}
516
517/*
518 * Call skip_regexp() and when the delimiter does not match give an error and
519 * return NULL.
520 */
521 char_u *
522skip_regexp_err(
523 char_u *startp,
524 int delim,
525 int magic)
526{
527 char_u *p = skip_regexp(startp, delim, magic);
528
529 if (*p != delim)
530 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000531 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200532 return NULL;
533 }
534 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200535}
536
537/*
538 * skip_regexp() with extra arguments:
539 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
540 * expression and change "\?" to "?". If "*newp" is not NULL the expression
541 * is changed in-place.
542 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100543 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200544 */
545 char_u *
546skip_regexp_ex(
547 char_u *startp,
548 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100549 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200550 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100551 int *dropped,
552 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000553{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100554 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000555 char_u *p = startp;
556
557 if (magic)
558 mymagic = MAGIC_ON;
559 else
560 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200561 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000562
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100563 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000564 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100565 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000566 break;
567 if ((p[0] == '[' && mymagic >= MAGIC_ON)
568 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
569 {
570 p = skip_anyof(p + 1);
571 if (p[0] == NUL)
572 break;
573 }
574 else if (p[0] == '\\' && p[1] != NUL)
575 {
576 if (dirc == '?' && newp != NULL && p[1] == '?')
577 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100578 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000579 if (*newp == NULL)
580 {
581 *newp = vim_strsave(startp);
582 if (*newp != NULL)
583 p = *newp + (p - startp);
584 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200585 if (dropped != NULL)
586 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000587 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000588 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000589 else
590 ++p;
591 }
592 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100593 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594 if (*p == 'v')
595 mymagic = MAGIC_ALL;
596 else if (*p == 'V')
597 mymagic = MAGIC_NONE;
598 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000599 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100600 if (magic_val != NULL)
601 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000602 return p;
603}
604
/*
 * Functions for getting characters from the regexp input.
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;  // True when on the second character

Bram Moolenaar071d4272004-06-13 20:20:40 +0000612/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200613 * Start parsing at "str".
614 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000615 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100616initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000617{
618 regparse = str;
619 prevchr_len = 0;
620 curchr = prevprevchr = prevchr = nextchr = -1;
621 at_start = TRUE;
622 prev_at_start = FALSE;
623}
624
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200625/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200626 * Save the current parse state, so that it can be restored and parsing
627 * starts in the same state again.
628 */
629 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100630save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200631{
632 ps->regparse = regparse;
633 ps->prevchr_len = prevchr_len;
634 ps->curchr = curchr;
635 ps->prevchr = prevchr;
636 ps->prevprevchr = prevprevchr;
637 ps->nextchr = nextchr;
638 ps->at_start = at_start;
639 ps->prev_at_start = prev_at_start;
640 ps->regnpar = regnpar;
641}
642
643/*
644 * Restore a previously saved parse state.
645 */
646 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100647restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200648{
649 regparse = ps->regparse;
650 prevchr_len = ps->prevchr_len;
651 curchr = ps->curchr;
652 prevchr = ps->prevchr;
653 prevprevchr = ps->prevprevchr;
654 nextchr = ps->nextchr;
655 at_start = ps->at_start;
656 prev_at_start = ps->prev_at_start;
657 regnpar = ps->regnpar;
658}
659
660
661/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200662 * Get the next character without advancing.
663 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100665peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000667 static int after_slash = FALSE;
668
Bram Moolenaar071d4272004-06-13 20:20:40 +0000669 if (curchr == -1)
670 {
671 switch (curchr = regparse[0])
672 {
673 case '.':
674 case '[':
675 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100676 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000677 if (reg_magic >= MAGIC_ON)
678 curchr = Magic(curchr);
679 break;
680 case '(':
681 case ')':
682 case '{':
683 case '%':
684 case '+':
685 case '=':
686 case '?':
687 case '@':
688 case '!':
689 case '&':
690 case '|':
691 case '<':
692 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100693 case '#': // future ext.
694 case '"': // future ext.
695 case '\'': // future ext.
696 case ',': // future ext.
697 case '-': // future ext.
698 case ':': // future ext.
699 case ';': // future ext.
700 case '`': // future ext.
701 case '/': // Can't be used in / command
702 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703 if (reg_magic == MAGIC_ALL)
704 curchr = Magic(curchr);
705 break;
706 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100707 // * is not magic as the very first character, eg "?*ptr", when
708 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
709 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000710 if (reg_magic >= MAGIC_ON
711 && !at_start
712 && !(prev_at_start && prevchr == Magic('^'))
713 && (after_slash
714 || (prevchr != Magic('(')
715 && prevchr != Magic('&')
716 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717 curchr = Magic('*');
718 break;
719 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100720 // '^' is only magic as the very first character and if it's after
721 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722 if (reg_magic >= MAGIC_OFF
723 && (at_start
724 || reg_magic == MAGIC_ALL
725 || prevchr == Magic('(')
726 || prevchr == Magic('|')
727 || prevchr == Magic('&')
728 || prevchr == Magic('n')
729 || (no_Magic(prevchr) == '('
730 && prevprevchr == Magic('%'))))
731 {
732 curchr = Magic('^');
733 at_start = TRUE;
734 prev_at_start = FALSE;
735 }
736 break;
737 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100738 // '$' is only magic as the very last char and if it's in front of
739 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000740 if (reg_magic >= MAGIC_OFF)
741 {
742 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200743 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000744
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100745 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000746 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200747 || p[1] == 'm' || p[1] == 'M'
748 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
749 {
750 if (p[1] == 'v')
751 is_magic_all = TRUE;
752 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
753 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000754 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200755 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000756 if (p[0] == NUL
757 || (p[0] == '\\'
758 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
759 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200760 || (is_magic_all
761 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000762 || reg_magic == MAGIC_ALL)
763 curchr = Magic('$');
764 }
765 break;
766 case '\\':
767 {
768 int c = regparse[1];
769
770 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100771 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000772 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000773 {
774 /*
775 * META contains everything that may be magic sometimes,
776 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200777 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000778 * magicness. Therefore, \ is so meta-magic that it is
779 * not in META.
780 */
781 curchr = -1;
782 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100783 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000784 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000785 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000786 peekchr();
787 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000788 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000789 curchr = toggle_Magic(curchr);
790 }
791 else if (vim_strchr(REGEXP_ABBR, c))
792 {
793 /*
794 * Handle abbreviations, like "\t" for TAB -- webb
795 */
796 curchr = backslash_trans(c);
797 }
798 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
799 curchr = toggle_Magic(c);
800 else
801 {
802 /*
803 * Next character can never be (made) magic?
804 * Then backslashing it won't do anything.
805 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000806 if (has_mbyte)
807 curchr = (*mb_ptr2char)(regparse + 1);
808 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000809 curchr = c;
810 }
811 break;
812 }
813
Bram Moolenaar071d4272004-06-13 20:20:40 +0000814 default:
815 if (has_mbyte)
816 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000817 }
818 }
819
820 return curchr;
821}
822
823/*
824 * Eat one lexed character. Do this in a way that we can undo it.
825 */
826 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100827skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000828{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100829 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000830 if (*regparse == '\\')
831 prevchr_len = 1;
832 else
833 prevchr_len = 0;
834 if (regparse[prevchr_len] != NUL)
835 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000836 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100837 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000838 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000839 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000840 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000841 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842 ++prevchr_len;
843 }
844 regparse += prevchr_len;
845 prev_at_start = at_start;
846 at_start = FALSE;
847 prevprevchr = prevchr;
848 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100849 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000850 nextchr = -1;
851}
852
853/*
854 * Skip a character while keeping the value of prev_at_start for at_start.
855 * prevchr and prevprevchr are also kept.
856 */
857 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100858skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000859{
860 int as = prev_at_start;
861 int pr = prevchr;
862 int prpr = prevprevchr;
863
864 skipchr();
865 at_start = as;
866 prevchr = pr;
867 prevprevchr = prpr;
868}
869
/*
 * Return the next lexed character of the pattern and advance past it.
 * peekchr() does the lexical analysis (magicness and such), skipchr() does
 * the advancing.
 */
    static int
getchr(void)
{
    int		lexed = peekchr();

    skipchr();
    return lexed;
}
882
883/*
884 * put character back. Works only once!
885 */
886 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100887ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000888{
889 nextchr = curchr;
890 curchr = prevchr;
891 prevchr = prevprevchr;
892 at_start = prev_at_start;
893 prev_at_start = FALSE;
894
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100895 // Backup regparse, so that it's at the same position as before the
896 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000897 regparse -= prevchr_len;
898}
899
900/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000901 * Get and return the value of the hex string at the current position.
902 * Return -1 if there is no valid hex number.
903 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000904 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000905 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000906 * The parameter controls the maximum number of input characters. This will be
907 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
908 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100909 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100910gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000911{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100912 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000913 int c;
914 int i;
915
916 for (i = 0; i < maxinputlen; ++i)
917 {
918 c = regparse[0];
919 if (!vim_isxdigit(c))
920 break;
921 nr <<= 4;
922 nr |= hex2nr(c);
923 ++regparse;
924 }
925
926 if (i == 0)
927 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100928 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000929}
930
931/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200932 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000933 * current position. Return -1 for invalid. Consumes all digits.
934 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100935 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100936getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000937{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100938 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000939 int c;
940 int i;
941
942 for (i = 0; ; ++i)
943 {
944 c = regparse[0];
945 if (c < '0' || c > '9')
946 break;
947 nr *= 10;
948 nr += c - '0';
949 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100950 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000951 }
952
953 if (i == 0)
954 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100955 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000956}
957
958/*
959 * get and return the value of the octal string immediately after the current
960 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
961 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
962 * treat 8 or 9 as recognised characters. Position is updated:
963 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000964 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000965 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100966 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100967getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000968{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100969 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000970 int c;
971 int i;
972
973 for (i = 0; i < 3 && nr < 040; ++i)
974 {
975 c = regparse[0];
976 if (c < '0' || c > '7')
977 break;
978 nr <<= 3;
979 nr |= hex2nr(c);
980 ++regparse;
981 }
982
983 if (i == 0)
984 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100985 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000986}
987
988/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000989 * read_limits - Read two integers to be taken as a minimum and maximum.
990 * If the first character is '-', then the range is reversed.
991 * Should end with 'end'. If minval is missing, zero is default, if maxval is
992 * missing, a very big number is the default.
993 */
994 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100995read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000996{
997 int reverse = FALSE;
998 char_u *first_char;
999 long tmp;
1000
1001 if (*regparse == '-')
1002 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001003 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001004 regparse++;
1005 reverse = TRUE;
1006 }
1007 first_char = regparse;
1008 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001009 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001010 {
1011 if (vim_isdigit(*++regparse))
1012 *maxval = getdigits(&regparse);
1013 else
1014 *maxval = MAX_LIMIT;
1015 }
1016 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001017 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001019 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001020 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001021 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001022 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001023 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001024 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001025
1026 /*
1027 * Reverse the range if there was a '-', or make sure it is in the right
1028 * order otherwise.
1029 */
1030 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1031 {
1032 tmp = *minval;
1033 *minval = *maxval;
1034 *maxval = tmp;
1035 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001036 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 return OK;
1038}
1039
1040/*
1041 * vim_regexec and friends
1042 */
1043
1044/*
1045 * Global work variables for vim_regexec().
1046 */
1047
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001048static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001049#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001050static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001051#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001052static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001053static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001054
1055/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001056 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1057 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001058 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001059 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001060static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001061static unsigned reg_tofreelen;
1062
1063/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001064 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001065 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001066 * done:
1067 * single-line multi-line
1068 * reg_match &regmatch_T NULL
1069 * reg_mmatch NULL &regmmatch_T
1070 * reg_startp reg_match->startp <invalid>
1071 * reg_endp reg_match->endp <invalid>
1072 * reg_startpos <invalid> reg_mmatch->startpos
1073 * reg_endpos <invalid> reg_mmatch->endpos
1074 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001075 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001076 * reg_firstlnum <invalid> first line in which to search
1077 * reg_maxline 0 last line nr
1078 * reg_line_lbr FALSE or TRUE FALSE
1079 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001080typedef struct {
1081 regmatch_T *reg_match;
1082 regmmatch_T *reg_mmatch;
1083 char_u **reg_startp;
1084 char_u **reg_endp;
1085 lpos_T *reg_startpos;
1086 lpos_T *reg_endpos;
1087 win_T *reg_win;
1088 buf_T *reg_buf;
1089 linenr_T reg_firstlnum;
1090 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001091 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001092
Bram Moolenaar0270f382018-07-17 05:43:58 +02001093 // The current match-position is stord in these variables:
1094 linenr_T lnum; // line number, relative to first line
1095 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001096 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001097
1098 int need_clear_subexpr; // subexpressions still need to be cleared
1099#ifdef FEAT_SYN_HL
1100 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1101 // cleared
1102#endif
1103
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001104 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1105 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1106 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001107 int reg_ic;
1108
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001109 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1110 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001111 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001112
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001113 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1114 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001115 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001116
1117 // State for the NFA engine regexec.
1118 int nfa_has_zend; // NFA regexp \ze operator encountered.
1119 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1120 int nfa_nsubexpr; // Number of sub expressions actually being used
1121 // during execution. 1 if only the whole match
1122 // (subexpr 0) is used.
1123 // listid is global, so that it increases on recursive calls to
1124 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1125 // all the states.
1126 int nfa_listid;
1127 int nfa_alt_listid;
1128
1129#ifdef FEAT_SYN_HL
1130 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1131#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001132} regexec_T;
1133
1134static regexec_T rex;
1135static int rex_in_use = FALSE;
1136
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001138 * Return TRUE if character 'c' is included in 'iskeyword' option for
1139 * "reg_buf" buffer.
1140 */
1141 static int
1142reg_iswordc(int c)
1143{
1144 return vim_iswordc_buf(c, rex.reg_buf);
1145}
1146
1147/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001148 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1149 */
1150 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001151reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001152{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001153 // when looking behind for a match/no-match lnum is negative. But we
1154 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001155 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001156 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001157 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001158 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001159 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001160 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001161}
1162
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches, one entry per subexpression.
static char_u	*reg_startzp[NSUBEXP];	// start of match, as a pointer
static char_u	*reg_endzp[NSUBEXP];	// end of match, as a pointer
static lpos_T	reg_startzpos[NSUBEXP];	// start of match, as a position
static lpos_T	reg_endzpos[NSUBEXP];	// end of match, as a position
#endif

// TRUE when doing a multi-line match: rex.reg_match is only set for a
// single-line match.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001172
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174/*
1175 * Create a new extmatch and mark it as referenced once.
1176 */
1177 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001178make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001179{
1180 reg_extmatch_T *em;
1181
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001182 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001183 if (em != NULL)
1184 em->refcnt = 1;
1185 return em;
1186}
1187
1188/*
1189 * Add a reference to an extmatch.
1190 */
1191 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001192ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001193{
1194 if (em != NULL)
1195 em->refcnt++;
1196 return em;
1197}
1198
1199/*
1200 * Remove a reference to an extmatch. If there are no references left, free
1201 * the info.
1202 */
1203 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001204unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001205{
1206 int i;
1207
1208 if (em != NULL && --em->refcnt <= 0)
1209 {
1210 for (i = 0; i < NSUBEXP; ++i)
1211 vim_free(em->matches[i]);
1212 vim_free(em);
1213 }
1214}
1215#endif
1216
1217/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001218 * Get class of previous character.
1219 */
1220 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001221reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001222{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001223 if (rex.input > rex.line)
1224 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001225 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001226 return -1;
1227}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001228
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001229/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001230 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001231 */
1232 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001233reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001234{
1235 pos_T top, bot;
1236 linenr_T lnum;
1237 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001238 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001239 int mode;
1240 colnr_T start, end;
1241 colnr_T start2, end2;
1242 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001243 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001244
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001245 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001246 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001247 return FALSE;
1248
1249 if (VIsual_active)
1250 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001251 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001252 {
1253 top = VIsual;
1254 bot = wp->w_cursor;
1255 }
1256 else
1257 {
1258 top = wp->w_cursor;
1259 bot = VIsual;
1260 }
1261 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001262 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001263 }
1264 else
1265 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001266 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001267 {
1268 top = curbuf->b_visual.vi_start;
1269 bot = curbuf->b_visual.vi_end;
1270 }
1271 else
1272 {
1273 top = curbuf->b_visual.vi_end;
1274 bot = curbuf->b_visual.vi_start;
1275 }
1276 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001277 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001278 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001279 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001280 if (lnum < top.lnum || lnum > bot.lnum)
1281 return FALSE;
1282
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001283 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001284 if (mode == 'v')
1285 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001286 if ((lnum == top.lnum && col < top.col)
1287 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1288 return FALSE;
1289 }
1290 else if (mode == Ctrl_V)
1291 {
1292 getvvcol(wp, &top, &start, NULL, &end);
1293 getvvcol(wp, &bot, &start2, NULL, &end2);
1294 if (start2 < start)
1295 start = start2;
1296 if (end2 > end)
1297 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001298 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001299 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001300
1301 // getvvcol() flushes rex.line, need to get it again
1302 rex.line = reg_getline(rex.lnum);
1303 rex.input = rex.line + col;
1304
1305 cols = win_linetabsize(wp, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001306 if (cols < start || cols > end - (*p_sel == 'e'))
1307 return FALSE;
1308 }
1309 return TRUE;
1310}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001311
Bram Moolenaar071d4272004-06-13 20:20:40 +00001312/*
1313 * Check the regexp program for its magic number.
1314 * Return TRUE if it's wrong.
1315 */
1316 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001317prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001318{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001319 regprog_T *prog;
1320
Bram Moolenaar6100d022016-10-02 16:51:57 +02001321 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001322 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001323 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001324 return FALSE;
1325
1326 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001328 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329 return TRUE;
1330 }
1331 return FALSE;
1332}
1333
1334/*
1335 * Cleanup the subexpressions, if this wasn't done yet.
1336 * This construction is used to clear the subexpressions only when they are
1337 * used (to increase speed).
1338 */
1339 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001340cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001341{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001342 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001343 {
1344 if (REG_MULTI)
1345 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001346 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001347 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1348 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 }
1350 else
1351 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001352 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1353 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001354 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001355 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 }
1357}
1358
1359#ifdef FEAT_SYN_HL
1360 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001361cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001362{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001363 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364 {
1365 if (REG_MULTI)
1366 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001367 // Use 0xff to set lnum to -1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1369 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1370 }
1371 else
1372 {
1373 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1374 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1375 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001376 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377 }
1378}
1379#endif
1380
1381/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001382 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001383 */
1384 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001385reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001386{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001387 rex.line = reg_getline(++rex.lnum);
1388 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001389 fast_breakcheck();
1390}
1391
1392/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001393 * Check whether a backreference matches.
1394 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001395 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1396 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001397 */
1398 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001399match_with_backref(
1400 linenr_T start_lnum,
1401 colnr_T start_col,
1402 linenr_T end_lnum,
1403 colnr_T end_col,
1404 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001405{
1406 linenr_T clnum = start_lnum;
1407 colnr_T ccol = start_col;
1408 int len;
1409 char_u *p;
1410
1411 if (bytelen != NULL)
1412 *bytelen = 0;
1413 for (;;)
1414 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001415 // Since getting one line may invalidate the other, need to make copy.
1416 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001417 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001418 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001419 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001420 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1421 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001422 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001423 vim_free(reg_tofree);
1424 reg_tofree = alloc(len);
1425 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001426 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001427 reg_tofreelen = len;
1428 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001429 STRCPY(reg_tofree, rex.line);
1430 rex.input = reg_tofree + (rex.input - rex.line);
1431 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001432 }
1433
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001434 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001435 p = reg_getline(clnum);
1436 if (clnum == end_lnum)
1437 len = end_col - ccol;
1438 else
1439 len = (int)STRLEN(p + ccol);
1440
Bram Moolenaar0270f382018-07-17 05:43:58 +02001441 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001442 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001443 if (bytelen != NULL)
1444 *bytelen += len;
1445 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001446 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001447 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001448 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001449
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001450 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001451 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001452 if (bytelen != NULL)
1453 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001454 ++clnum;
1455 ccol = 0;
1456 if (got_int)
1457 return RA_FAIL;
1458 }
1459
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001460 // found a match! Note that rex.line may now point to a copy of the line,
1461 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001462 return RA_MATCH;
1463}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001464
Bram Moolenaarfb031402014-09-09 17:18:49 +02001465/*
1466 * Used in a place where no * or \+ can follow.
1467 */
1468 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001469re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001470{
1471 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001472 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001473 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001474 rc_did_emsg = TRUE;
1475 return FAIL;
1476 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001477 return OK;
1478}
1479
// The up to three characters that a precomposed character decomposes into.
// Unused trailing members are zero.
typedef struct
{
    int a, b, c;
} decomp_T;

// First and last character that has an entry in decomp_table[].
#define DECOMP_FIRST	0xfb20
#define DECOMP_LAST	0xfb4f

// Decomposition of the precomposed Hebrew characters 0xfb20 - 0xfb4f,
// indexed with the character minus DECOMP_FIRST.  Characters that are
// marked UNUSED map to themselves.
static const decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},		// 0xfb20	alt ayin
    {0x5d0,0,0},		// 0xfb21	alt alef
    {0x5d3,0,0},		// 0xfb22	alt dalet
    {0x5d4,0,0},		// 0xfb23	alt he
    {0x5db,0,0},		// 0xfb24	alt kaf
    {0x5dc,0,0},		// 0xfb25	alt lamed
    {0x5dd,0,0},		// 0xfb26	alt mem-sofit
    {0x5e8,0,0},		// 0xfb27	alt resh
    {0x5ea,0,0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc,0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into up to three characters, stored in "*c1",
 * "*c2" and "*c3".  Parts that are not used are set to zero.
 * A character without an entry in decomp_table[] is returned in "*c1"
 * unchanged, with "*c2" and "*c3" zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    const decomp_T	*d;

    if (c < DECOMP_FIRST || c > DECOMP_LAST)
    {
	*c1 = c;
	*c2 = *c3 = 0;
	return;
    }

    d = &decomp_table[c - DECOMP_FIRST];
    *c1 = d->a;
    *c2 = d->b;
    *c3 = d->c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001557
1558/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001559 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560 * Return 0 if strings match, non-zero otherwise.
1561 * Correct the length "*n" when composing characters are ignored.
1562 */
1563 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001564cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001565{
1566 int result;
1567
Bram Moolenaar6100d022016-10-02 16:51:57 +02001568 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001569 result = STRNCMP(s1, s2, *n);
1570 else
1571 result = MB_STRNICMP(s1, s2, *n);
1572
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001573 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001574 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001575 {
1576 char_u *str1, *str2;
1577 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578 int junk;
1579
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001580 // we have to handle the strcmp ourselves, since it is necessary to
1581 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001582 str1 = s1;
1583 str2 = s2;
1584 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001585 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001586 {
1587 c1 = mb_ptr2char_adv(&str1);
1588 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001589
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001590 // Decompose the character if necessary, into 'base' characters.
1591 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001592 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001593 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001594 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001595 mb_decompose(c1, &c11, &junk, &junk);
1596 mb_decompose(c2, &c12, &junk, &junk);
1597 c1 = c11;
1598 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001599 if (c11 != c12
1600 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001601 break;
1602 }
1603 }
1604 result = c2 - c1;
1605 if (result == 0)
1606 *n = (int)(str2 - s2);
1607 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001608
1609 return result;
1610}
1611
1612/*
1613 * cstrchr: This function is used a lot for simple searches, keep it fast!
1614 */
1615 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001616cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001617{
1618 char_u *p;
1619 int cc;
1620
Bram Moolenaara12a1612019-01-24 16:39:02 +01001621 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001622 return vim_strchr(s, c);
1623
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001624 // tolower() and toupper() can be slow, comparing twice should be a lot
1625 // faster (esp. when using MS Visual C++!).
1626 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001627 if (enc_utf8 && c > 0x80)
1628 cc = utf_fold(c);
1629 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001630 if (MB_ISUPPER(c))
1631 cc = MB_TOLOWER(c);
1632 else if (MB_ISLOWER(c))
1633 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001634 else
1635 return vim_strchr(s, c);
1636
Bram Moolenaar071d4272004-06-13 20:20:40 +00001637 if (has_mbyte)
1638 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001639 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001640 {
1641 if (enc_utf8 && c > 0x80)
1642 {
1643 if (utf_fold(utf_ptr2char(p)) == cc)
1644 return p;
1645 }
1646 else if (*p == c || *p == cc)
1647 return p;
1648 }
1649 }
1650 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001651 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001652 for (p = s; *p != NUL; ++p)
1653 if (*p == c || *p == cc)
1654 return p;
1655
1656 return NULL;
1657}
1658
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001659////////////////////////////////////////////////////////////////
1660// regsub stuff //
1661////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001662
Bram Moolenaar071d4272004-06-13 20:20:40 +00001663/*
 * We should define fptr as a pointer to a function returning a pointer to
1665 * a function returning a pointer to a function ...
1666 * This is impossible, so we declare a pointer to a function returning a
Bram Moolenaar30d64132020-09-06 17:09:12 +02001667 * void pointer. This should work for all compilers.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001668 */
Bram Moolenaar30d64132020-09-06 17:09:12 +02001669typedef void (*(*fptr_T)(int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001670
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001671static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001673 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001674do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001675{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001676 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001677
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001678 return (fptr_T)NULL;
1679}
1680
1681 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001682do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001683{
1684 *d = MB_TOUPPER(c);
1685
1686 return (fptr_T)do_Upper;
1687}
1688
1689 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001690do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001691{
1692 *d = MB_TOLOWER(c);
1693
1694 return (fptr_T)NULL;
1695}
1696
1697 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001698do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001699{
1700 *d = MB_TOLOWER(c);
1701
1702 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001703}
1704
1705/*
1706 * regtilde(): Replace tildes in the pattern by the old pattern.
1707 *
1708 * Short explanation of the tilde: It stands for the previous replacement
1709 * pattern. If that previous pattern also contains a ~ we should go back a
1710 * step further... But we insert the previous pattern into the current one
1711 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001712 * This still does not handle the case where "magic" changes. So require the
1713 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001714 *
1715 * The tildes are parsed once before the first call to vim_regsub().
1716 */
1717 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001718regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719{
1720 char_u *newsub = source;
1721 char_u *tmpsub;
1722 char_u *p;
1723 int len;
1724 int prevlen;
1725
1726 for (p = newsub; *p; ++p)
1727 {
1728 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1729 {
1730 if (reg_prev_sub != NULL)
1731 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001732 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001733 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001734 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001735 if (tmpsub != NULL)
1736 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001737 // copy prefix
1738 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001740 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001741 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001742 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001743 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001744 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001745 STRCPY(tmpsub + len + prevlen, p + 1);
1746
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001747 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748 vim_free(newsub);
1749 newsub = tmpsub;
1750 p = newsub + len + prevlen;
1751 }
1752 }
1753 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001754 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001755 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001756 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001757 --p;
1758 }
1759 else
1760 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001761 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001762 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001764 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001765 }
1766 }
1767
1768 vim_free(reg_prev_sub);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001769 if (newsub != source) // newsub was allocated, just keep it
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 reg_prev_sub = newsub;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001771 else // no ~ found, need to save newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772 reg_prev_sub = vim_strsave(newsub);
1773 return newsub;
1774}
1775
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// These pointers are used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// vim_regsub_both() fills the fields from the matching "rex" fields before
// it evaluates the expression.
typedef struct {
    regmatch_T	*sm_match;	// set from rex.reg_match
    regmmatch_T	*sm_mmatch;	// set from rex.reg_mmatch
    linenr_T	sm_firstlnum;	// set from rex.reg_firstlnum
    linenr_T	sm_maxline;	// set from rex.reg_maxline
    int		sm_line_lbr;	// set from rex.reg_line_lbr; when FALSE a NL
				// in the expression result becomes a CR
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1792
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001793#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001794
1795/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001796 * Put the submatches in "argv[argskip]" which is a list passed into
1797 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001798 */
1799 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001800fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001801{
1802 listitem_T *li;
1803 int i;
1804 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001805 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001806
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001807 if (argcount == argskip)
1808 // called function doesn't take a submatches argument
1809 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001810
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001811 // Relies on sl_list to be the first item in staticList10_T.
1812 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001813
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001814 // There are always 10 list items in staticList10_T.
1815 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001816 for (i = 0; i < 10; ++i)
1817 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001818 s = rsm.sm_match->startp[i];
1819 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001820 s = NULL;
1821 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001822 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001823 li->li_tv.v_type = VAR_STRING;
1824 li->li_tv.vval.v_string = s;
1825 li = li->li_next;
1826 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001827 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001828}
1829
1830 static void
1831clear_submatch_list(staticList10_T *sl)
1832{
1833 int i;
1834
1835 for (i = 0; i < 10; ++i)
1836 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1837}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001838#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001839
Bram Moolenaar071d4272004-06-13 20:20:40 +00001840/*
1841 * vim_regsub() - perform substitutions after a vim_regexec() or
1842 * vim_regexec_multi() match.
1843 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001844 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1845 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001846 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001847 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1848 *
1849 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1850 * double them to keep them, and insert a backslash before a CR to avoid it
1851 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001852 *
1853 * Note: The matched text must not change between the call of
1854 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1855 * references invalid!
1856 *
1857 * Returns the size of the replacement, including terminating NUL.
1858 */
1859 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001860vim_regsub(
1861 regmatch_T *rmp,
1862 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001863 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001864 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001865 int destlen,
1866 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001867{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001868 int result;
1869 regexec_T rex_save;
1870 int rex_in_use_save = rex_in_use;
1871
1872 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001873 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001874 rex_save = rex;
1875 rex_in_use = TRUE;
1876
1877 rex.reg_match = rmp;
1878 rex.reg_mmatch = NULL;
1879 rex.reg_maxline = 0;
1880 rex.reg_buf = curbuf;
1881 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001882 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001883
1884 rex_in_use = rex_in_use_save;
1885 if (rex_in_use)
1886 rex = rex_save;
1887
1888 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001889}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001890
1891 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001892vim_regsub_multi(
1893 regmmatch_T *rmp,
1894 linenr_T lnum,
1895 char_u *source,
1896 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001897 int destlen,
1898 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001899{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001900 int result;
1901 regexec_T rex_save;
1902 int rex_in_use_save = rex_in_use;
1903
1904 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001905 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001906 rex_save = rex;
1907 rex_in_use = TRUE;
1908
1909 rex.reg_match = NULL;
1910 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001911 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001912 rex.reg_firstlnum = lnum;
1913 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1914 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001915 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001916
1917 rex_in_use = rex_in_use_save;
1918 if (rex_in_use)
1919 rex = rex_save;
1920
1921 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922}
1923
1924 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001925vim_regsub_both(
1926 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001927 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001928 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001929 int destlen,
1930 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931{
1932 char_u *src;
1933 char_u *dst;
1934 char_u *s;
1935 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001936 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001938 fptr_T func_all = (fptr_T)NULL;
1939 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001940 linenr_T clnum = 0; // init for GCC
1941 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001943 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001944#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001945 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001946
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001947 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001948 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001949 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001950 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001951 return 0;
1952 }
1953 if (prog_magic_wrong())
1954 return 0;
1955 src = source;
1956 dst = dest;
1957
1958 /*
1959 * When the substitute part starts with "\=" evaluate it as an expression.
1960 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001961 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001962 {
1963#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001964 // To make sure that the length doesn't change between checking the
1965 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01001966 // resulting string is saved from the call with
1967 // "flags & REGSUB_COPY" == 0 to the call with
1968 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001969 if (copy)
1970 {
1971 if (eval_result != NULL)
1972 {
1973 STRCPY(dest, eval_result);
1974 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01001975 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001976 }
1977 }
1978 else
1979 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001980 int prev_can_f_submatch = can_f_submatch;
1981 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001982
Paul Ollis65745772022-06-05 16:55:54 +01001983 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001984
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001985 // The expression may contain substitute(), which calls us
1986 // recursively. Make sure submatch() gets the text from the first
1987 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001988 if (can_f_submatch)
1989 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001990 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001991 rsm.sm_match = rex.reg_match;
1992 rsm.sm_mmatch = rex.reg_mmatch;
1993 rsm.sm_firstlnum = rex.reg_firstlnum;
1994 rsm.sm_maxline = rex.reg_maxline;
1995 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001996
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001997 if (expr != NULL)
1998 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001999 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002000 char_u buf[NUMBUFLEN];
2001 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002002 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002003 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002004
2005 rettv.v_type = VAR_STRING;
2006 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002007 argv[0].v_type = VAR_LIST;
2008 argv[0].vval.v_list = &matchList.sl_list;
2009 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002010 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002011 funcexe.fe_argv_func = fill_submatch_list;
2012 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002013 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002014 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002015 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002016 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002017 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002018 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002019 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002020 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002021
Bram Moolenaar6100d022016-10-02 16:51:57 +02002022 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002023 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002024 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002025 }
LemonBoyf3b48952022-05-05 13:53:03 +01002026 else if (expr->v_type == VAR_INSTR)
2027 {
2028 exe_typval_instr(expr, &rettv);
2029 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002030 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002031 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002032 clear_submatch_list(&matchList);
2033
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002034 if (rettv.v_type == VAR_UNKNOWN)
2035 // something failed, no need to report another error
2036 eval_result = NULL;
2037 else
2038 {
2039 eval_result = tv_get_string_buf_chk(&rettv, buf);
2040 if (eval_result != NULL)
2041 eval_result = vim_strsave(eval_result);
2042 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002043 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002044 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002045 else if (substitute_instr != NULL)
2046 // Execute instructions from ISN_SUBSTITUTE.
2047 eval_result = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002048 else
Bram Moolenaarb171fb12020-06-24 20:34:03 +02002049 eval_result = eval_to_string(source + 2, TRUE);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002050
Bram Moolenaar071d4272004-06-13 20:20:40 +00002051 if (eval_result != NULL)
2052 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002053 int had_backslash = FALSE;
2054
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002055 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002056 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002057 // Change NL to CR, so that it becomes a line break,
2058 // unless called from vim_regexec_nl().
2059 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002060 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002061 *s = CAR;
2062 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002063 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002064 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002065 /* Change NL to CR here too, so that this works:
2066 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2067 * abc\
2068 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002069 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002070 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002071 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002072 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002073 had_backslash = TRUE;
2074 }
2075 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002076 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002077 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002078 // Backslashes will be consumed, need to double them.
Bram Moolenaar06975a42010-03-23 16:27:22 +01002079 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2080 if (s != NULL)
2081 {
2082 vim_free(eval_result);
2083 eval_result = s;
2084 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002085 }
2086
2087 dst += STRLEN(eval_result);
2088 }
2089
Bram Moolenaar6100d022016-10-02 16:51:57 +02002090 can_f_submatch = prev_can_f_submatch;
2091 if (can_f_submatch)
2092 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002093 }
2094#endif
2095 }
2096 else
2097 while ((c = *src++) != NUL)
2098 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002099 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002100 no = 0;
2101 else if (c == '\\' && *src != NUL)
2102 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002103 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104 {
2105 ++src;
2106 no = 0;
2107 }
2108 else if ('0' <= *src && *src <= '9')
2109 {
2110 no = *src++ - '0';
2111 }
2112 else if (vim_strchr((char_u *)"uUlLeE", *src))
2113 {
2114 switch (*src++)
2115 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002116 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002117 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002118 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002119 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002120 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002121 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002122 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002123 continue;
2124 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002125 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126 continue;
2127 }
2128 }
2129 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002130 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002131 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002132 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2133 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002134 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002135 if (copy)
2136 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002137 if (dst + 3 > dest + destlen)
2138 {
2139 iemsg("vim_regsub_both(): not enough space");
2140 return 0;
2141 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002142 *dst++ = c;
2143 *dst++ = *src++;
2144 *dst++ = *src++;
2145 }
2146 else
2147 {
2148 dst += 3;
2149 src += 2;
2150 }
2151 continue;
2152 }
2153
Bram Moolenaar071d4272004-06-13 20:20:40 +00002154 if (c == '\\' && *src != NUL)
2155 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002156 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002157 switch (*src)
2158 {
2159 case 'r': c = CAR; ++src; break;
2160 case 'n': c = NL; ++src; break;
2161 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002162 // Oh no! \e already has meaning in subst pat :-(
2163 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002164 case 'b': c = Ctrl_H; ++src; break;
2165
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002166 // If "backslash" is TRUE the backslash will be removed
2167 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002168 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002169 {
2170 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002171 {
2172 if (dst + 1 > dest + destlen)
2173 {
2174 iemsg("vim_regsub_both(): not enough space");
2175 return 0;
2176 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002177 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002178 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002179 ++dst;
2180 }
2181 c = *src++;
2182 }
2183 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002184 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002185 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002186
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002187 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002188 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002189 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002190 func_one = (fptr_T)(func_one(&cc, c));
2191 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002192 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002193 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002194 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002195 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002196
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002197 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002198 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002199 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002200 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002201
Bram Moolenaar071d4272004-06-13 20:20:40 +00002202 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002203 {
2204 if (dst + charlen > dest + destlen)
2205 {
2206 iemsg("vim_regsub_both(): not enough space");
2207 return 0;
2208 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002209 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002210 }
2211 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002212 if (enc_utf8)
2213 {
2214 int clen = utf_ptr2len(src - 1);
2215
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002216 // If the character length is shorter than "totlen", there
2217 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002218 if (clen < totlen)
2219 {
2220 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002221 {
2222 if (dst + totlen - clen > dest + destlen)
2223 {
2224 iemsg("vim_regsub_both(): not enough space");
2225 return 0;
2226 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002227 mch_memmove(dst + 1, src - 1 + clen,
2228 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002229 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002230 dst += totlen - clen;
2231 }
2232 }
2233 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002234 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002235 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002236 {
2237 if (dst + 1 > dest + destlen)
2238 {
2239 iemsg("vim_regsub_both(): not enough space");
2240 return 0;
2241 }
2242 *dst = cc;
2243 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002244 dst++;
2245 }
2246 else
2247 {
2248 if (REG_MULTI)
2249 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002250 clnum = rex.reg_mmatch->startpos[no].lnum;
2251 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002252 s = NULL;
2253 else
2254 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002255 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2256 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2257 len = rex.reg_mmatch->endpos[no].col
2258 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002259 else
2260 len = (int)STRLEN(s);
2261 }
2262 }
2263 else
2264 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002265 s = rex.reg_match->startp[no];
2266 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002267 s = NULL;
2268 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002269 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002270 }
2271 if (s != NULL)
2272 {
2273 for (;;)
2274 {
2275 if (len == 0)
2276 {
2277 if (REG_MULTI)
2278 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002279 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002280 break;
2281 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002282 {
2283 if (dst + 1 > dest + destlen)
2284 {
2285 iemsg("vim_regsub_both(): not enough space");
2286 return 0;
2287 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002288 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002289 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002290 ++dst;
2291 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002292 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2293 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002294 else
2295 len = (int)STRLEN(s);
2296 }
2297 else
2298 break;
2299 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002300 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002301 {
2302 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002303 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002304 goto exit;
2305 }
2306 else
2307 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002308 if ((flags & REGSUB_BACKSLASH)
2309 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002310 {
2311 /*
2312 * Insert a backslash in front of a CR, otherwise
2313 * it will be replaced by a line break.
2314 * Number of backslashes will be halved later,
2315 * double them here.
2316 */
2317 if (copy)
2318 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002319 if (dst + 2 > dest + destlen)
2320 {
2321 iemsg("vim_regsub_both(): not enough space");
2322 return 0;
2323 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002324 dst[0] = '\\';
2325 dst[1] = *s;
2326 }
2327 dst += 2;
2328 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002329 else
2330 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002331 if (has_mbyte)
2332 c = mb_ptr2char(s);
2333 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002334 c = *s;
2335
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002336 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002337 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002338 func_one = (fptr_T)(func_one(&cc, c));
2339 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002340 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002341 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002342 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002343 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002344
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002345 if (has_mbyte)
2346 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002347 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002348 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002349
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002350 // Copy composing characters separately, one
2351 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002352 if (enc_utf8)
2353 l = utf_ptr2len(s) - 1;
2354 else
2355 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002356
2357 s += l;
2358 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002359 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002360 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002361 {
2362 if (dst + charlen > dest + destlen)
2363 {
2364 iemsg("vim_regsub_both(): not enough space");
2365 return 0;
2366 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002367 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002368 }
2369 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002370 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002371 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002372 {
2373 if (dst + 1 > dest + destlen)
2374 {
2375 iemsg("vim_regsub_both(): not enough space");
2376 return 0;
2377 }
2378 *dst = cc;
2379 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002380 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002381 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002382
Bram Moolenaar071d4272004-06-13 20:20:40 +00002383 ++s;
2384 --len;
2385 }
2386 }
2387 }
2388 no = -1;
2389 }
2390 }
2391 if (copy)
2392 *dst = NUL;
2393
2394exit:
2395 return (int)((dst - dest) + 1);
2396}
2397
2398#ifdef FEAT_EVAL
2399/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002400 * Call reg_getline() with the line numbers from the submatch. If a
2401 * substitute() was used the reg_maxline and other values have been
2402 * overwritten.
2403 */
2404 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002405reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002406{
2407 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002408 linenr_T save_first = rex.reg_firstlnum;
2409 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002410
Bram Moolenaar6100d022016-10-02 16:51:57 +02002411 rex.reg_firstlnum = rsm.sm_firstlnum;
2412 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002413
2414 s = reg_getline(lnum);
2415
Bram Moolenaar6100d022016-10-02 16:51:57 +02002416 rex.reg_firstlnum = save_first;
2417 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002418 return s;
2419}
2420
2421/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002422 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002423 * allocated memory.
2424 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2425 */
2426 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002427reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002428{
2429 char_u *retval = NULL;
2430 char_u *s;
2431 int len;
2432 int round;
2433 linenr_T lnum;
2434
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002435 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002436 return NULL;
2437
Bram Moolenaar6100d022016-10-02 16:51:57 +02002438 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002439 {
2440 /*
2441 * First round: compute the length and allocate memory.
2442 * Second round: copy the text.
2443 */
2444 for (round = 1; round <= 2; ++round)
2445 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002446 lnum = rsm.sm_mmatch->startpos[no].lnum;
2447 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002448 return NULL;
2449
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002450 s = reg_getline_submatch(lnum);
2451 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002452 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002453 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002454 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002455 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002456 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002457 len = rsm.sm_mmatch->endpos[no].col
2458 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002459 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002460 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002461 ++len;
2462 }
2463 else
2464 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002465 // Multiple lines: take start line from start col, middle
2466 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002467 len = (int)STRLEN(s);
2468 if (round == 2)
2469 {
2470 STRCPY(retval, s);
2471 retval[len] = '\n';
2472 }
2473 ++len;
2474 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002475 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002476 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002477 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 if (round == 2)
2479 STRCPY(retval + len, s);
2480 len += (int)STRLEN(s);
2481 if (round == 2)
2482 retval[len] = '\n';
2483 ++len;
2484 }
2485 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002486 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002487 rsm.sm_mmatch->endpos[no].col);
2488 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002489 if (round == 2)
2490 retval[len] = NUL;
2491 ++len;
2492 }
2493
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002494 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002495 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002496 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002497 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002498 return NULL;
2499 }
2500 }
2501 }
2502 else
2503 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002504 s = rsm.sm_match->startp[no];
2505 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002506 retval = NULL;
2507 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002508 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002509 }
2510
2511 return retval;
2512}
Bram Moolenaar41571762014-04-02 19:00:58 +02002513
2514/*
2515 * Used for the submatch() function with the optional non-zero argument: get
2516 * the list of strings from the n'th submatch in allocated memory with NULs
2517 * represented in NLs.
2518 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2519 * command, for a non-existing submatch and for any error.
2520 */
2521 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002522reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002523{
2524 char_u *s;
2525 linenr_T slnum;
2526 linenr_T elnum;
2527 colnr_T scol;
2528 colnr_T ecol;
2529 int i;
2530 list_T *list;
2531 int error = FALSE;
2532
2533 if (!can_f_submatch || no < 0)
2534 return NULL;
2535
Bram Moolenaar6100d022016-10-02 16:51:57 +02002536 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002537 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002538 slnum = rsm.sm_mmatch->startpos[no].lnum;
2539 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002540 if (slnum < 0 || elnum < 0)
2541 return NULL;
2542
Bram Moolenaar6100d022016-10-02 16:51:57 +02002543 scol = rsm.sm_mmatch->startpos[no].col;
2544 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002545
2546 list = list_alloc();
2547 if (list == NULL)
2548 return NULL;
2549
2550 s = reg_getline_submatch(slnum) + scol;
2551 if (slnum == elnum)
2552 {
2553 if (list_append_string(list, s, ecol - scol) == FAIL)
2554 error = TRUE;
2555 }
2556 else
2557 {
2558 if (list_append_string(list, s, -1) == FAIL)
2559 error = TRUE;
2560 for (i = 1; i < elnum - slnum; i++)
2561 {
2562 s = reg_getline_submatch(slnum + i);
2563 if (list_append_string(list, s, -1) == FAIL)
2564 error = TRUE;
2565 }
2566 s = reg_getline_submatch(elnum);
2567 if (list_append_string(list, s, ecol) == FAIL)
2568 error = TRUE;
2569 }
2570 }
2571 else
2572 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002573 s = rsm.sm_match->startp[no];
2574 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002575 return NULL;
2576 list = list_alloc();
2577 if (list == NULL)
2578 return NULL;
2579 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002580 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002581 error = TRUE;
2582 }
2583
2584 if (error)
2585 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002586 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002587 return NULL;
2588 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002589 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002590 return list;
2591}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002592#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002593
Bram Moolenaarf4140482020-02-15 23:06:45 +01002594/*
2595 * Initialize the values used for matching against multiple lines
2596 */
2597 static void
2598init_regexec_multi(
2599 regmmatch_T *rmp,
2600 win_T *win, // window in which to search or NULL
2601 buf_T *buf, // buffer in which to search
2602 linenr_T lnum) // nr of line to start looking for match
2603{
2604 rex.reg_match = NULL;
2605 rex.reg_mmatch = rmp;
2606 rex.reg_buf = buf;
2607 rex.reg_win = win;
2608 rex.reg_firstlnum = lnum;
2609 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2610 rex.reg_line_lbr = FALSE;
2611 rex.reg_ic = rmp->rmm_ic;
2612 rex.reg_icombine = FALSE;
2613 rex.reg_maxcol = rmp->rmm_maxcol;
2614}
2615
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002616#include "regexp_bt.c"
2617
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002618static regengine_T bt_regengine =
2619{
2620 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002621 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002622 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002623 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002624};
2625
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002626#include "regexp_nfa.c"
2627
2628static regengine_T nfa_regengine =
2629{
2630 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002631 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002632 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002633 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002634};
2635
// Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
// "p_re" and from a "\%#=" prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number given after "\%#=".
static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
                    "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                };
#endif
2647
2648/*
2649 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002650 * Returns the program in allocated memory.
2651 * Use vim_regfree() to free the memory.
2652 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002653 */
2654 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002655vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002656{
2657 regprog_T *prog = NULL;
2658 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002659 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002660
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002661 regexp_engine = p_re;
2662
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002663 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002664 if (STRNCMP(expr, "\\%#=", 4) == 0)
2665 {
2666 int newengine = expr[4] - '0';
2667
2668 if (newengine == AUTOMATIC_ENGINE
2669 || newengine == BACKTRACKING_ENGINE
2670 || newengine == NFA_ENGINE)
2671 {
2672 regexp_engine = expr[4] - '0';
2673 expr += 5;
2674#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002675 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002676 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002677#endif
2678 }
2679 else
2680 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002681 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002682 regexp_engine = AUTOMATIC_ENGINE;
2683 }
2684 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002685#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002686 bt_regengine.expr = expr;
2687 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002688#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002689 // reg_iswordc() uses rex.reg_buf
2690 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002691
2692 /*
2693 * First try the NFA engine, unless backtracking was requested.
2694 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002695 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002696 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002697 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002698 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002699 else
2700 prog = bt_regengine.regcomp(expr, re_flags);
2701
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002702 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002703 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002704 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002705#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002706 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002707 {
2708 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002709 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002710 if (f)
2711 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002712 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002713 fclose(f);
2714 }
2715 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002716 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002717 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002718 }
2719#endif
2720 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002721 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002722 * The NFA engine also fails for patterns that it can't handle well
2723 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002724 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002725 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002726 if (regexp_engine == AUTOMATIC_ENGINE
2727 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002728 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002729 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002730#ifdef FEAT_EVAL
2731 report_re_switch(expr);
2732#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002733 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002734 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002735 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002736
Bram Moolenaarfda37292014-11-05 14:27:36 +01002737 if (prog != NULL)
2738 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002739 // Store the info needed to call regcomp() again when the engine turns
2740 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002741 prog->re_engine = regexp_engine;
2742 prog->re_flags = re_flags;
2743 }
2744
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002745 return prog;
2746}
2747
2748/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002749 * Free a compiled regexp program, returned by vim_regcomp().
2750 */
2751 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002752vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002753{
2754 if (prog != NULL)
2755 prog->engine->regfree(prog);
2756}
2757
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory held by the file-level regexp state: the "regstack" and
 * "backpos" growarrays and the "reg_tofree" and "reg_prev_sub" strings.
 */
    void
free_regexp_stuff(void)
{
    // growarrays
    ga_clear(&regstack);
    ga_clear(&backpos);

    // allocated strings
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
2768
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero, give a message that the backtracking
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
        return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2782
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return whether "prog" is currently being executed, which is the value of
 * its "re_in_use" flag.  "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    return prog->re_in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002793
2794/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002795 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002796 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002797 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002798 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002799 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002800 *
2801 * Return TRUE if there is a match, FALSE if not.
2802 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002803 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002804vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002805 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002806 char_u *line, // string to match against
2807 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002808 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002809{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002810 int result;
2811 regexec_T rex_save;
2812 int rex_in_use_save = rex_in_use;
2813
Bram Moolenaar0270f382018-07-17 05:43:58 +02002814 // Cannot use the same prog recursively, it contains state.
2815 if (rmp->regprog->re_in_use)
2816 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002817 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002818 return FALSE;
2819 }
2820 rmp->regprog->re_in_use = TRUE;
2821
Bram Moolenaar6100d022016-10-02 16:51:57 +02002822 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002823 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002824 rex_save = rex;
2825 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002826
Bram Moolenaar6100d022016-10-02 16:51:57 +02002827 rex.reg_startp = NULL;
2828 rex.reg_endp = NULL;
2829 rex.reg_startpos = NULL;
2830 rex.reg_endpos = NULL;
2831
2832 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002833 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002834
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002835 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002836 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2837 && result == NFA_TOO_EXPENSIVE)
2838 {
2839 int save_p_re = p_re;
2840 int re_flags = rmp->regprog->re_flags;
2841 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2842
2843 p_re = BACKTRACKING_ENGINE;
2844 vim_regfree(rmp->regprog);
2845 if (pat != NULL)
2846 {
2847#ifdef FEAT_EVAL
2848 report_re_switch(pat);
2849#endif
2850 rmp->regprog = vim_regcomp(pat, re_flags);
2851 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002852 {
2853 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002854 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002855 rmp->regprog->re_in_use = FALSE;
2856 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002857 vim_free(pat);
2858 }
2859
2860 p_re = save_p_re;
2861 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002862
2863 rex_in_use = rex_in_use_save;
2864 if (rex_in_use)
2865 rex = rex_save;
2866
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002867 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002868}
2869
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002870/*
2871 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002872 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002873 */
2874 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002875vim_regexec_prog(
2876 regprog_T **prog,
2877 int ignore_case,
2878 char_u *line,
2879 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002880{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002881 int r;
2882 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002883
2884 regmatch.regprog = *prog;
2885 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002886 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002887 *prog = regmatch.regprog;
2888 return r;
2889}
2890
2891/*
2892 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002893 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002894 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002895 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002896vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002897{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002898 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002899}
2900
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002901/*
2902 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002903 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002904 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002905 */
2906 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002907vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002908{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002909 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002910}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002911
2912/*
2913 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002914 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2915 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002916 * Uses curbuf for line count and 'iskeyword'.
2917 *
2918 * Return zero if there is no match. Return number of lines contained in the
2919 * match otherwise.
2920 */
2921 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002922vim_regexec_multi(
2923 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002924 win_T *win, // window in which to search or NULL
2925 buf_T *buf, // buffer in which to search
2926 linenr_T lnum, // nr of line to start looking for match
2927 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002928 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002929{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002930 int result;
2931 regexec_T rex_save;
2932 int rex_in_use_save = rex_in_use;
2933
Bram Moolenaar0270f382018-07-17 05:43:58 +02002934 // Cannot use the same prog recursively, it contains state.
2935 if (rmp->regprog->re_in_use)
2936 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002937 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002938 return FALSE;
2939 }
2940 rmp->regprog->re_in_use = TRUE;
2941
Bram Moolenaar6100d022016-10-02 16:51:57 +02002942 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002943 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002944 rex_save = rex;
2945 rex_in_use = TRUE;
2946
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002947 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002948 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002949 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002950
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002951 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002952 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2953 && result == NFA_TOO_EXPENSIVE)
2954 {
2955 int save_p_re = p_re;
2956 int re_flags = rmp->regprog->re_flags;
2957 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2958
2959 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002960 if (pat != NULL)
2961 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002962 regprog_T *prev_prog = rmp->regprog;
2963
Bram Moolenaarfda37292014-11-05 14:27:36 +01002964#ifdef FEAT_EVAL
2965 report_re_switch(pat);
2966#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002967#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002968 // checking for \z misuse was already done when compiling for NFA,
2969 // allow all here
2970 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002971#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01002972 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002973#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002974 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002975#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002976 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002977 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002978 // Somehow compiling the pattern failed now, put back the
2979 // previous one to avoid "regprog" becoming NULL.
2980 rmp->regprog = prev_prog;
2981 }
2982 else
2983 {
2984 vim_regfree(prev_prog);
2985
Bram Moolenaar41499802018-07-18 06:02:09 +02002986 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002987 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002988 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002989 rmp->regprog->re_in_use = FALSE;
2990 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002991 vim_free(pat);
2992 }
2993 p_re = save_p_re;
2994 }
2995
Bram Moolenaar6100d022016-10-02 16:51:57 +02002996 rex_in_use = rex_in_use_save;
2997 if (rex_in_use)
2998 rex = rex_save;
2999
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003000 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003001}