blob: 32661f6d8a64f0a7ca0aabffd37ae6c94962bba8 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
24static int dummy_timeout_flag = 0;
Bram Moolenaar616592e2022-06-17 15:17:10 +010025static volatile int *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
Bram Moolenaar071d4272004-06-13 20:20:40 +000028/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000029 * Magic characters have a special meaning, they don't match literally.
30 * Magic characters are negative. This separates them from literal characters
31 * (possibly multi-byte). Only ASCII characters can be Magic.
32 */
33#define Magic(x) ((int)(x) - 256)
34#define un_Magic(x) ((x) + 256)
35#define is_Magic(x) ((x) < 0)
36
/*
 * Return "x" without its magic: a Magic (negative) value is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Flip the magicness of "x": a Magic value becomes the plain character and
 * a plain character becomes its Magic counterpart.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
54 void
55init_regexp_timeout(long msec)
56{
57 timeout_flag = start_timeout(msec);
58}
59
60 void
61disable_regexp_timeout(void)
62{
63 stop_timeout();
Bram Moolenaar1f30caf2022-06-19 14:36:35 +010064 timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010065}
66#endif
67
Bram Moolenaar071d4272004-06-13 20:20:40 +000068/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020069 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +000070 * number; the start node begins in the second byte. It's used to catch the
71 * most severe mutilation of the program by the caller.
72 */
73
74#define REGMAGIC 0234
75
76/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000077 * Utility definitions.
78 */
79#define UCHARAT(p) ((int)*(char_u *)(p))
80
Bram Moolenaar63d9e732019-12-05 21:10:38 +010081// Used for an error (down from) vim_regcomp(): give the error message, set
82// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010083#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
84#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
85#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
86#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +010087#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010088#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +000089#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000090
Bram Moolenaar95f09602016-11-10 20:01:45 +010091
Bram Moolenaar071d4272004-06-13 20:20:40 +000092#define MAX_LIMIT (32767L << 16L)
93
Bram Moolenaar071d4272004-06-13 20:20:40 +000094#define NOT_MULTI 0
95#define MULTI_ONE 1
96#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020097
98// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +010099#define RA_FAIL 1 // something failed, abort
100#define RA_CONT 2 // continue in inner loop
101#define RA_BREAK 3 // break inner loop
102#define RA_MATCH 4 // successful match
103#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200104
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105/*
106 * Return NOT_MULTI if c is not a "multi" operator.
107 * Return MULTI_ONE if c is a single "multi" operator.
108 * Return MULTI_MULT if c is a multi "multi" operator.
109 */
110 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100111re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112{
113 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
114 return MULTI_ONE;
115 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
116 return MULTI_MULT;
117 return NOT_MULTI;
118}
119
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000120static char_u *reg_prev_sub = NULL;
121
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122/*
123 * REGEXP_INRANGE contains all characters which are always special in a []
124 * range after '\'.
125 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
126 * These are:
127 * \n - New line (NL).
128 * \r - Carriage Return (CR).
129 * \t - Tab (TAB).
130 * \e - Escape (ESC).
131 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000132 * \d - Character code in decimal, eg \d123
 *	\o	- Character code in octal, eg \o40
134 * \x - Character code in hex, eg \x4a
135 * \u - Multibyte character code, eg \u20ac
136 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137 */
138static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000139static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140
Bram Moolenaar071d4272004-06-13 20:20:40 +0000141/*
142 * Translate '\x' to its control character, except "\n", which is Magic.
143 */
144 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100145backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000146{
147 switch (c)
148 {
149 case 'r': return CAR;
150 case 't': return TAB;
151 case 'e': return ESC;
152 case 'b': return BS;
153 }
154 return c;
155}
156
157/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000158 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000159 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
160 * recognized. Otherwise "pp" is advanced to after the item.
161 */
162 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100163get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000164{
165 static const char *(class_names[]) =
166 {
167 "alnum:]",
168#define CLASS_ALNUM 0
169 "alpha:]",
170#define CLASS_ALPHA 1
171 "blank:]",
172#define CLASS_BLANK 2
173 "cntrl:]",
174#define CLASS_CNTRL 3
175 "digit:]",
176#define CLASS_DIGIT 4
177 "graph:]",
178#define CLASS_GRAPH 5
179 "lower:]",
180#define CLASS_LOWER 6
181 "print:]",
182#define CLASS_PRINT 7
183 "punct:]",
184#define CLASS_PUNCT 8
185 "space:]",
186#define CLASS_SPACE 9
187 "upper:]",
188#define CLASS_UPPER 10
189 "xdigit:]",
190#define CLASS_XDIGIT 11
191 "tab:]",
192#define CLASS_TAB 12
193 "return:]",
194#define CLASS_RETURN 13
195 "backspace:]",
196#define CLASS_BACKSPACE 14
197 "escape:]",
198#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100199 "ident:]",
200#define CLASS_IDENT 16
201 "keyword:]",
202#define CLASS_KEYWORD 17
203 "fname:]",
204#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000205 };
206#define CLASS_NONE 99
207 int i;
208
209 if ((*pp)[1] == ':')
210 {
K.Takataeeec2542021-06-02 13:28:16 +0200211 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000212 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
213 {
214 *pp += STRLEN(class_names[i]) + 2;
215 return i;
216 }
217 }
218 return CLASS_NONE;
219}
220
221/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 * Specific version of character class functions.
223 * Using a table to keep this fast.
224 */
225static short class_tab[256];
226
227#define RI_DIGIT 0x01
228#define RI_HEX 0x02
229#define RI_OCTAL 0x04
230#define RI_WORD 0x08
231#define RI_HEAD 0x10
232#define RI_ALPHA 0x20
233#define RI_LOWER 0x40
234#define RI_UPPER 0x80
235#define RI_WHITE 0x100
236
237 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100238init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239{
240 int i;
241 static int done = FALSE;
242
243 if (done)
244 return;
245
246 for (i = 0; i < 256; ++i)
247 {
248 if (i >= '0' && i <= '7')
249 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
250 else if (i >= '8' && i <= '9')
251 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
252 else if (i >= 'a' && i <= 'f')
253 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
256 else if (i >= 'A' && i <= 'F')
257 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000259 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
260 else if (i == '_')
261 class_tab[i] = RI_WORD + RI_HEAD;
262 else
263 class_tab[i] = 0;
264 }
265 class_tab[' '] |= RI_WHITE;
266 class_tab['\t'] |= RI_WHITE;
267 done = TRUE;
268}
269
kylo252ae6f1d82022-02-16 19:24:07 +0000270#define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
271#define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX))
272#define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
273#define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD))
274#define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD))
275#define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
276#define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER))
277#define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER))
278#define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000279
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100280// flags for regflags
281#define RF_ICASE 1 // ignore case
282#define RF_NOICASE 2 // don't ignore case
283#define RF_HASNL 4 // can match a NL
284#define RF_ICOMBINE 8 // ignore combining characters
285#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286
287/*
288 * Global work variables for vim_regcomp().
289 */
290
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100291static char_u *regparse; // Input-scan pointer.
292static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100293static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100295static int regnzpar; // \z() count.
296static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100298static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100300static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100303static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100305static int reg_string; // matching with a string instead of a buffer
306 // line
307static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000308
309/*
310 * META contains all characters that may be magic, except '^' and '$'.
311 */
312
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100313// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314static char_u META_flags[] = {
315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100317// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100319// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100321// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100323// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100325// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100327// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
329};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100331static int curchr; // currently parsed character
332// Previous character. Note: prevchr is sometimes -1 when we are not at the
333// start, eg in /[ ^I]^ the pattern was never found even if it existed,
334// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200335static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100336static int prevprevchr; // previous-previous character
337static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000338
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100339// arguments for reg()
340#define REG_NOPAREN 0 // toplevel reg()
341#define REG_PAREN 1 // \(\)
342#define REG_ZPAREN 2 // \z(\)
343#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200345typedef struct
346{
347 char_u *regparse;
348 int prevchr_len;
349 int curchr;
350 int prevchr;
351 int prevprevchr;
352 int nextchr;
353 int at_start;
354 int prev_at_start;
355 int regnpar;
356} parse_state_T;
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100359static int getchr(void);
360static void skipchr_keepstart(void);
361static int peekchr(void);
362static void skipchr(void);
363static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100364static long gethexchrs(int maxinputlen);
365static long getoctchrs(void);
366static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100367static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100368static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200369static int cstrncmp(char_u *s1, char_u *s2, int *n);
370static char_u *cstrchr(char_u *, int);
371static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100372static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100373#ifdef FEAT_EVAL
374static void report_re_switch(char_u *pat);
375#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000376
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200377static regengine_T bt_regengine;
378static regengine_T nfa_regengine;
379
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380/*
381 * Return TRUE if compiled regular expression "prog" can match a line break.
382 */
383 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100384re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385{
386 return (prog->regflags & RF_HASNL);
387}
388
389/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000390 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
391 * Returns a character representing the class. Zero means that no item was
392 * recognized. Otherwise "pp" is advanced to after the item.
393 */
394 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100395get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000396{
397 int c;
398 int l = 1;
399 char_u *p = *pp;
400
Bram Moolenaar985079c2019-02-16 17:07:47 +0100401 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000402 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000403 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000404 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000405 if (p[l + 2] == '=' && p[l + 3] == ']')
406 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000407 if (has_mbyte)
408 c = mb_ptr2char(p + 2);
409 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000410 c = p[2];
411 *pp += l + 4;
412 return c;
413 }
414 }
415 return 0;
416}
417
418/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000419 * Check for a collating element "[.a.]". "pp" points to the '['.
420 * Returns a character. Zero means that no item was recognized. Otherwise
421 * "pp" is advanced to after the item.
422 * Currently only single characters are recognized!
423 */
424 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100425get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000426{
427 int c;
428 int l = 1;
429 char_u *p = *pp;
430
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100431 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000432 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000433 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000434 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000435 if (p[l + 2] == '.' && p[l + 3] == ']')
436 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000437 if (has_mbyte)
438 c = mb_ptr2char(p + 2);
439 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000440 c = p[2];
441 *pp += l + 4;
442 return c;
443 }
444 }
445 return 0;
446}
447
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100448static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
449static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200450
451 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100452get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200453{
454 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
455 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
456}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000457
458/*
459 * Skip over a "[]" range.
460 * "p" must point to the character after the '['.
461 * The returned pointer is on the matching ']', or the terminating NUL.
462 */
463 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100464skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000466 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100468 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 ++p;
470 if (*p == ']' || *p == '-')
471 ++p;
472 while (*p != NUL && *p != ']')
473 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000474 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000475 p += l;
476 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000477 if (*p == '-')
478 {
479 ++p;
480 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100481 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000482 }
483 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200484 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200486 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000487 p += 2;
488 else if (*p == '[')
489 {
490 if (get_char_class(&p) == CLASS_NONE
491 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200492 && get_coll_element(&p) == 0
493 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100494 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000495 }
496 else
497 ++p;
498 }
499
500 return p;
501}
502
503/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200505 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Take care of characters with a backslash in front of it.
507 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 */
509 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100510skip_regexp(
511 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200512 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200513 int magic)
514{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100515 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200516}
517
518/*
519 * Call skip_regexp() and when the delimiter does not match give an error and
520 * return NULL.
521 */
522 char_u *
523skip_regexp_err(
524 char_u *startp,
525 int delim,
526 int magic)
527{
528 char_u *p = skip_regexp(startp, delim, magic);
529
530 if (*p != delim)
531 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000532 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200533 return NULL;
534 }
535 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200536}
537
538/*
539 * skip_regexp() with extra arguments:
540 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
541 * expression and change "\?" to "?". If "*newp" is not NULL the expression
542 * is changed in-place.
543 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100544 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200545 */
546 char_u *
547skip_regexp_ex(
548 char_u *startp,
549 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100550 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200551 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100552 int *dropped,
553 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000554{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100555 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000556 char_u *p = startp;
557
558 if (magic)
559 mymagic = MAGIC_ON;
560 else
561 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200562 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100564 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000565 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100566 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000567 break;
568 if ((p[0] == '[' && mymagic >= MAGIC_ON)
569 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
570 {
571 p = skip_anyof(p + 1);
572 if (p[0] == NUL)
573 break;
574 }
575 else if (p[0] == '\\' && p[1] != NUL)
576 {
577 if (dirc == '?' && newp != NULL && p[1] == '?')
578 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100579 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580 if (*newp == NULL)
581 {
582 *newp = vim_strsave(startp);
583 if (*newp != NULL)
584 p = *newp + (p - startp);
585 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200586 if (dropped != NULL)
587 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000589 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590 else
591 ++p;
592 }
593 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100594 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 if (*p == 'v')
596 mymagic = MAGIC_ALL;
597 else if (*p == 'V')
598 mymagic = MAGIC_NONE;
599 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000600 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100601 if (magic_val != NULL)
602 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000603 return p;
604}
605
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200606/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200607 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200608 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100609static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200610static int at_start; // True when on the first character
611static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100612
Bram Moolenaar071d4272004-06-13 20:20:40 +0000613/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200614 * Start parsing at "str".
615 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000616 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100617initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618{
619 regparse = str;
620 prevchr_len = 0;
621 curchr = prevprevchr = prevchr = nextchr = -1;
622 at_start = TRUE;
623 prev_at_start = FALSE;
624}
625
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200626/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200627 * Save the current parse state, so that it can be restored and parsing
628 * starts in the same state again.
629 */
630 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100631save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200632{
633 ps->regparse = regparse;
634 ps->prevchr_len = prevchr_len;
635 ps->curchr = curchr;
636 ps->prevchr = prevchr;
637 ps->prevprevchr = prevprevchr;
638 ps->nextchr = nextchr;
639 ps->at_start = at_start;
640 ps->prev_at_start = prev_at_start;
641 ps->regnpar = regnpar;
642}
643
644/*
645 * Restore a previously saved parse state.
646 */
647 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100648restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200649{
650 regparse = ps->regparse;
651 prevchr_len = ps->prevchr_len;
652 curchr = ps->curchr;
653 prevchr = ps->prevchr;
654 prevprevchr = ps->prevprevchr;
655 nextchr = ps->nextchr;
656 at_start = ps->at_start;
657 prev_at_start = ps->prev_at_start;
658 regnpar = ps->regnpar;
659}
660
661
662/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200663 * Get the next character without advancing.
664 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100666peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000668 static int after_slash = FALSE;
669
Bram Moolenaar071d4272004-06-13 20:20:40 +0000670 if (curchr == -1)
671 {
672 switch (curchr = regparse[0])
673 {
674 case '.':
675 case '[':
676 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100677 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000678 if (reg_magic >= MAGIC_ON)
679 curchr = Magic(curchr);
680 break;
681 case '(':
682 case ')':
683 case '{':
684 case '%':
685 case '+':
686 case '=':
687 case '?':
688 case '@':
689 case '!':
690 case '&':
691 case '|':
692 case '<':
693 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100694 case '#': // future ext.
695 case '"': // future ext.
696 case '\'': // future ext.
697 case ',': // future ext.
698 case '-': // future ext.
699 case ':': // future ext.
700 case ';': // future ext.
701 case '`': // future ext.
702 case '/': // Can't be used in / command
703 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704 if (reg_magic == MAGIC_ALL)
705 curchr = Magic(curchr);
706 break;
707 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100708 // * is not magic as the very first character, eg "?*ptr", when
709 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
710 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000711 if (reg_magic >= MAGIC_ON
712 && !at_start
713 && !(prev_at_start && prevchr == Magic('^'))
714 && (after_slash
715 || (prevchr != Magic('(')
716 && prevchr != Magic('&')
717 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000718 curchr = Magic('*');
719 break;
720 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100721 // '^' is only magic as the very first character and if it's after
722 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000723 if (reg_magic >= MAGIC_OFF
724 && (at_start
725 || reg_magic == MAGIC_ALL
726 || prevchr == Magic('(')
727 || prevchr == Magic('|')
728 || prevchr == Magic('&')
729 || prevchr == Magic('n')
730 || (no_Magic(prevchr) == '('
731 && prevprevchr == Magic('%'))))
732 {
733 curchr = Magic('^');
734 at_start = TRUE;
735 prev_at_start = FALSE;
736 }
737 break;
738 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100739 // '$' is only magic as the very last char and if it's in front of
740 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000741 if (reg_magic >= MAGIC_OFF)
742 {
743 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200744 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000745
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100746 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200748 || p[1] == 'm' || p[1] == 'M'
749 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
750 {
751 if (p[1] == 'v')
752 is_magic_all = TRUE;
753 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
754 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000755 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200756 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000757 if (p[0] == NUL
758 || (p[0] == '\\'
759 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
760 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200761 || (is_magic_all
762 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000763 || reg_magic == MAGIC_ALL)
764 curchr = Magic('$');
765 }
766 break;
767 case '\\':
768 {
769 int c = regparse[1];
770
771 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100772 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000773 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000774 {
775 /*
776 * META contains everything that may be magic sometimes,
777 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200778 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000779 * magicness. Therefore, \ is so meta-magic that it is
780 * not in META.
781 */
782 curchr = -1;
783 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100784 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000786 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000787 peekchr();
788 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000789 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 curchr = toggle_Magic(curchr);
791 }
792 else if (vim_strchr(REGEXP_ABBR, c))
793 {
794 /*
795 * Handle abbreviations, like "\t" for TAB -- webb
796 */
797 curchr = backslash_trans(c);
798 }
799 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
800 curchr = toggle_Magic(c);
801 else
802 {
803 /*
804 * Next character can never be (made) magic?
805 * Then backslashing it won't do anything.
806 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000807 if (has_mbyte)
808 curchr = (*mb_ptr2char)(regparse + 1);
809 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000810 curchr = c;
811 }
812 break;
813 }
814
Bram Moolenaar071d4272004-06-13 20:20:40 +0000815 default:
816 if (has_mbyte)
817 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000818 }
819 }
820
821 return curchr;
822}
823
824/*
825 * Eat one lexed character. Do this in a way that we can undo it.
826 */
827 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100828skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000829{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100830 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 if (*regparse == '\\')
832 prevchr_len = 1;
833 else
834 prevchr_len = 0;
835 if (regparse[prevchr_len] != NUL)
836 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000837 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100838 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000839 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000840 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000841 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000843 ++prevchr_len;
844 }
845 regparse += prevchr_len;
846 prev_at_start = at_start;
847 at_start = FALSE;
848 prevprevchr = prevchr;
849 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100850 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 nextchr = -1;
852}
853
854/*
855 * Skip a character while keeping the value of prev_at_start for at_start.
856 * prevchr and prevprevchr are also kept.
857 */
858 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100859skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
861 int as = prev_at_start;
862 int pr = prevchr;
863 int prpr = prevprevchr;
864
865 skipchr();
866 at_start = as;
867 prevchr = pr;
868 prevprevchr = prpr;
869}
870
/*
 * Lex the next character of the pattern with peekchr(), consume it with
 * skipchr() and return it.  A lexical analyzer is needed because the meaning
 * of a character depends on magicness and such.
 */
    static int
getchr(void)
{
    int lexed = peekchr();

    skipchr();
    return lexed;
}
883
884/*
885 * put character back. Works only once!
886 */
887 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100888ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
890 nextchr = curchr;
891 curchr = prevchr;
892 prevchr = prevprevchr;
893 at_start = prev_at_start;
894 prev_at_start = FALSE;
895
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100896 // Backup regparse, so that it's at the same position as before the
897 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898 regparse -= prevchr_len;
899}
900
901/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000902 * Get and return the value of the hex string at the current position.
903 * Return -1 if there is no valid hex number.
904 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000905 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000906 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000907 * The parameter controls the maximum number of input characters. This will be
908 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
909 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100910 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100911gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000912{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100913 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000914 int c;
915 int i;
916
917 for (i = 0; i < maxinputlen; ++i)
918 {
919 c = regparse[0];
920 if (!vim_isxdigit(c))
921 break;
922 nr <<= 4;
923 nr |= hex2nr(c);
924 ++regparse;
925 }
926
927 if (i == 0)
928 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100929 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000930}
931
932/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200933 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000934 * current position. Return -1 for invalid. Consumes all digits.
935 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100936 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100937getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000938{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100939 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000940 int c;
941 int i;
942
943 for (i = 0; ; ++i)
944 {
945 c = regparse[0];
946 if (c < '0' || c > '9')
947 break;
948 nr *= 10;
949 nr += c - '0';
950 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100951 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000952 }
953
954 if (i == 0)
955 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100956 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000957}
958
959/*
960 * get and return the value of the octal string immediately after the current
961 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
962 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
963 * treat 8 or 9 as recognised characters. Position is updated:
964 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000965 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000966 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100967 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100968getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000969{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100970 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000971 int c;
972 int i;
973
974 for (i = 0; i < 3 && nr < 040; ++i)
975 {
976 c = regparse[0];
977 if (c < '0' || c > '7')
978 break;
979 nr <<= 3;
980 nr |= hex2nr(c);
981 ++regparse;
982 }
983
984 if (i == 0)
985 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100986 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000987}
988
989/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 * read_limits - Read two integers to be taken as a minimum and maximum.
991 * If the first character is '-', then the range is reversed.
992 * Should end with 'end'. If minval is missing, zero is default, if maxval is
993 * missing, a very big number is the default.
994 */
995 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100996read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997{
998 int reverse = FALSE;
999 char_u *first_char;
1000 long tmp;
1001
1002 if (*regparse == '-')
1003 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001004 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 regparse++;
1006 reverse = TRUE;
1007 }
1008 first_char = regparse;
1009 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001010 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011 {
1012 if (vim_isdigit(*++regparse))
1013 *maxval = getdigits(&regparse);
1014 else
1015 *maxval = MAX_LIMIT;
1016 }
1017 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001018 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001019 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001020 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001021 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001022 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001023 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001024 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001025 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001026
1027 /*
1028 * Reverse the range if there was a '-', or make sure it is in the right
1029 * order otherwise.
1030 */
1031 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1032 {
1033 tmp = *minval;
1034 *minval = *maxval;
1035 *maxval = tmp;
1036 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001037 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001038 return OK;
1039}
1040
1041/*
1042 * vim_regexec and friends
1043 */
1044
1045/*
1046 * Global work variables for vim_regexec().
1047 */
1048
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001049static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001051static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001053static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001054static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055
1056/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1058 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001059 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001061static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062static unsigned reg_tofreelen;
1063
1064/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001065 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001066 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001067 * done:
1068 * single-line multi-line
1069 * reg_match &regmatch_T NULL
1070 * reg_mmatch NULL &regmmatch_T
1071 * reg_startp reg_match->startp <invalid>
1072 * reg_endp reg_match->endp <invalid>
1073 * reg_startpos <invalid> reg_mmatch->startpos
1074 * reg_endpos <invalid> reg_mmatch->endpos
1075 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001076 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077 * reg_firstlnum <invalid> first line in which to search
1078 * reg_maxline 0 last line nr
1079 * reg_line_lbr FALSE or TRUE FALSE
1080 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001081typedef struct {
1082 regmatch_T *reg_match;
1083 regmmatch_T *reg_mmatch;
1084 char_u **reg_startp;
1085 char_u **reg_endp;
1086 lpos_T *reg_startpos;
1087 lpos_T *reg_endpos;
1088 win_T *reg_win;
1089 buf_T *reg_buf;
1090 linenr_T reg_firstlnum;
1091 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001092 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001093
Bram Moolenaar0270f382018-07-17 05:43:58 +02001094 // The current match-position is stord in these variables:
1095 linenr_T lnum; // line number, relative to first line
1096 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001097 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001098
1099 int need_clear_subexpr; // subexpressions still need to be cleared
1100#ifdef FEAT_SYN_HL
1101 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1102 // cleared
1103#endif
1104
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001105 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1106 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1107 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001108 int reg_ic;
1109
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001110 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1111 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001112 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001113
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001114 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1115 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001116 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001117
1118 // State for the NFA engine regexec.
1119 int nfa_has_zend; // NFA regexp \ze operator encountered.
1120 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1121 int nfa_nsubexpr; // Number of sub expressions actually being used
1122 // during execution. 1 if only the whole match
1123 // (subexpr 0) is used.
1124 // listid is global, so that it increases on recursive calls to
1125 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1126 // all the states.
1127 int nfa_listid;
1128 int nfa_alt_listid;
1129
1130#ifdef FEAT_SYN_HL
1131 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1132#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001133} regexec_T;
1134
1135static regexec_T rex;
1136static int rex_in_use = FALSE;
1137
Bram Moolenaar071d4272004-06-13 20:20:40 +00001138/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001139 * Return TRUE if character 'c' is included in 'iskeyword' option for
1140 * "reg_buf" buffer.
1141 */
1142 static int
1143reg_iswordc(int c)
1144{
1145 return vim_iswordc_buf(c, rex.reg_buf);
1146}
1147
1148/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001149 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1150 */
1151 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001152reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001153{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001154 // when looking behind for a match/no-match lnum is negative. But we
1155 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001158 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001159 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001160 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001161 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162}
1163
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches.
static char_u   *reg_startzp[NSUBEXP];      // beginning of the matches
static char_u   *reg_endzp[NSUBEXP];        // end of the matches
static lpos_T   reg_startzpos[NSUBEXP];     // idem, beginning pos
static lpos_T   reg_endzpos[NSUBEXP];       // idem, end pos
#endif

// TRUE if using multi-line regexp.
#define REG_MULTI       (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001173
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001175/*
1176 * Create a new extmatch and mark it as referenced once.
1177 */
1178 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001179make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001180{
1181 reg_extmatch_T *em;
1182
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001183 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001184 if (em != NULL)
1185 em->refcnt = 1;
1186 return em;
1187}
1188
1189/*
1190 * Add a reference to an extmatch.
1191 */
1192 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001193ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194{
1195 if (em != NULL)
1196 em->refcnt++;
1197 return em;
1198}
1199
1200/*
1201 * Remove a reference to an extmatch. If there are no references left, free
1202 * the info.
1203 */
1204 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001205unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001206{
1207 int i;
1208
1209 if (em != NULL && --em->refcnt <= 0)
1210 {
1211 for (i = 0; i < NSUBEXP; ++i)
1212 vim_free(em->matches[i]);
1213 vim_free(em);
1214 }
1215}
1216#endif
1217
1218/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001219 * Get class of previous character.
1220 */
1221 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001222reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001223{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001224 if (rex.input > rex.line)
1225 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001226 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001227 return -1;
1228}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001229
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001230/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001231 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001232 */
1233 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001234reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001235{
1236 pos_T top, bot;
1237 linenr_T lnum;
1238 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001239 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001240 int mode;
1241 colnr_T start, end;
1242 colnr_T start2, end2;
1243 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001244 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001246 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001247 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001248 return FALSE;
1249
1250 if (VIsual_active)
1251 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001252 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001253 {
1254 top = VIsual;
1255 bot = wp->w_cursor;
1256 }
1257 else
1258 {
1259 top = wp->w_cursor;
1260 bot = VIsual;
1261 }
1262 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001263 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001264 }
1265 else
1266 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001267 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001268 {
1269 top = curbuf->b_visual.vi_start;
1270 bot = curbuf->b_visual.vi_end;
1271 }
1272 else
1273 {
1274 top = curbuf->b_visual.vi_end;
1275 bot = curbuf->b_visual.vi_start;
1276 }
1277 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001278 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001279 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001280 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001281 if (lnum < top.lnum || lnum > bot.lnum)
1282 return FALSE;
1283
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001284 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001285 if (mode == 'v')
1286 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001287 if ((lnum == top.lnum && col < top.col)
1288 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1289 return FALSE;
1290 }
1291 else if (mode == Ctrl_V)
1292 {
1293 getvvcol(wp, &top, &start, NULL, &end);
1294 getvvcol(wp, &bot, &start2, NULL, &end2);
1295 if (start2 < start)
1296 start = start2;
1297 if (end2 > end)
1298 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001299 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001300 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001301
1302 // getvvcol() flushes rex.line, need to get it again
1303 rex.line = reg_getline(rex.lnum);
1304 rex.input = rex.line + col;
1305
1306 cols = win_linetabsize(wp, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001307 if (cols < start || cols > end - (*p_sel == 'e'))
1308 return FALSE;
1309 }
1310 return TRUE;
1311}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001312
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313/*
1314 * Check the regexp program for its magic number.
1315 * Return TRUE if it's wrong.
1316 */
1317 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001318prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001320 regprog_T *prog;
1321
Bram Moolenaar6100d022016-10-02 16:51:57 +02001322 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001323 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001324 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001325 return FALSE;
1326
1327 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001328 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001329 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 return TRUE;
1331 }
1332 return FALSE;
1333}
1334
1335/*
1336 * Cleanup the subexpressions, if this wasn't done yet.
1337 * This construction is used to clear the subexpressions only when they are
1338 * used (to increase speed).
1339 */
1340 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001341cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001343 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001344 {
1345 if (REG_MULTI)
1346 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001347 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001348 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1349 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001350 }
1351 else
1352 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001353 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1354 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001356 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357 }
1358}
1359
1360#ifdef FEAT_SYN_HL
1361 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001362cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001364 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001365 {
1366 if (REG_MULTI)
1367 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001368 // Use 0xff to set lnum to -1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1370 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1371 }
1372 else
1373 {
1374 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1375 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1376 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001377 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001378 }
1379}
1380#endif
1381
1382/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001383 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 */
1385 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001386reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001388 rex.line = reg_getline(++rex.lnum);
1389 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 fast_breakcheck();
1391}
1392
1393/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001394 * Check whether a backreference matches.
1395 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001396 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1397 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001398 */
1399 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001400match_with_backref(
1401 linenr_T start_lnum,
1402 colnr_T start_col,
1403 linenr_T end_lnum,
1404 colnr_T end_col,
1405 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001406{
1407 linenr_T clnum = start_lnum;
1408 colnr_T ccol = start_col;
1409 int len;
1410 char_u *p;
1411
1412 if (bytelen != NULL)
1413 *bytelen = 0;
1414 for (;;)
1415 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001416 // Since getting one line may invalidate the other, need to make copy.
1417 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001418 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001419 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001420 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001421 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1422 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001423 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001424 vim_free(reg_tofree);
1425 reg_tofree = alloc(len);
1426 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001427 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001428 reg_tofreelen = len;
1429 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001430 STRCPY(reg_tofree, rex.line);
1431 rex.input = reg_tofree + (rex.input - rex.line);
1432 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001433 }
1434
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001435 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001436 p = reg_getline(clnum);
1437 if (clnum == end_lnum)
1438 len = end_col - ccol;
1439 else
1440 len = (int)STRLEN(p + ccol);
1441
Bram Moolenaar0270f382018-07-17 05:43:58 +02001442 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001443 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001444 if (bytelen != NULL)
1445 *bytelen += len;
1446 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001447 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001448 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001449 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001450
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001451 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001452 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001453 if (bytelen != NULL)
1454 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001455 ++clnum;
1456 ccol = 0;
1457 if (got_int)
1458 return RA_FAIL;
1459 }
1460
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001461 // found a match! Note that rex.line may now point to a copy of the line,
1462 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001463 return RA_MATCH;
1464}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001465
Bram Moolenaarfb031402014-09-09 17:18:49 +02001466/*
1467 * Used in a place where no * or \+ can follow.
1468 */
1469 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001470re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001471{
1472 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001473 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001474 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001475 rc_did_emsg = TRUE;
1476 return FAIL;
1477 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001478 return OK;
1479}
1480
// One entry of the decomposition table: up to three characters, where unused
// positions are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decomposition of the characters 0xfb20 - 0xfb4f, indexed by the character
// minus 0xfb20.  An unused code point decomposes into itself.
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                // 0xfb20       alt ayin
    {0x5d0,0,0},                // 0xfb21       alt alef
    {0x5d3,0,0},                // 0xfb22       alt dalet
    {0x5d4,0,0},                // 0xfb23       alt he
    {0x5db,0,0},                // 0xfb24       alt kaf
    {0x5dc,0,0},                // 0xfb25       alt lamed
    {0x5dd,0,0},                // 0xfb26       alt mem-sofit
    {0x5e8,0,0},                // 0xfb27       alt resh
    {0x5ea,0,0},                // 0xfb28       alt tav
    {'+', 0, 0},                // 0xfb29       alt plus
    {0x5e9, 0x5c1, 0},          // 0xfb2a       shin+shin-dot
    {0x5e9, 0x5c2, 0},          // 0xfb2b       shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},      // 0xfb2c       shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},      // 0xfb2d       shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},          // 0xfb2e       alef+patah
    {0x5d0, 0x5b8, 0},          // 0xfb2f       alef+qamats
    // U+FB30 is ALEF WITH MAPIQ; the mapiq is the point 0x5bc, not the
    // hiriq 0x5b4.
    {0x5d0, 0x5bc, 0},          // 0xfb30       alef+mapiq
    {0x5d1, 0x5bc, 0},          // 0xfb31       bet+dagesh
    {0x5d2, 0x5bc, 0},          // 0xfb32       gimel+dagesh
    {0x5d3, 0x5bc, 0},          // 0xfb33       dalet+dagesh
    {0x5d4, 0x5bc, 0},          // 0xfb34       he+dagesh
    {0x5d5, 0x5bc, 0},          // 0xfb35       vav+dagesh
    {0x5d6, 0x5bc, 0},          // 0xfb36       zayin+dagesh
    {0xfb37, 0, 0},             // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},          // 0xfb38       tet+dagesh
    {0x5d9, 0x5bc, 0},          // 0xfb39       yud+dagesh
    {0x5da, 0x5bc, 0},          // 0xfb3a       kaf sofit+dagesh
    {0x5db, 0x5bc, 0},          // 0xfb3b       kaf+dagesh
    {0x5dc, 0x5bc, 0},          // 0xfb3c       lamed+dagesh
    {0xfb3d, 0, 0},             // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},          // 0xfb3e       mem+dagesh
    {0xfb3f, 0, 0},             // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},          // 0xfb40       nun+dagesh
    {0x5e1, 0x5bc, 0},          // 0xfb41       samech+dagesh
    {0xfb42, 0, 0},             // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},          // 0xfb43       pe sofit+dagesh
    {0x5e4, 0x5bc,0},           // 0xfb44       pe+dagesh
    {0xfb45, 0, 0},             // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},          // 0xfb46       tsadi+dagesh
    {0x5e7, 0x5bc, 0},          // 0xfb47       qof+dagesh
    {0x5e8, 0x5bc, 0},          // 0xfb48       resh+dagesh
    {0x5e9, 0x5bc, 0},          // 0xfb49       shin+dagesh
    {0x5ea, 0x5bc, 0},          // 0xfb4a       tav+dagesh
    {0x5d5, 0x5b9, 0},          // 0xfb4b       vav+holam
    {0x5d1, 0x5bf, 0},          // 0xfb4c       bet+rafe
    {0x5db, 0x5bf, 0},          // 0xfb4d       kaf+rafe
    {0x5e4, 0x5bf, 0},          // 0xfb4e       pe+rafe
    {0x5d0, 0x5dc, 0}           // 0xfb4f       alef-lamed
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "*c1", "*c2" and "*c3".  Positions that are not used are set to zero.
 * A character outside of the range 0xfb20 - 0xfb4f is not in the table, then
 * "*c1" is set to "c" and the others to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    decomp_T *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        // Not in the table: the character stands for itself.
        *c1 = c;
        *c2 = 0;
        *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001558
1559/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001560 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001561 * Return 0 if strings match, non-zero otherwise.
1562 * Correct the length "*n" when composing characters are ignored.
1563 */
1564 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001565cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566{
1567 int result;
1568
Bram Moolenaar6100d022016-10-02 16:51:57 +02001569 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570 result = STRNCMP(s1, s2, *n);
1571 else
1572 result = MB_STRNICMP(s1, s2, *n);
1573
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001574 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001575 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001576 {
1577 char_u *str1, *str2;
1578 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001579 int junk;
1580
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001581 // we have to handle the strcmp ourselves, since it is necessary to
1582 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001583 str1 = s1;
1584 str2 = s2;
1585 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001586 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001587 {
1588 c1 = mb_ptr2char_adv(&str1);
1589 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001591 // Decompose the character if necessary, into 'base' characters.
1592 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001593 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001594 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001595 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001596 mb_decompose(c1, &c11, &junk, &junk);
1597 mb_decompose(c2, &c12, &junk, &junk);
1598 c1 = c11;
1599 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001600 if (c11 != c12
1601 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 break;
1603 }
1604 }
1605 result = c2 - c1;
1606 if (result == 0)
1607 *n = (int)(str2 - s2);
1608 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609
1610 return result;
1611}
1612
1613/*
1614 * cstrchr: This function is used a lot for simple searches, keep it fast!
1615 */
1616 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001617cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618{
1619 char_u *p;
1620 int cc;
1621
Bram Moolenaara12a1612019-01-24 16:39:02 +01001622 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 return vim_strchr(s, c);
1624
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001625 // tolower() and toupper() can be slow, comparing twice should be a lot
1626 // faster (esp. when using MS Visual C++!).
1627 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 if (enc_utf8 && c > 0x80)
1629 cc = utf_fold(c);
1630 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001631 if (MB_ISUPPER(c))
1632 cc = MB_TOLOWER(c);
1633 else if (MB_ISLOWER(c))
1634 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635 else
1636 return vim_strchr(s, c);
1637
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 if (has_mbyte)
1639 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001640 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001641 {
1642 if (enc_utf8 && c > 0x80)
1643 {
1644 if (utf_fold(utf_ptr2char(p)) == cc)
1645 return p;
1646 }
1647 else if (*p == c || *p == cc)
1648 return p;
1649 }
1650 }
1651 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001652 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001653 for (p = s; *p != NUL; ++p)
1654 if (*p == c || *p == cc)
1655 return p;
1656
1657 return NULL;
1658}
1659
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001660////////////////////////////////////////////////////////////////
1661// regsub stuff //
1662////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001663
Bram Moolenaar071d4272004-06-13 20:20:40 +00001664/*
1665 * We should define fptr as a pointer to a function returning a pointer to
1666 * a function returning a pointer to a function ...
1667 * This is impossible, so we declare a pointer to a function returning a
Bram Moolenaar30d64132020-09-06 17:09:12 +02001668 * void pointer. This should work for all compilers.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001669 */
Bram Moolenaar30d64132020-09-06 17:09:12 +02001670typedef void (*(*fptr_T)(int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001671
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001672static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001673
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001674 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001675do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001676{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001677 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001678
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001679 return (fptr_T)NULL;
1680}
1681
1682 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001683do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001684{
1685 *d = MB_TOUPPER(c);
1686
1687 return (fptr_T)do_Upper;
1688}
1689
1690 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001691do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001692{
1693 *d = MB_TOLOWER(c);
1694
1695 return (fptr_T)NULL;
1696}
1697
1698 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001699do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001700{
1701 *d = MB_TOLOWER(c);
1702
1703 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001704}
1705
1706/*
1707 * regtilde(): Replace tildes in the pattern by the old pattern.
1708 *
1709 * Short explanation of the tilde: It stands for the previous replacement
1710 * pattern. If that previous pattern also contains a ~ we should go back a
1711 * step further... But we insert the previous pattern into the current one
1712 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001713 * This still does not handle the case where "magic" changes. So require the
1714 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001715 *
1716 * The tildes are parsed once before the first call to vim_regsub().
1717 */
1718 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001719regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001720{
1721 char_u *newsub = source;
1722 char_u *tmpsub;
1723 char_u *p;
1724 int len;
1725 int prevlen;
1726
1727 for (p = newsub; *p; ++p)
1728 {
1729 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1730 {
1731 if (reg_prev_sub != NULL)
1732 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001733 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001734 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001735 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001736 if (tmpsub != NULL)
1737 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001738 // copy prefix
1739 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001740 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001741 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001743 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001745 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 STRCPY(tmpsub + len + prevlen, p + 1);
1747
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001748 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001749 vim_free(newsub);
1750 newsub = tmpsub;
1751 p = newsub + len + prevlen;
1752 }
1753 }
1754 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001755 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001757 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758 --p;
1759 }
1760 else
1761 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001762 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001764 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001765 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001766 }
1767 }
1768
1769 vim_free(reg_prev_sub);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001770 if (newsub != source) // newsub was allocated, just keep it
Bram Moolenaar071d4272004-06-13 20:20:40 +00001771 reg_prev_sub = newsub;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001772 else // no ~ found, need to save newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001773 reg_prev_sub = vim_strsave(newsub);
1774 return newsub;
1775}
1776
#ifdef FEAT_EVAL
// TRUE when submatch() can be used
static int can_f_submatch = FALSE;

// Match information used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// vim_regsub_both() fills the items from the "rex" items with the
// corresponding names (reg_match, reg_mmatch, reg_firstlnum, reg_maxline and
// reg_line_lbr).
typedef struct {
    regmatch_T	*sm_match;
    regmmatch_T	*sm_mmatch;
    linenr_T	sm_firstlnum;
    linenr_T	sm_maxline;
    int		sm_line_lbr;
} regsubmatch_T;

// can only be used when can_f_submatch is TRUE
static regsubmatch_T rsm;
#endif
1793
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001794#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001795
1796/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001797 * Put the submatches in "argv[argskip]" which is a list passed into
1798 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001799 */
1800 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001801fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001802{
1803 listitem_T *li;
1804 int i;
1805 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001806 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001807
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001808 if (argcount == argskip)
1809 // called function doesn't take a submatches argument
1810 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001811
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001812 // Relies on sl_list to be the first item in staticList10_T.
1813 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001814
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001815 // There are always 10 list items in staticList10_T.
1816 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001817 for (i = 0; i < 10; ++i)
1818 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001819 s = rsm.sm_match->startp[i];
1820 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001821 s = NULL;
1822 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001823 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001824 li->li_tv.v_type = VAR_STRING;
1825 li->li_tv.vval.v_string = s;
1826 li = li->li_next;
1827 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001828 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001829}
1830
1831 static void
1832clear_submatch_list(staticList10_T *sl)
1833{
1834 int i;
1835
1836 for (i = 0; i < 10; ++i)
1837 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1838}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001839#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001840
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841/*
1842 * vim_regsub() - perform substitutions after a vim_regexec() or
1843 * vim_regexec_multi() match.
1844 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001845 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1846 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001847 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001848 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1849 *
1850 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1851 * double them to keep them, and insert a backslash before a CR to avoid it
1852 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001853 *
1854 * Note: The matched text must not change between the call of
1855 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1856 * references invalid!
1857 *
1858 * Returns the size of the replacement, including terminating NUL.
1859 */
1860 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001861vim_regsub(
1862 regmatch_T *rmp,
1863 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001864 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001865 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001866 int destlen,
1867 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001868{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001869 int result;
1870 regexec_T rex_save;
1871 int rex_in_use_save = rex_in_use;
1872
1873 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001874 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001875 rex_save = rex;
1876 rex_in_use = TRUE;
1877
1878 rex.reg_match = rmp;
1879 rex.reg_mmatch = NULL;
1880 rex.reg_maxline = 0;
1881 rex.reg_buf = curbuf;
1882 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001883 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001884
1885 rex_in_use = rex_in_use_save;
1886 if (rex_in_use)
1887 rex = rex_save;
1888
1889 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001890}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001891
1892 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001893vim_regsub_multi(
1894 regmmatch_T *rmp,
1895 linenr_T lnum,
1896 char_u *source,
1897 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001898 int destlen,
1899 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001900{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001901 int result;
1902 regexec_T rex_save;
1903 int rex_in_use_save = rex_in_use;
1904
1905 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001906 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001907 rex_save = rex;
1908 rex_in_use = TRUE;
1909
1910 rex.reg_match = NULL;
1911 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001912 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001913 rex.reg_firstlnum = lnum;
1914 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1915 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001916 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001917
1918 rex_in_use = rex_in_use_save;
1919 if (rex_in_use)
1920 rex = rex_save;
1921
1922 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923}
1924
1925 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001926vim_regsub_both(
1927 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001928 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001929 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001930 int destlen,
1931 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932{
1933 char_u *src;
1934 char_u *dst;
1935 char_u *s;
1936 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001937 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001938 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001939 fptr_T func_all = (fptr_T)NULL;
1940 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001941 linenr_T clnum = 0; // init for GCC
1942 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001943#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001944 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001945#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001946 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001947
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001948 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001949 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001950 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001951 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001952 return 0;
1953 }
1954 if (prog_magic_wrong())
1955 return 0;
1956 src = source;
1957 dst = dest;
1958
1959 /*
1960 * When the substitute part starts with "\=" evaluate it as an expression.
1961 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001962 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001963 {
1964#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001965 // To make sure that the length doesn't change between checking the
1966 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01001967 // resulting string is saved from the call with
1968 // "flags & REGSUB_COPY" == 0 to the call with
1969 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001970 if (copy)
1971 {
1972 if (eval_result != NULL)
1973 {
1974 STRCPY(dest, eval_result);
1975 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01001976 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001977 }
1978 }
1979 else
1980 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001981 int prev_can_f_submatch = can_f_submatch;
1982 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001983
Paul Ollis65745772022-06-05 16:55:54 +01001984 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001985
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001986 // The expression may contain substitute(), which calls us
1987 // recursively. Make sure submatch() gets the text from the first
1988 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001989 if (can_f_submatch)
1990 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001991 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001992 rsm.sm_match = rex.reg_match;
1993 rsm.sm_mmatch = rex.reg_mmatch;
1994 rsm.sm_firstlnum = rex.reg_firstlnum;
1995 rsm.sm_maxline = rex.reg_maxline;
1996 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001997
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001998 if (expr != NULL)
1999 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002000 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002001 char_u buf[NUMBUFLEN];
2002 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002003 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002004 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002005
2006 rettv.v_type = VAR_STRING;
2007 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002008 argv[0].v_type = VAR_LIST;
2009 argv[0].vval.v_list = &matchList.sl_list;
2010 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002011 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002012 funcexe.fe_argv_func = fill_submatch_list;
2013 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002014 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002015 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002016 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002017 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002018 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002019 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002020 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002021 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002022
Bram Moolenaar6100d022016-10-02 16:51:57 +02002023 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002024 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002025 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002026 }
LemonBoyf3b48952022-05-05 13:53:03 +01002027 else if (expr->v_type == VAR_INSTR)
2028 {
2029 exe_typval_instr(expr, &rettv);
2030 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002031 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002032 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002033 clear_submatch_list(&matchList);
2034
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002035 if (rettv.v_type == VAR_UNKNOWN)
2036 // something failed, no need to report another error
2037 eval_result = NULL;
2038 else
2039 {
2040 eval_result = tv_get_string_buf_chk(&rettv, buf);
2041 if (eval_result != NULL)
2042 eval_result = vim_strsave(eval_result);
2043 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002044 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002045 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002046 else if (substitute_instr != NULL)
2047 // Execute instructions from ISN_SUBSTITUTE.
2048 eval_result = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002049 else
Bram Moolenaarb171fb12020-06-24 20:34:03 +02002050 eval_result = eval_to_string(source + 2, TRUE);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002051
Bram Moolenaar071d4272004-06-13 20:20:40 +00002052 if (eval_result != NULL)
2053 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002054 int had_backslash = FALSE;
2055
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002056 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002057 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002058 // Change NL to CR, so that it becomes a line break,
2059 // unless called from vim_regexec_nl().
2060 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002061 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002062 *s = CAR;
2063 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002064 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002065 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002066 /* Change NL to CR here too, so that this works:
2067 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2068 * abc\
2069 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002070 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002071 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002072 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002073 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002074 had_backslash = TRUE;
2075 }
2076 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002077 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002078 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002079 // Backslashes will be consumed, need to double them.
Bram Moolenaar06975a42010-03-23 16:27:22 +01002080 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2081 if (s != NULL)
2082 {
2083 vim_free(eval_result);
2084 eval_result = s;
2085 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002086 }
2087
2088 dst += STRLEN(eval_result);
2089 }
2090
Bram Moolenaar6100d022016-10-02 16:51:57 +02002091 can_f_submatch = prev_can_f_submatch;
2092 if (can_f_submatch)
2093 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002094 }
2095#endif
2096 }
2097 else
2098 while ((c = *src++) != NUL)
2099 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002100 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 no = 0;
2102 else if (c == '\\' && *src != NUL)
2103 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002104 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002105 {
2106 ++src;
2107 no = 0;
2108 }
2109 else if ('0' <= *src && *src <= '9')
2110 {
2111 no = *src++ - '0';
2112 }
2113 else if (vim_strchr((char_u *)"uUlLeE", *src))
2114 {
2115 switch (*src++)
2116 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002117 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002118 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002119 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002121 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002122 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002123 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002124 continue;
2125 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002126 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002127 continue;
2128 }
2129 }
2130 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002131 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002132 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002133 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2134 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002135 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002136 if (copy)
2137 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002138 if (dst + 3 > dest + destlen)
2139 {
2140 iemsg("vim_regsub_both(): not enough space");
2141 return 0;
2142 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002143 *dst++ = c;
2144 *dst++ = *src++;
2145 *dst++ = *src++;
2146 }
2147 else
2148 {
2149 dst += 3;
2150 src += 2;
2151 }
2152 continue;
2153 }
2154
Bram Moolenaar071d4272004-06-13 20:20:40 +00002155 if (c == '\\' && *src != NUL)
2156 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002157 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002158 switch (*src)
2159 {
2160 case 'r': c = CAR; ++src; break;
2161 case 'n': c = NL; ++src; break;
2162 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002163 // Oh no! \e already has meaning in subst pat :-(
2164 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002165 case 'b': c = Ctrl_H; ++src; break;
2166
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002167 // If "backslash" is TRUE the backslash will be removed
2168 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002169 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002170 {
2171 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002172 {
2173 if (dst + 1 > dest + destlen)
2174 {
2175 iemsg("vim_regsub_both(): not enough space");
2176 return 0;
2177 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002178 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002179 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002180 ++dst;
2181 }
2182 c = *src++;
2183 }
2184 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002185 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002186 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002187
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002188 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002189 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002190 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002191 func_one = (fptr_T)(func_one(&cc, c));
2192 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002193 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002194 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002195 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002196 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002197
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002198 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002199 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002200 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002201 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002202
Bram Moolenaar071d4272004-06-13 20:20:40 +00002203 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002204 {
2205 if (dst + charlen > dest + destlen)
2206 {
2207 iemsg("vim_regsub_both(): not enough space");
2208 return 0;
2209 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002210 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002211 }
2212 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002213 if (enc_utf8)
2214 {
2215 int clen = utf_ptr2len(src - 1);
2216
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002217 // If the character length is shorter than "totlen", there
2218 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002219 if (clen < totlen)
2220 {
2221 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002222 {
2223 if (dst + totlen - clen > dest + destlen)
2224 {
2225 iemsg("vim_regsub_both(): not enough space");
2226 return 0;
2227 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002228 mch_memmove(dst + 1, src - 1 + clen,
2229 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002230 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002231 dst += totlen - clen;
2232 }
2233 }
2234 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002235 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002236 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002237 {
2238 if (dst + 1 > dest + destlen)
2239 {
2240 iemsg("vim_regsub_both(): not enough space");
2241 return 0;
2242 }
2243 *dst = cc;
2244 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002245 dst++;
2246 }
2247 else
2248 {
2249 if (REG_MULTI)
2250 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002251 clnum = rex.reg_mmatch->startpos[no].lnum;
2252 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002253 s = NULL;
2254 else
2255 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002256 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2257 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2258 len = rex.reg_mmatch->endpos[no].col
2259 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002260 else
2261 len = (int)STRLEN(s);
2262 }
2263 }
2264 else
2265 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002266 s = rex.reg_match->startp[no];
2267 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002268 s = NULL;
2269 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002270 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002271 }
2272 if (s != NULL)
2273 {
2274 for (;;)
2275 {
2276 if (len == 0)
2277 {
2278 if (REG_MULTI)
2279 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002280 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002281 break;
2282 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002283 {
2284 if (dst + 1 > dest + destlen)
2285 {
2286 iemsg("vim_regsub_both(): not enough space");
2287 return 0;
2288 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002289 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002290 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002291 ++dst;
2292 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002293 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2294 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002295 else
2296 len = (int)STRLEN(s);
2297 }
2298 else
2299 break;
2300 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002301 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002302 {
2303 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002304 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002305 goto exit;
2306 }
2307 else
2308 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002309 if ((flags & REGSUB_BACKSLASH)
2310 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002311 {
2312 /*
2313 * Insert a backslash in front of a CR, otherwise
2314 * it will be replaced by a line break.
2315 * Number of backslashes will be halved later,
2316 * double them here.
2317 */
2318 if (copy)
2319 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002320 if (dst + 2 > dest + destlen)
2321 {
2322 iemsg("vim_regsub_both(): not enough space");
2323 return 0;
2324 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002325 dst[0] = '\\';
2326 dst[1] = *s;
2327 }
2328 dst += 2;
2329 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002330 else
2331 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002332 if (has_mbyte)
2333 c = mb_ptr2char(s);
2334 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002335 c = *s;
2336
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002337 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002338 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002339 func_one = (fptr_T)(func_one(&cc, c));
2340 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002341 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002342 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002343 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002344 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002345
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002346 if (has_mbyte)
2347 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002348 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002349 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002350
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002351 // Copy composing characters separately, one
2352 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002353 if (enc_utf8)
2354 l = utf_ptr2len(s) - 1;
2355 else
2356 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002357
2358 s += l;
2359 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002360 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002361 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002362 {
2363 if (dst + charlen > dest + destlen)
2364 {
2365 iemsg("vim_regsub_both(): not enough space");
2366 return 0;
2367 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002368 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002369 }
2370 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002371 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002372 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002373 {
2374 if (dst + 1 > dest + destlen)
2375 {
2376 iemsg("vim_regsub_both(): not enough space");
2377 return 0;
2378 }
2379 *dst = cc;
2380 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002381 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002382 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002383
Bram Moolenaar071d4272004-06-13 20:20:40 +00002384 ++s;
2385 --len;
2386 }
2387 }
2388 }
2389 no = -1;
2390 }
2391 }
2392 if (copy)
2393 *dst = NUL;
2394
2395exit:
2396 return (int)((dst - dest) + 1);
2397}
2398
2399#ifdef FEAT_EVAL
2400/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002401 * Call reg_getline() with the line numbers from the submatch. If a
2402 * substitute() was used the reg_maxline and other values have been
2403 * overwritten.
2404 */
2405 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002406reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002407{
2408 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002409 linenr_T save_first = rex.reg_firstlnum;
2410 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002411
Bram Moolenaar6100d022016-10-02 16:51:57 +02002412 rex.reg_firstlnum = rsm.sm_firstlnum;
2413 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002414
2415 s = reg_getline(lnum);
2416
Bram Moolenaar6100d022016-10-02 16:51:57 +02002417 rex.reg_firstlnum = save_first;
2418 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002419 return s;
2420}
2421
2422/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002423 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002424 * allocated memory.
2425 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2426 */
2427 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002428reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002429{
2430 char_u *retval = NULL;
2431 char_u *s;
2432 int len;
2433 int round;
2434 linenr_T lnum;
2435
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002436 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002437 return NULL;
2438
Bram Moolenaar6100d022016-10-02 16:51:57 +02002439 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002440 {
2441 /*
2442 * First round: compute the length and allocate memory.
2443 * Second round: copy the text.
2444 */
2445 for (round = 1; round <= 2; ++round)
2446 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002447 lnum = rsm.sm_mmatch->startpos[no].lnum;
2448 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002449 return NULL;
2450
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002451 s = reg_getline_submatch(lnum);
2452 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002453 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002454 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002455 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002456 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002457 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002458 len = rsm.sm_mmatch->endpos[no].col
2459 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002460 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002461 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002462 ++len;
2463 }
2464 else
2465 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002466 // Multiple lines: take start line from start col, middle
2467 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002468 len = (int)STRLEN(s);
2469 if (round == 2)
2470 {
2471 STRCPY(retval, s);
2472 retval[len] = '\n';
2473 }
2474 ++len;
2475 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002476 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002477 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002478 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002479 if (round == 2)
2480 STRCPY(retval + len, s);
2481 len += (int)STRLEN(s);
2482 if (round == 2)
2483 retval[len] = '\n';
2484 ++len;
2485 }
2486 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002487 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002488 rsm.sm_mmatch->endpos[no].col);
2489 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002490 if (round == 2)
2491 retval[len] = NUL;
2492 ++len;
2493 }
2494
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002495 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002496 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002497 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002498 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 return NULL;
2500 }
2501 }
2502 }
2503 else
2504 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002505 s = rsm.sm_match->startp[no];
2506 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 retval = NULL;
2508 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002509 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002510 }
2511
2512 return retval;
2513}
Bram Moolenaar41571762014-04-02 19:00:58 +02002514
2515/*
2516 * Used for the submatch() function with the optional non-zero argument: get
2517 * the list of strings from the n'th submatch in allocated memory with NULs
2518 * represented in NLs.
2519 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2520 * command, for a non-existing submatch and for any error.
2521 */
2522 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002523reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002524{
2525 char_u *s;
2526 linenr_T slnum;
2527 linenr_T elnum;
2528 colnr_T scol;
2529 colnr_T ecol;
2530 int i;
2531 list_T *list;
2532 int error = FALSE;
2533
2534 if (!can_f_submatch || no < 0)
2535 return NULL;
2536
Bram Moolenaar6100d022016-10-02 16:51:57 +02002537 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002538 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002539 slnum = rsm.sm_mmatch->startpos[no].lnum;
2540 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002541 if (slnum < 0 || elnum < 0)
2542 return NULL;
2543
Bram Moolenaar6100d022016-10-02 16:51:57 +02002544 scol = rsm.sm_mmatch->startpos[no].col;
2545 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002546
2547 list = list_alloc();
2548 if (list == NULL)
2549 return NULL;
2550
2551 s = reg_getline_submatch(slnum) + scol;
2552 if (slnum == elnum)
2553 {
2554 if (list_append_string(list, s, ecol - scol) == FAIL)
2555 error = TRUE;
2556 }
2557 else
2558 {
2559 if (list_append_string(list, s, -1) == FAIL)
2560 error = TRUE;
2561 for (i = 1; i < elnum - slnum; i++)
2562 {
2563 s = reg_getline_submatch(slnum + i);
2564 if (list_append_string(list, s, -1) == FAIL)
2565 error = TRUE;
2566 }
2567 s = reg_getline_submatch(elnum);
2568 if (list_append_string(list, s, ecol) == FAIL)
2569 error = TRUE;
2570 }
2571 }
2572 else
2573 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002574 s = rsm.sm_match->startp[no];
2575 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002576 return NULL;
2577 list = list_alloc();
2578 if (list == NULL)
2579 return NULL;
2580 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002581 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002582 error = TRUE;
2583 }
2584
2585 if (error)
2586 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002587 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002588 return NULL;
2589 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002590 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002591 return list;
2592}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002593#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002594
Bram Moolenaarf4140482020-02-15 23:06:45 +01002595/*
2596 * Initialize the values used for matching against multiple lines
2597 */
2598 static void
2599init_regexec_multi(
2600 regmmatch_T *rmp,
2601 win_T *win, // window in which to search or NULL
2602 buf_T *buf, // buffer in which to search
2603 linenr_T lnum) // nr of line to start looking for match
2604{
2605 rex.reg_match = NULL;
2606 rex.reg_mmatch = rmp;
2607 rex.reg_buf = buf;
2608 rex.reg_win = win;
2609 rex.reg_firstlnum = lnum;
2610 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2611 rex.reg_line_lbr = FALSE;
2612 rex.reg_ic = rmp->rmm_ic;
2613 rex.reg_icombine = FALSE;
2614 rex.reg_maxcol = rmp->rmm_maxcol;
2615}
2616
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002617#include "regexp_bt.c"
2618
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002619static regengine_T bt_regengine =
2620{
2621 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002622 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002623 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002624 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002625};
2626
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002627#include "regexp_nfa.c"
2628
2629static regengine_T nfa_regengine =
2630{
2631 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002632 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002633 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002634 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002635};
2636
// The regexp engine that vim_regcomp() is using; it is set from p_re or from
// a "\%#=" prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the
// engine number.
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			    };
#endif
2648
2649/*
2650 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002651 * Returns the program in allocated memory.
2652 * Use vim_regfree() to free the memory.
2653 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002654 */
2655 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002656vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002657{
2658 regprog_T *prog = NULL;
2659 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002660 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002661
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002662 regexp_engine = p_re;
2663
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002664 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002665 if (STRNCMP(expr, "\\%#=", 4) == 0)
2666 {
2667 int newengine = expr[4] - '0';
2668
2669 if (newengine == AUTOMATIC_ENGINE
2670 || newengine == BACKTRACKING_ENGINE
2671 || newengine == NFA_ENGINE)
2672 {
2673 regexp_engine = expr[4] - '0';
2674 expr += 5;
2675#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002676 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002677 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002678#endif
2679 }
2680 else
2681 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002682 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002683 regexp_engine = AUTOMATIC_ENGINE;
2684 }
2685 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002686#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002687 bt_regengine.expr = expr;
2688 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002689#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002690 // reg_iswordc() uses rex.reg_buf
2691 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002692
2693 /*
2694 * First try the NFA engine, unless backtracking was requested.
2695 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002696 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002697 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002698 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002699 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002700 else
2701 prog = bt_regengine.regcomp(expr, re_flags);
2702
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002703 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002704 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002705 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002706#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002707 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002708 {
2709 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002710 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002711 if (f)
2712 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002713 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002714 fclose(f);
2715 }
2716 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002717 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002718 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002719 }
2720#endif
2721 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002722 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002723 * The NFA engine also fails for patterns that it can't handle well
2724 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002725 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002726 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002727 if (regexp_engine == AUTOMATIC_ENGINE
2728 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002729 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002730 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002731#ifdef FEAT_EVAL
2732 report_re_switch(expr);
2733#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002734 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002735 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002736 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002737
Bram Moolenaarfda37292014-11-05 14:27:36 +01002738 if (prog != NULL)
2739 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002740 // Store the info needed to call regcomp() again when the engine turns
2741 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002742 prog->re_engine = regexp_engine;
2743 prog->re_flags = re_flags;
2744 }
2745
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002746 return prog;
2747}
2748
2749/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002750 * Free a compiled regexp program, returned by vim_regcomp().
2751 */
2752 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002753vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002754{
2755 if (prog != NULL)
2756 prog->engine->regfree(prog);
2757}
2758
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002759#if defined(EXITFREE) || defined(PROTO)
2760 void
2761free_regexp_stuff(void)
2762{
2763 ga_clear(&regstack);
2764 ga_clear(&backpos);
2765 vim_free(reg_tofree);
2766 vim_free(reg_prev_sub);
2767}
2768#endif
2769
Bram Moolenaarfda37292014-11-05 14:27:36 +01002770#ifdef FEAT_EVAL
Bram Moolenaarfda37292014-11-05 14:27:36 +01002771 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002772report_re_switch(char_u *pat)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002773{
2774 if (p_verbose > 0)
2775 {
2776 verbose_enter();
Bram Moolenaar32526b32019-01-19 17:43:09 +01002777 msg_puts(_("Switching to backtracking RE engine for pattern: "));
2778 msg_puts((char *)pat);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002779 verbose_leave();
2780 }
2781}
2782#endif
2783
Bram Moolenaar651fca82021-11-29 20:39:38 +00002784#if defined(FEAT_X11) || defined(PROTO)
Bram Moolenaar473de612013-06-08 18:19:48 +02002785/*
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002786 * Return whether "prog" is currently being executed.
2787 */
2788 int
2789regprog_in_use(regprog_T *prog)
2790{
2791 return prog->re_in_use;
2792}
Bram Moolenaar113e1072019-01-20 15:30:40 +01002793#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002794
2795/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002796 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002797 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002798 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002799 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002800 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002801 *
2802 * Return TRUE if there is a match, FALSE if not.
2803 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002804 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002805vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002806 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002807 char_u *line, // string to match against
2808 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002809 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002810{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002811 int result;
2812 regexec_T rex_save;
2813 int rex_in_use_save = rex_in_use;
2814
Bram Moolenaar0270f382018-07-17 05:43:58 +02002815 // Cannot use the same prog recursively, it contains state.
2816 if (rmp->regprog->re_in_use)
2817 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002818 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002819 return FALSE;
2820 }
2821 rmp->regprog->re_in_use = TRUE;
2822
Bram Moolenaar6100d022016-10-02 16:51:57 +02002823 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002824 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002825 rex_save = rex;
2826 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002827
Bram Moolenaar6100d022016-10-02 16:51:57 +02002828 rex.reg_startp = NULL;
2829 rex.reg_endp = NULL;
2830 rex.reg_startpos = NULL;
2831 rex.reg_endpos = NULL;
2832
2833 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002834 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002835
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002836 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002837 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2838 && result == NFA_TOO_EXPENSIVE)
2839 {
2840 int save_p_re = p_re;
2841 int re_flags = rmp->regprog->re_flags;
2842 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2843
2844 p_re = BACKTRACKING_ENGINE;
2845 vim_regfree(rmp->regprog);
2846 if (pat != NULL)
2847 {
2848#ifdef FEAT_EVAL
2849 report_re_switch(pat);
2850#endif
2851 rmp->regprog = vim_regcomp(pat, re_flags);
2852 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002853 {
2854 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002855 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002856 rmp->regprog->re_in_use = FALSE;
2857 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002858 vim_free(pat);
2859 }
2860
2861 p_re = save_p_re;
2862 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002863
2864 rex_in_use = rex_in_use_save;
2865 if (rex_in_use)
2866 rex = rex_save;
2867
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002868 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002869}
2870
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002871/*
2872 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002873 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002874 */
2875 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002876vim_regexec_prog(
2877 regprog_T **prog,
2878 int ignore_case,
2879 char_u *line,
2880 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002881{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002882 int r;
2883 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002884
2885 regmatch.regprog = *prog;
2886 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002887 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002888 *prog = regmatch.regprog;
2889 return r;
2890}
2891
2892/*
2893 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002894 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002895 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002896 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002897vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002898{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002899 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002900}
2901
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002902/*
2903 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002904 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002905 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002906 */
2907 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002908vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002909{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002910 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002911}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002912
2913/*
2914 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002915 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2916 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002917 * Uses curbuf for line count and 'iskeyword'.
2918 *
2919 * Return zero if there is no match. Return number of lines contained in the
2920 * match otherwise.
2921 */
2922 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002923vim_regexec_multi(
2924 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002925 win_T *win, // window in which to search or NULL
2926 buf_T *buf, // buffer in which to search
2927 linenr_T lnum, // nr of line to start looking for match
2928 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002929 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002930{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002931 int result;
2932 regexec_T rex_save;
2933 int rex_in_use_save = rex_in_use;
2934
Bram Moolenaar0270f382018-07-17 05:43:58 +02002935 // Cannot use the same prog recursively, it contains state.
2936 if (rmp->regprog->re_in_use)
2937 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002938 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002939 return FALSE;
2940 }
2941 rmp->regprog->re_in_use = TRUE;
2942
Bram Moolenaar6100d022016-10-02 16:51:57 +02002943 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002944 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002945 rex_save = rex;
2946 rex_in_use = TRUE;
2947
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002948 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002949 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002950 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002951
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002952 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002953 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2954 && result == NFA_TOO_EXPENSIVE)
2955 {
2956 int save_p_re = p_re;
2957 int re_flags = rmp->regprog->re_flags;
2958 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2959
2960 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002961 if (pat != NULL)
2962 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002963 regprog_T *prev_prog = rmp->regprog;
2964
Bram Moolenaarfda37292014-11-05 14:27:36 +01002965#ifdef FEAT_EVAL
2966 report_re_switch(pat);
2967#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002968#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002969 // checking for \z misuse was already done when compiling for NFA,
2970 // allow all here
2971 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002972#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01002973 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002974#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002975 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002976#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002977 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002978 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002979 // Somehow compiling the pattern failed now, put back the
2980 // previous one to avoid "regprog" becoming NULL.
2981 rmp->regprog = prev_prog;
2982 }
2983 else
2984 {
2985 vim_regfree(prev_prog);
2986
Bram Moolenaar41499802018-07-18 06:02:09 +02002987 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002988 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002989 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002990 rmp->regprog->re_in_use = FALSE;
2991 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002992 vim_free(pat);
2993 }
2994 p_re = save_p_re;
2995 }
2996
Bram Moolenaar6100d022016-10-02 16:51:57 +02002997 rex_in_use = rex_in_use_save;
2998 if (rex_in_use)
2999 rex = rex_save;
3000
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003001 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003002}