blob: bec046437f0ee7ad2fc8fb2321bd658066c9e3f2 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
Bram Moolenaar63d9e732019-12-05 21:10:38 +010015// show/save debugging data when BT engine is used
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020016# define BT_REGEXP_DUMP
Bram Moolenaar63d9e732019-12-05 21:10:38 +010017// save the debugging data to a file instead of displaying it
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020018# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
Paul Ollis65745772022-06-05 16:55:54 +010023#ifdef FEAT_RELTIME
Bram Moolenaar155f2d12022-06-20 13:38:33 +010024static sig_atomic_t dummy_timeout_flag = 0;
25static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
Paul Ollis65745772022-06-05 16:55:54 +010026#endif
27
/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */
// Magic(x): the Magic form of character "x", obtained by subtracting 256.
#define Magic(x)	((int)(x) - 256)
// un_Magic(x): inverse of Magic(), adds the 256 offset back.
#define un_Magic(x)	((x) + 256)
// is_Magic(x): non-zero when "x" is negative, i.e. in its Magic form.
#define is_Magic(x)	((x) < 0)
36
/*
 * Return the non-Magic form of "x": a Magic value is converted back with
 * un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
44
/*
 * Flip the magicness of "x": a Magic value becomes the plain character and a
 * plain character becomes its Magic counterpart.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
52
Paul Ollis65745772022-06-05 16:55:54 +010053#ifdef FEAT_RELTIME
/*
 * Start a timeout of "msec" with start_timeout() and make "timeout_flag"
 * point at the flag that call returns.
 */
    void
init_regexp_timeout(long msec)
{
    timeout_flag = start_timeout(msec);
}
59
/*
 * Stop the timeout with stop_timeout() and point "timeout_flag" back at the
 * file-local "dummy_timeout_flag".
 */
    void
disable_regexp_timeout(void)
{
    stop_timeout();
    // Only redirect the pointer after the timeout was stopped.
    timeout_flag = &dummy_timeout_flag;
}
66#endif
67
Bram Moolenaar071d4272004-06-13 20:20:40 +000068/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020069 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +000070 * number; the start node begins in the second byte. It's used to catch the
71 * most severe mutilation of the program by the caller.
72 */
73
74#define REGMAGIC 0234
75
76/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000077 * Utility definitions.
78 */
79#define UCHARAT(p) ((int)*(char_u *)(p))
80
Bram Moolenaar63d9e732019-12-05 21:10:38 +010081// Used for an error (down from) vim_regcomp(): give the error message, set
82// rc_did_emsg and return NULL
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010083#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
84#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
85#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
86#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +010087#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010088#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarac78dd42022-01-02 19:25:26 +000089#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000090
Bram Moolenaar95f09602016-11-10 20:01:45 +010091
Bram Moolenaar071d4272004-06-13 20:20:40 +000092#define MAX_LIMIT (32767L << 16L)
93
Bram Moolenaar071d4272004-06-13 20:20:40 +000094#define NOT_MULTI 0
95#define MULTI_ONE 1
96#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020097
98// return values for regmatch()
Bram Moolenaar63d9e732019-12-05 21:10:38 +010099#define RA_FAIL 1 // something failed, abort
100#define RA_CONT 2 // continue in inner loop
101#define RA_BREAK 3 // break inner loop
102#define RA_MATCH 4 // successful match
103#define RA_NOMATCH 5 // didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200104
Bram Moolenaar071d4272004-06-13 20:20:40 +0000105/*
106 * Return NOT_MULTI if c is not a "multi" operator.
107 * Return MULTI_ONE if c is a single "multi" operator.
108 * Return MULTI_MULT if c is a multi "multi" operator.
109 */
110 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100111re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000112{
113 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
114 return MULTI_ONE;
115 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
116 return MULTI_MULT;
117 return NOT_MULTI;
118}
119
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000120static char_u *reg_prev_sub = NULL;
121
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122/*
123 * REGEXP_INRANGE contains all characters which are always special in a []
124 * range after '\'.
125 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
126 * These are:
127 * \n - New line (NL).
128 * \r - Carriage Return (CR).
129 * \t - Tab (TAB).
130 * \e - Escape (ESC).
131 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000132 * \d - Character code in decimal, eg \d123
 *	\o	- Character code in octal, eg \o123
134 * \x - Character code in hex, eg \x4a
135 * \u - Multibyte character code, eg \u20ac
136 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000137 */
138static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000139static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140
Bram Moolenaar071d4272004-06-13 20:20:40 +0000141/*
142 * Translate '\x' to its control character, except "\n", which is Magic.
143 */
144 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100145backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000146{
147 switch (c)
148 {
149 case 'r': return CAR;
150 case 't': return TAB;
151 case 'e': return ESC;
152 case 'b': return BS;
153 }
154 return c;
155}
156
157/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000158 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000159 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
160 * recognized. Otherwise "pp" is advanced to after the item.
161 */
162 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100163get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000164{
165 static const char *(class_names[]) =
166 {
167 "alnum:]",
168#define CLASS_ALNUM 0
169 "alpha:]",
170#define CLASS_ALPHA 1
171 "blank:]",
172#define CLASS_BLANK 2
173 "cntrl:]",
174#define CLASS_CNTRL 3
175 "digit:]",
176#define CLASS_DIGIT 4
177 "graph:]",
178#define CLASS_GRAPH 5
179 "lower:]",
180#define CLASS_LOWER 6
181 "print:]",
182#define CLASS_PRINT 7
183 "punct:]",
184#define CLASS_PUNCT 8
185 "space:]",
186#define CLASS_SPACE 9
187 "upper:]",
188#define CLASS_UPPER 10
189 "xdigit:]",
190#define CLASS_XDIGIT 11
191 "tab:]",
192#define CLASS_TAB 12
193 "return:]",
194#define CLASS_RETURN 13
195 "backspace:]",
196#define CLASS_BACKSPACE 14
197 "escape:]",
198#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100199 "ident:]",
200#define CLASS_IDENT 16
201 "keyword:]",
202#define CLASS_KEYWORD 17
203 "fname:]",
204#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000205 };
206#define CLASS_NONE 99
207 int i;
208
209 if ((*pp)[1] == ':')
210 {
K.Takataeeec2542021-06-02 13:28:16 +0200211 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000212 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
213 {
214 *pp += STRLEN(class_names[i]) + 2;
215 return i;
216 }
217 }
218 return CLASS_NONE;
219}
220
221/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000222 * Specific version of character class functions.
223 * Using a table to keep this fast.
224 */
225static short class_tab[256];
226
227#define RI_DIGIT 0x01
228#define RI_HEX 0x02
229#define RI_OCTAL 0x04
230#define RI_WORD 0x08
231#define RI_HEAD 0x10
232#define RI_ALPHA 0x20
233#define RI_LOWER 0x40
234#define RI_UPPER 0x80
235#define RI_WHITE 0x100
236
237 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100238init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239{
240 int i;
241 static int done = FALSE;
242
243 if (done)
244 return;
245
246 for (i = 0; i < 256; ++i)
247 {
248 if (i >= '0' && i <= '7')
249 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
250 else if (i >= '8' && i <= '9')
251 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
252 else if (i >= 'a' && i <= 'f')
253 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000254 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000255 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
256 else if (i >= 'A' && i <= 'F')
257 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000258 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000259 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
260 else if (i == '_')
261 class_tab[i] = RI_WORD + RI_HEAD;
262 else
263 class_tab[i] = 0;
264 }
265 class_tab[' '] |= RI_WHITE;
266 class_tab['\t'] |= RI_WHITE;
267 done = TRUE;
268}
269
// Character class tests using "class_tab".  Each evaluates to non-zero when
// "c" is in the range 0..255 and has the matching RI_ flag set.
// The range check is done on the unsigned value, so that a negative "c" (for
// example a Magic() character) is rejected instead of indexing before the
// start of the table.  "c" is evaluated twice.
// NOTE(review): assumes "c" is no wider than int - confirm for all callers.
#define ri_digit(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_WORD))
#define ri_head(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((unsigned)(c) < 0x100u && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000279
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100280// flags for regflags
281#define RF_ICASE 1 // ignore case
282#define RF_NOICASE 2 // don't ignore case
283#define RF_HASNL 4 // can match a NL
284#define RF_ICOMBINE 8 // ignore combining characters
285#define RF_LOOKBH 16 // uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000286
287/*
288 * Global work variables for vim_regcomp().
289 */
290
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100291static char_u *regparse; // Input-scan pointer.
292static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100293static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100295static int regnzpar; // \z() count.
296static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000297#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100298static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000299#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100300static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100303static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100305static int reg_string; // matching with a string instead of a buffer
306 // line
307static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000308
309/*
310 * META contains all characters that may be magic, except '^' and '$'.
311 */
312
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100313// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000314static char_u META_flags[] = {
315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100317// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100319// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000320 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100321// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000322 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100323// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100325// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000326 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100327// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
329};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000330
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100331static int curchr; // currently parsed character
332// Previous character. Note: prevchr is sometimes -1 when we are not at the
333// start, eg in /[ ^I]^ the pattern was never found even if it existed,
334// because ^ was taken to be magic -- webb
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200335static int prevchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100336static int prevprevchr; // previous-previous character
337static int nextchr; // used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000338
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100339// arguments for reg()
340#define REG_NOPAREN 0 // toplevel reg()
341#define REG_PAREN 1 // \(\)
342#define REG_ZPAREN 2 // \z(\)
343#define REG_NPAREN 3 // \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344
// Snapshot of the pattern parser state.  Each member holds the value of the
// file-level variable with the same name; filled by save_parse_state() and
// copied back by restore_parse_state().
typedef struct
{
    char_u	*regparse;	// input-scan pointer
    int		prevchr_len;	// byte length of previous char
    int		curchr;		// currently parsed character
    int		prevchr;	// previous character
    int		prevprevchr;	// previous-previous character
    int		nextchr;	// used for ungetchr()
    int		at_start;	// on the first character
    int		prev_at_start;	// on the second character
    int		regnpar;	// () count
} parse_state_T;
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100359static int getchr(void);
360static void skipchr_keepstart(void);
361static int peekchr(void);
362static void skipchr(void);
363static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100364static long gethexchrs(int maxinputlen);
365static long getoctchrs(void);
366static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100367static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100368static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200369static int cstrncmp(char_u *s1, char_u *s2, int *n);
370static char_u *cstrchr(char_u *, int);
371static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100372static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100373#ifdef FEAT_EVAL
374static void report_re_switch(char_u *pat);
375#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000376
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200377static regengine_T bt_regengine;
378static regengine_T nfa_regengine;
379
/*
 * Return non-zero if compiled regular expression "prog" can match a line
 * break.
 * The value is the RF_HASNL bit of "prog->regflags", thus either RF_HASNL or
 * zero; it is not normalized to TRUE.
 */
    int
re_multiline(regprog_T *prog)
{
    return (prog->regflags & RF_HASNL);
}
388
389/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000390 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
391 * Returns a character representing the class. Zero means that no item was
392 * recognized. Otherwise "pp" is advanced to after the item.
393 */
394 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100395get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000396{
397 int c;
398 int l = 1;
399 char_u *p = *pp;
400
Bram Moolenaar985079c2019-02-16 17:07:47 +0100401 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000402 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000403 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000404 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000405 if (p[l + 2] == '=' && p[l + 3] == ']')
406 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000407 if (has_mbyte)
408 c = mb_ptr2char(p + 2);
409 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000410 c = p[2];
411 *pp += l + 4;
412 return c;
413 }
414 }
415 return 0;
416}
417
418/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000419 * Check for a collating element "[.a.]". "pp" points to the '['.
420 * Returns a character. Zero means that no item was recognized. Otherwise
421 * "pp" is advanced to after the item.
422 * Currently only single characters are recognized!
423 */
424 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100425get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000426{
427 int c;
428 int l = 1;
429 char_u *p = *pp;
430
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100431 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000432 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000433 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000434 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000435 if (p[l + 2] == '.' && p[l + 3] == ']')
436 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000437 if (has_mbyte)
438 c = mb_ptr2char(p + 2);
439 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000440 c = p[2];
441 *pp += l + 4;
442 return c;
443 }
444 }
445 return 0;
446}
447
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100448static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
449static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200450
451 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100452get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200453{
454 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
455 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
456}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000457
458/*
459 * Skip over a "[]" range.
460 * "p" must point to the character after the '['.
461 * The returned pointer is on the matching ']', or the terminating NUL.
462 */
463 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100464skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000466 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100468 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 ++p;
470 if (*p == ']' || *p == '-')
471 ++p;
472 while (*p != NUL && *p != ']')
473 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000474 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000475 p += l;
476 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000477 if (*p == '-')
478 {
479 ++p;
480 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100481 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000482 }
483 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200484 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000485 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200486 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000487 p += 2;
488 else if (*p == '[')
489 {
490 if (get_char_class(&p) == CLASS_NONE
491 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200492 && get_coll_element(&p) == 0
493 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100494 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000495 }
496 else
497 ++p;
498 }
499
500 return p;
501}
502
503/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200505 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Take care of characters with a backslash in front of it.
507 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 */
509 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100510skip_regexp(
511 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200512 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200513 int magic)
514{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100515 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200516}
517
518/*
519 * Call skip_regexp() and when the delimiter does not match give an error and
520 * return NULL.
521 */
522 char_u *
523skip_regexp_err(
524 char_u *startp,
525 int delim,
526 int magic)
527{
528 char_u *p = skip_regexp(startp, delim, magic);
529
530 if (*p != delim)
531 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000532 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200533 return NULL;
534 }
535 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200536}
537
538/*
539 * skip_regexp() with extra arguments:
540 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
541 * expression and change "\?" to "?". If "*newp" is not NULL the expression
542 * is changed in-place.
543 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100544 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200545 */
546 char_u *
547skip_regexp_ex(
548 char_u *startp,
549 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100550 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200551 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100552 int *dropped,
553 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000554{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100555 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000556 char_u *p = startp;
557
558 if (magic)
559 mymagic = MAGIC_ON;
560 else
561 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200562 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000563
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100564 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000565 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100566 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000567 break;
568 if ((p[0] == '[' && mymagic >= MAGIC_ON)
569 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
570 {
571 p = skip_anyof(p + 1);
572 if (p[0] == NUL)
573 break;
574 }
575 else if (p[0] == '\\' && p[1] != NUL)
576 {
577 if (dirc == '?' && newp != NULL && p[1] == '?')
578 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100579 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580 if (*newp == NULL)
581 {
582 *newp = vim_strsave(startp);
583 if (*newp != NULL)
584 p = *newp + (p - startp);
585 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200586 if (dropped != NULL)
587 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000588 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000589 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000590 else
591 ++p;
592 }
593 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100594 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000595 if (*p == 'v')
596 mymagic = MAGIC_ALL;
597 else if (*p == 'V')
598 mymagic = MAGIC_NONE;
599 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000600 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100601 if (magic_val != NULL)
602 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000603 return p;
604}
605
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200606/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200607 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200608 */
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100609static int prevchr_len; // byte length of previous char
Bram Moolenaar0270f382018-07-17 05:43:58 +0200610static int at_start; // True when on the first character
611static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100612
Bram Moolenaar071d4272004-06-13 20:20:40 +0000613/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200614 * Start parsing at "str".
615 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000616 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100617initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000618{
619 regparse = str;
620 prevchr_len = 0;
621 curchr = prevprevchr = prevchr = nextchr = -1;
622 at_start = TRUE;
623 prev_at_start = FALSE;
624}
625
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200626/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200627 * Save the current parse state, so that it can be restored and parsing
628 * starts in the same state again.
629 */
630 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100631save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200632{
633 ps->regparse = regparse;
634 ps->prevchr_len = prevchr_len;
635 ps->curchr = curchr;
636 ps->prevchr = prevchr;
637 ps->prevprevchr = prevprevchr;
638 ps->nextchr = nextchr;
639 ps->at_start = at_start;
640 ps->prev_at_start = prev_at_start;
641 ps->regnpar = regnpar;
642}
643
644/*
645 * Restore a previously saved parse state.
646 */
647 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100648restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200649{
650 regparse = ps->regparse;
651 prevchr_len = ps->prevchr_len;
652 curchr = ps->curchr;
653 prevchr = ps->prevchr;
654 prevprevchr = ps->prevprevchr;
655 nextchr = ps->nextchr;
656 at_start = ps->at_start;
657 prev_at_start = ps->prev_at_start;
658 regnpar = ps->regnpar;
659}
660
661
/*
 * Get the next character without advancing.
 *
 * The result is cached in "curchr": when "curchr" is not -1 it is returned
 * as it is.  Otherwise the byte at "regparse" is inspected and "curchr" is
 * set, using the Magic() form when the character is special with the current
 * "reg_magic".  "regparse" has its original value again on return.
 * May change "at_start" and "prev_at_start" (for '^' and for a backslash
 * followed by a META character).
 */
    static int
peekchr(void)
{
    // Non-zero while in the recursive call for the character after a
    // backslash.
    static int	after_slash = FALSE;

    if (curchr == -1)
    {
	switch (curchr = regparse[0])
	{
	case '.':
	case '[':
	case '~':
	    // magic when 'magic' is on
	    if (reg_magic >= MAGIC_ON)
		curchr = Magic(curchr);
	    break;
	case '(':
	case ')':
	case '{':
	case '%':
	case '+':
	case '=':
	case '?':
	case '@':
	case '!':
	case '&':
	case '|':
	case '<':
	case '>':
	case '#':	// future ext.
	case '"':	// future ext.
	case '\'':	// future ext.
	case ',':	// future ext.
	case '-':	// future ext.
	case ':':	// future ext.
	case ';':	// future ext.
	case '`':	// future ext.
	case '/':	// Can't be used in / command
	    // magic only after "\v"
	    if (reg_magic == MAGIC_ALL)
		curchr = Magic(curchr);
	    break;
	case '*':
	    // * is not magic as the very first character, eg "?*ptr", when
	    // after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
	    // "\(\*" is not magic, thus must be magic if "after_slash"
	    if (reg_magic >= MAGIC_ON
		    && !at_start
		    && !(prev_at_start && prevchr == Magic('^'))
		    && (after_slash
			|| (prevchr != Magic('(')
			    && prevchr != Magic('&')
			    && prevchr != Magic('|'))))
		curchr = Magic('*');
	    break;
	case '^':
	    // '^' is only magic as the very first character and if it's after
	    // "\(", "\|", "\&" or "\n"
	    if (reg_magic >= MAGIC_OFF
		    && (at_start
			|| reg_magic == MAGIC_ALL
			|| prevchr == Magic('(')
			|| prevchr == Magic('|')
			|| prevchr == Magic('&')
			|| prevchr == Magic('n')
			|| (no_Magic(prevchr) == '('
			    && prevprevchr == Magic('%'))))
	    {
		curchr = Magic('^');
		// What follows a magic '^' is at the start again.
		at_start = TRUE;
		prev_at_start = FALSE;
	    }
	    break;
	case '$':
	    // '$' is only magic as the very last char and if it's in front of
	    // either "\|", "\)", "\&", or "\n"
	    if (reg_magic >= MAGIC_OFF)
	    {
		char_u *p = regparse + 1;
		int is_magic_all = (reg_magic == MAGIC_ALL);

		// ignore \c \C \m \M \v \V and \Z after '$'
		while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
				|| p[1] == 'm' || p[1] == 'M'
				|| p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
		{
		    // Keep track of the magicness that applies to what
		    // follows the skipped items.
		    if (p[1] == 'v')
			is_magic_all = TRUE;
		    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
			is_magic_all = FALSE;
		    p += 2;
		}
		if (p[0] == NUL
			|| (p[0] == '\\'
			    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
				|| p[1] == 'n'))
			|| (is_magic_all
			       && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
			|| reg_magic == MAGIC_ALL)
		    curchr = Magic('$');
	    }
	    break;
	case '\\':
	    {
		int c = regparse[1];

		if (c == NUL)
		    curchr = '\\';	// trailing '\'
		else if (c <= '~' && META_flags[c])
		{
		    /*
		     * META contains everything that may be magic sometimes,
		     * except ^ and $ ("\^" and "\$" are only magic after
		     * "\V").  We now fetch the next character and toggle its
		     * magicness.  Therefore, \ is so meta-magic that it is
		     * not in META.
		     */
		    // Reset "curchr" so that the recursive call below looks
		    // at the character after the backslash.
		    curchr = -1;
		    prev_at_start = at_start;
		    at_start = FALSE;	// be able to say "/\*ptr"
		    ++regparse;
		    ++after_slash;
		    peekchr();
		    // Undo the temporary changes made for the recursive call.
		    --regparse;
		    --after_slash;
		    curchr = toggle_Magic(curchr);
		}
		else if (vim_strchr(REGEXP_ABBR, c))
		{
		    /*
		     * Handle abbreviations, like "\t" for TAB -- webb
		     */
		    curchr = backslash_trans(c);
		}
		else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
		    curchr = toggle_Magic(c);
		else
		{
		    /*
		     * Next character can never be (made) magic?
		     * Then backslashing it won't do anything.
		     */
		    if (has_mbyte)
			curchr = (*mb_ptr2char)(regparse + 1);
		    else
			curchr = c;
		}
		break;
	    }

	default:
	    // Without multi-byte support "curchr" keeps the byte value that
	    // was assigned in the switch expression.
	    if (has_mbyte)
		curchr = (*mb_ptr2char)(regparse);
	}
    }

    return curchr;
}
823
824/*
825 * Eat one lexed character. Do this in a way that we can undo it.
826 */
827 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100828skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000829{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100830 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 if (*regparse == '\\')
832 prevchr_len = 1;
833 else
834 prevchr_len = 0;
835 if (regparse[prevchr_len] != NUL)
836 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000837 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100838 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000839 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000840 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000841 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000842 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000843 ++prevchr_len;
844 }
845 regparse += prevchr_len;
846 prev_at_start = at_start;
847 at_start = FALSE;
848 prevprevchr = prevchr;
849 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100850 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000851 nextchr = -1;
852}
853
854/*
855 * Skip a character while keeping the value of prev_at_start for at_start.
856 * prevchr and prevprevchr are also kept.
857 */
858 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100859skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000860{
861 int as = prev_at_start;
862 int pr = prevchr;
863 int prpr = prevprevchr;
864
865 skipchr();
866 at_start = as;
867 prevchr = pr;
868 prevprevchr = prpr;
869}
870
/*
 * Lexical analyzer for the pattern: return the next character as lexed by
 * peekchr() (which knows about magic and such) and consume it.
 */
    static int
getchr(void)
{
    int     lexed = peekchr();

    skipchr();
    return lexed;
}

884/*
885 * put character back. Works only once!
886 */
887 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100888ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000889{
890 nextchr = curchr;
891 curchr = prevchr;
892 prevchr = prevprevchr;
893 at_start = prev_at_start;
894 prev_at_start = FALSE;
895
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100896 // Backup regparse, so that it's at the same position as before the
897 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000898 regparse -= prevchr_len;
899}
900
901/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000902 * Get and return the value of the hex string at the current position.
903 * Return -1 if there is no valid hex number.
904 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000905 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000906 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000907 * The parameter controls the maximum number of input characters. This will be
908 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
909 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100910 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100911gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000912{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100913 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000914 int c;
915 int i;
916
917 for (i = 0; i < maxinputlen; ++i)
918 {
919 c = regparse[0];
920 if (!vim_isxdigit(c))
921 break;
922 nr <<= 4;
923 nr |= hex2nr(c);
924 ++regparse;
925 }
926
927 if (i == 0)
928 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100929 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000930}
931
932/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200933 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000934 * current position. Return -1 for invalid. Consumes all digits.
935 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100936 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100937getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000938{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100939 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000940 int c;
941 int i;
942
943 for (i = 0; ; ++i)
944 {
945 c = regparse[0];
946 if (c < '0' || c > '9')
947 break;
948 nr *= 10;
949 nr += c - '0';
950 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100951 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000952 }
953
954 if (i == 0)
955 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100956 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000957}
958
959/*
960 * get and return the value of the octal string immediately after the current
961 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
962 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
963 * treat 8 or 9 as recognised characters. Position is updated:
964 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000965 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000966 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100967 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100968getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000969{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100970 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000971 int c;
972 int i;
973
974 for (i = 0; i < 3 && nr < 040; ++i)
975 {
976 c = regparse[0];
977 if (c < '0' || c > '7')
978 break;
979 nr <<= 3;
980 nr |= hex2nr(c);
981 ++regparse;
982 }
983
984 if (i == 0)
985 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100986 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000987}
988
989/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000990 * read_limits - Read two integers to be taken as a minimum and maximum.
991 * If the first character is '-', then the range is reversed.
992 * Should end with 'end'. If minval is missing, zero is default, if maxval is
993 * missing, a very big number is the default.
994 */
995 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100996read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000997{
998 int reverse = FALSE;
999 char_u *first_char;
1000 long tmp;
1001
1002 if (*regparse == '-')
1003 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001004 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +00001005 regparse++;
1006 reverse = TRUE;
1007 }
1008 first_char = regparse;
1009 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001010 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +00001011 {
1012 if (vim_isdigit(*++regparse))
1013 *maxval = getdigits(&regparse);
1014 else
1015 *maxval = MAX_LIMIT;
1016 }
1017 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001018 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001019 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001020 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001021 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001022 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001023 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001024 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001025 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001026
1027 /*
1028 * Reverse the range if there was a '-', or make sure it is in the right
1029 * order otherwise.
1030 */
1031 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1032 {
1033 tmp = *minval;
1034 *minval = *maxval;
1035 *maxval = tmp;
1036 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001037 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001038 return OK;
1039}
1040
1041/*
1042 * vim_regexec and friends
1043 */
1044
1045/*
1046 * Global work variables for vim_regexec().
1047 */
1048
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001049static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001051static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001052#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001053static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001054static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055
1056/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1058 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001059 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001061static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001062static unsigned reg_tofreelen;
1063
1064/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001065 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001066 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001067 * done:
1068 * single-line multi-line
1069 * reg_match &regmatch_T NULL
1070 * reg_mmatch NULL &regmmatch_T
1071 * reg_startp reg_match->startp <invalid>
1072 * reg_endp reg_match->endp <invalid>
1073 * reg_startpos <invalid> reg_mmatch->startpos
1074 * reg_endpos <invalid> reg_mmatch->endpos
1075 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001076 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001077 * reg_firstlnum <invalid> first line in which to search
1078 * reg_maxline 0 last line nr
1079 * reg_line_lbr FALSE or TRUE FALSE
1080 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001081typedef struct {
1082 regmatch_T *reg_match;
1083 regmmatch_T *reg_mmatch;
1084 char_u **reg_startp;
1085 char_u **reg_endp;
1086 lpos_T *reg_startpos;
1087 lpos_T *reg_endpos;
1088 win_T *reg_win;
1089 buf_T *reg_buf;
1090 linenr_T reg_firstlnum;
1091 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001092 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001093
Bram Moolenaar0270f382018-07-17 05:43:58 +02001094 // The current match-position is stord in these variables:
1095 linenr_T lnum; // line number, relative to first line
1096 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001097 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001098
1099 int need_clear_subexpr; // subexpressions still need to be cleared
1100#ifdef FEAT_SYN_HL
1101 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1102 // cleared
1103#endif
1104
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001105 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1106 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1107 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001108 int reg_ic;
1109
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001110 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1111 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001112 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001113
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001114 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1115 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001116 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001117
1118 // State for the NFA engine regexec.
1119 int nfa_has_zend; // NFA regexp \ze operator encountered.
1120 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1121 int nfa_nsubexpr; // Number of sub expressions actually being used
1122 // during execution. 1 if only the whole match
1123 // (subexpr 0) is used.
1124 // listid is global, so that it increases on recursive calls to
1125 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1126 // all the states.
1127 int nfa_listid;
1128 int nfa_alt_listid;
1129
1130#ifdef FEAT_SYN_HL
1131 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1132#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001133} regexec_T;
1134
1135static regexec_T rex;
1136static int rex_in_use = FALSE;
1137
Bram Moolenaar071d4272004-06-13 20:20:40 +00001138/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001139 * Return TRUE if character 'c' is included in 'iskeyword' option for
1140 * "reg_buf" buffer.
1141 */
1142 static int
1143reg_iswordc(int c)
1144{
1145 return vim_iswordc_buf(c, rex.reg_buf);
1146}
1147
1148/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001149 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1150 */
1151 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001152reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001153{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001154 // when looking behind for a match/no-match lnum is negative. But we
1155 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001157 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001158 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001159 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001160 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001161 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162}
1163
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches, one entry per subexpression.
static char_u   *reg_startzp[NSUBEXP];      // start of the match (pointer)
static char_u   *reg_endzp[NSUBEXP];        // end of the match (pointer)
static lpos_T   reg_startzpos[NSUBEXP];     // idem, start as a position
static lpos_T   reg_endzpos[NSUBEXP];       // idem, end as a position
#endif

// TRUE if using multi-line regexp: rex.reg_match is NULL then.
#define REG_MULTI       (rex.reg_match == NULL)

#ifdef FEAT_SYN_HL
/*
 * Allocate a new extmatch with ALLOC_CLEAR_ONE() and mark it as referenced
 * once.
 * Returns NULL when the allocation failed.
 */
    static reg_extmatch_T *
make_extmatch(void)
{
    reg_extmatch_T  *em = ALLOC_CLEAR_ONE(reg_extmatch_T);

    if (em == NULL)
        return NULL;
    em->refcnt = 1;
    return em;
}

/*
 * Add a reference to extmatch "em".  "em" may be NULL.
 * Returns "em", so that the call can be used in an assignment.
 */
    reg_extmatch_T *
ref_extmatch(reg_extmatch_T *em)
{
    if (em == NULL)
        return NULL;
    ++em->refcnt;
    return em;
}

/*
 * Remove a reference to extmatch "em".  "em" may be NULL.
 * When no references are left, free all the matches and "em" itself.
 */
    void
unref_extmatch(reg_extmatch_T *em)
{
    int     idx;

    if (em == NULL)
        return;

    --em->refcnt;
    if (em->refcnt > 0)
        return;             // still in use elsewhere

    for (idx = 0; idx < NSUBEXP; ++idx)
        vim_free(em->matches[idx]);
    vim_free(em);
}
#endif

1218/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001219 * Get class of previous character.
1220 */
1221 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001222reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001223{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001224 if (rex.input > rex.line)
1225 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001226 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001227 return -1;
1228}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001229
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001230/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001231 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001232 */
1233 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001234reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001235{
1236 pos_T top, bot;
1237 linenr_T lnum;
1238 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001239 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001240 int mode;
1241 colnr_T start, end;
1242 colnr_T start2, end2;
1243 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001244 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001246 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001247 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001248 return FALSE;
1249
1250 if (VIsual_active)
1251 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001252 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001253 {
1254 top = VIsual;
1255 bot = wp->w_cursor;
1256 }
1257 else
1258 {
1259 top = wp->w_cursor;
1260 bot = VIsual;
1261 }
1262 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001263 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001264 }
1265 else
1266 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001267 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001268 {
1269 top = curbuf->b_visual.vi_start;
1270 bot = curbuf->b_visual.vi_end;
1271 }
1272 else
1273 {
1274 top = curbuf->b_visual.vi_end;
1275 bot = curbuf->b_visual.vi_start;
1276 }
1277 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001278 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001279 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001280 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001281 if (lnum < top.lnum || lnum > bot.lnum)
1282 return FALSE;
1283
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001284 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001285 if (mode == 'v')
1286 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001287 if ((lnum == top.lnum && col < top.col)
1288 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1289 return FALSE;
1290 }
1291 else if (mode == Ctrl_V)
1292 {
1293 getvvcol(wp, &top, &start, NULL, &end);
1294 getvvcol(wp, &bot, &start2, NULL, &end2);
1295 if (start2 < start)
1296 start = start2;
1297 if (end2 > end)
1298 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001299 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001300 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001301
1302 // getvvcol() flushes rex.line, need to get it again
1303 rex.line = reg_getline(rex.lnum);
1304 rex.input = rex.line + col;
1305
Bram Moolenaar7f9969c2022-07-25 18:13:54 +01001306 cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001307 if (cols < start || cols > end - (*p_sel == 'e'))
1308 return FALSE;
1309 }
1310 return TRUE;
1311}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001312
Bram Moolenaar071d4272004-06-13 20:20:40 +00001313/*
1314 * Check the regexp program for its magic number.
1315 * Return TRUE if it's wrong.
1316 */
1317 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001318prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001320 regprog_T *prog;
1321
Bram Moolenaar6100d022016-10-02 16:51:57 +02001322 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001323 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001324 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001325 return FALSE;
1326
1327 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001328 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001329 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 return TRUE;
1331 }
1332 return FALSE;
1333}
1334
1335/*
1336 * Cleanup the subexpressions, if this wasn't done yet.
1337 * This construction is used to clear the subexpressions only when they are
1338 * used (to increase speed).
1339 */
1340 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001341cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001343 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001344 {
1345 if (REG_MULTI)
1346 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001347 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001348 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1349 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001350 }
1351 else
1352 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001353 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1354 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001356 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001357 }
1358}
1359
#ifdef FEAT_SYN_HL
/*
 * Clear the \z(...\) subexpressions, if this wasn't done yet.
 * Works like cleanup_subexpr(), using the rex.need_clear_zsubexpr flag.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
        return;
    rex.need_clear_zsubexpr = FALSE;

    if (REG_MULTI)
    {
        // Use 0xff to set lnum to -1
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
}
#endif

1382/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001383 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 */
1385 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001386reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001387{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001388 rex.line = reg_getline(++rex.lnum);
1389 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001390 fast_breakcheck();
1391}
1392
1393/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001394 * Check whether a backreference matches.
1395 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001396 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1397 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001398 */
1399 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001400match_with_backref(
1401 linenr_T start_lnum,
1402 colnr_T start_col,
1403 linenr_T end_lnum,
1404 colnr_T end_col,
1405 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001406{
1407 linenr_T clnum = start_lnum;
1408 colnr_T ccol = start_col;
1409 int len;
1410 char_u *p;
1411
1412 if (bytelen != NULL)
1413 *bytelen = 0;
1414 for (;;)
1415 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001416 // Since getting one line may invalidate the other, need to make copy.
1417 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001418 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001419 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001420 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001421 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1422 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001423 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001424 vim_free(reg_tofree);
1425 reg_tofree = alloc(len);
1426 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001427 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001428 reg_tofreelen = len;
1429 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001430 STRCPY(reg_tofree, rex.line);
1431 rex.input = reg_tofree + (rex.input - rex.line);
1432 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001433 }
1434
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001435 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001436 p = reg_getline(clnum);
1437 if (clnum == end_lnum)
1438 len = end_col - ccol;
1439 else
1440 len = (int)STRLEN(p + ccol);
1441
Bram Moolenaar0270f382018-07-17 05:43:58 +02001442 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001443 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001444 if (bytelen != NULL)
1445 *bytelen += len;
1446 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001447 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001448 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001449 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001450
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001451 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001452 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001453 if (bytelen != NULL)
1454 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001455 ++clnum;
1456 ccol = 0;
1457 if (got_int)
1458 return RA_FAIL;
1459 }
1460
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001461 // found a match! Note that rex.line may now point to a copy of the line,
1462 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001463 return RA_MATCH;
1464}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001465
Bram Moolenaarfb031402014-09-09 17:18:49 +02001466/*
1467 * Used in a place where no * or \+ can follow.
1468 */
1469 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001470re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001471{
1472 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001473 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001474 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001475 rc_did_emsg = TRUE;
1476 return FAIL;
1477 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001478 return OK;
1479}
1480
/*
 * One entry of the decomposition table: up to three code points.  Slots that
 * are not used are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for the characters 0xfb20 - 0xfb4f, the index is the
// character minus 0xfb20.  Entries marked UNUSED map to the character itself.
// The table is never written to, thus it is const.
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                // 0xfb20 alt ayin
    {0x5d0,0,0},                // 0xfb21 alt alef
    {0x5d3,0,0},                // 0xfb22 alt dalet
    {0x5d4,0,0},                // 0xfb23 alt he
    {0x5db,0,0},                // 0xfb24 alt kaf
    {0x5dc,0,0},                // 0xfb25 alt lamed
    {0x5dd,0,0},                // 0xfb26 alt mem-sofit
    {0x5e8,0,0},                // 0xfb27 alt resh
    {0x5ea,0,0},                // 0xfb28 alt tav
    {'+', 0, 0},                // 0xfb29 alt plus
    {0x5e9, 0x5c1, 0},          // 0xfb2a shin+shin-dot
    {0x5e9, 0x5c2, 0},          // 0xfb2b shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},      // 0xfb2c shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},      // 0xfb2d shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},          // 0xfb2e alef+patah
    {0x5d0, 0x5b8, 0},          // 0xfb2f alef+qamats
    // U+FB30 is HEBREW LETTER ALEF WITH MAPIQ in the Unicode Character
    // Database; the mapiq uses the same code point as the dagesh (0x5bc).
    // This entry used to have 0x5b4 (hiriq).
    {0x5d0, 0x5bc, 0},          // 0xfb30 alef+mapiq
    {0x5d1, 0x5bc, 0},          // 0xfb31 bet+dagesh
    {0x5d2, 0x5bc, 0},          // 0xfb32 gimel+dagesh
    {0x5d3, 0x5bc, 0},          // 0xfb33 dalet+dagesh
    {0x5d4, 0x5bc, 0},          // 0xfb34 he+dagesh
    {0x5d5, 0x5bc, 0},          // 0xfb35 vav+dagesh
    {0x5d6, 0x5bc, 0},          // 0xfb36 zayin+dagesh
    {0xfb37, 0, 0},             // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},          // 0xfb38 tet+dagesh
    {0x5d9, 0x5bc, 0},          // 0xfb39 yud+dagesh
    {0x5da, 0x5bc, 0},          // 0xfb3a kaf sofit+dagesh
    {0x5db, 0x5bc, 0},          // 0xfb3b kaf+dagesh
    {0x5dc, 0x5bc, 0},          // 0xfb3c lamed+dagesh
    {0xfb3d, 0, 0},             // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},          // 0xfb3e mem+dagesh
    {0xfb3f, 0, 0},             // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},          // 0xfb40 nun+dagesh
    {0x5e1, 0x5bc, 0},          // 0xfb41 samech+dagesh
    {0xfb42, 0, 0},             // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},          // 0xfb43 pe sofit+dagesh
    {0x5e4, 0x5bc,0},           // 0xfb44 pe+dagesh
    {0xfb45, 0, 0},             // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},          // 0xfb46 tsadi+dagesh
    {0x5e7, 0x5bc, 0},          // 0xfb47 qof+dagesh
    {0x5e8, 0x5bc, 0},          // 0xfb48 resh+dagesh
    {0x5e9, 0x5bc, 0},          // 0xfb49 shin+dagesh
    {0x5ea, 0x5bc, 0},          // 0xfb4a tav+dagesh
    {0x5d5, 0x5b9, 0},          // 0xfb4b vav+holam
    {0x5d1, 0x5bf, 0},          // 0xfb4c bet+rafe
    {0x5db, 0x5bf, 0},          // 0xfb4d kaf+rafe
    {0x5e4, 0x5bf, 0},          // 0xfb4e pe+rafe
    {0x5d0, 0x5dc, 0}           // 0xfb4f alef-lamed
};

/*
 * Decompose character "c" into up to three code points, which are stored in
 * "*c1", "*c2" and "*c3".  Code points that are not used are set to zero.
 * Only the characters 0xfb20 - 0xfb4f are decomposed.  Any other character is
 * stored unchanged in "*c1", with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        // Read the entry in place, no need to copy the struct.
        const decomp_T  *d = &decomp_table[c - 0xfb20];

        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}

1559/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001560 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001561 * Return 0 if strings match, non-zero otherwise.
1562 * Correct the length "*n" when composing characters are ignored.
1563 */
1564 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001565cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566{
1567 int result;
1568
Bram Moolenaar6100d022016-10-02 16:51:57 +02001569 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570 result = STRNCMP(s1, s2, *n);
1571 else
1572 result = MB_STRNICMP(s1, s2, *n);
1573
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001574 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001575 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001576 {
1577 char_u *str1, *str2;
1578 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001579 int junk;
1580
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001581 // we have to handle the strcmp ourselves, since it is necessary to
1582 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001583 str1 = s1;
1584 str2 = s2;
1585 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001586 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001587 {
1588 c1 = mb_ptr2char_adv(&str1);
1589 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001591 // Decompose the character if necessary, into 'base' characters.
1592 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001593 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001594 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001595 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001596 mb_decompose(c1, &c11, &junk, &junk);
1597 mb_decompose(c2, &c12, &junk, &junk);
1598 c1 = c11;
1599 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001600 if (c11 != c12
1601 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 break;
1603 }
1604 }
1605 result = c2 - c1;
1606 if (result == 0)
1607 *n = (int)(str2 - s2);
1608 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609
1610 return result;
1611}
1612
1613/*
1614 * cstrchr: This function is used a lot for simple searches, keep it fast!
1615 */
1616 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001617cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618{
1619 char_u *p;
1620 int cc;
1621
Bram Moolenaara12a1612019-01-24 16:39:02 +01001622 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 return vim_strchr(s, c);
1624
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001625 // tolower() and toupper() can be slow, comparing twice should be a lot
1626 // faster (esp. when using MS Visual C++!).
1627 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 if (enc_utf8 && c > 0x80)
1629 cc = utf_fold(c);
1630 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001631 if (MB_ISUPPER(c))
1632 cc = MB_TOLOWER(c);
1633 else if (MB_ISLOWER(c))
1634 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635 else
1636 return vim_strchr(s, c);
1637
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 if (has_mbyte)
1639 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001640 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001641 {
1642 if (enc_utf8 && c > 0x80)
1643 {
Bram Moolenaarf5094052022-07-29 16:22:25 +01001644 int uc = utf_ptr2char(p);
1645
1646 // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
1647 // not 0xff.
1648 if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001649 return p;
1650 }
1651 else if (*p == c || *p == cc)
1652 return p;
1653 }
1654 }
1655 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001656 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001657 for (p = s; *p != NUL; ++p)
1658 if (*p == c || *p == cc)
1659 return p;
1660
1661 return NULL;
1662}
1663
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001664////////////////////////////////////////////////////////////////
1665// regsub stuff //
1666////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001667
Bram Moolenaar071d4272004-06-13 20:20:40 +00001668/*
1669 * We should define ftpr as a pointer to a function returning a pointer to
1670 * a function returning a pointer to a function ...
1671 * This is impossible, so we declare a pointer to a function returning a
Bram Moolenaar30d64132020-09-06 17:09:12 +02001672 * void pointer. This should work for all compilers.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001673 */
Bram Moolenaar30d64132020-09-06 17:09:12 +02001674typedef void (*(*fptr_T)(int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001675
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001676static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001677
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001678 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001679do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001680{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001681 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001682
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001683 return (fptr_T)NULL;
1684}
1685
1686 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001687do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001688{
1689 *d = MB_TOUPPER(c);
1690
1691 return (fptr_T)do_Upper;
1692}
1693
1694 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001695do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001696{
1697 *d = MB_TOLOWER(c);
1698
1699 return (fptr_T)NULL;
1700}
1701
1702 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001703do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001704{
1705 *d = MB_TOLOWER(c);
1706
1707 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001708}
1709
1710/*
1711 * regtilde(): Replace tildes in the pattern by the old pattern.
1712 *
1713 * Short explanation of the tilde: It stands for the previous replacement
1714 * pattern. If that previous pattern also contains a ~ we should go back a
1715 * step further... But we insert the previous pattern into the current one
1716 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001717 * This still does not handle the case where "magic" changes. So require the
1718 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719 *
1720 * The tildes are parsed once before the first call to vim_regsub().
1721 */
1722 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001723regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724{
1725 char_u *newsub = source;
1726 char_u *tmpsub;
1727 char_u *p;
1728 int len;
1729 int prevlen;
1730
1731 for (p = newsub; *p; ++p)
1732 {
1733 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1734 {
1735 if (reg_prev_sub != NULL)
1736 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001737 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001738 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001739 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001740 if (tmpsub != NULL)
1741 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001742 // copy prefix
1743 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001745 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001747 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001749 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001750 STRCPY(tmpsub + len + prevlen, p + 1);
1751
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001752 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753 vim_free(newsub);
1754 newsub = tmpsub;
1755 p = newsub + len + prevlen;
1756 }
1757 }
1758 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001759 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001760 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001761 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001762 --p;
1763 }
1764 else
1765 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001766 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001767 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001769 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 }
1771 }
1772
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001773 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1774 // because recursive calls may make the returned string invalid.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001775 vim_free(reg_prev_sub);
Bram Moolenaar32acf1f2022-07-07 22:20:31 +01001776 reg_prev_sub = vim_strsave(newsub);
1777
Bram Moolenaar071d4272004-06-13 20:20:40 +00001778 return newsub;
1779}
1780
#ifdef FEAT_EVAL
// TRUE when submatch() can be used
static int can_f_submatch = FALSE;

// These pointers are used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// The fields are copies of the matching fields of "rex", taken by
// vim_regsub_both() before the expression is evaluated.
typedef struct {
    regmatch_T  *sm_match;      // copy of rex.reg_match
    regmmatch_T *sm_mmatch;     // copy of rex.reg_mmatch
    linenr_T    sm_firstlnum;   // copy of rex.reg_firstlnum
    linenr_T    sm_maxline;     // copy of rex.reg_maxline
    int         sm_line_lbr;    // copy of rex.reg_line_lbr
} regsubmatch_T;

// can only be used when can_f_submatch is TRUE
static regsubmatch_T rsm;
#endif
1797
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001798#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001799
1800/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001801 * Put the submatches in "argv[argskip]" which is a list passed into
1802 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001803 */
1804 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001805fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001806{
1807 listitem_T *li;
1808 int i;
1809 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001810 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001811
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001812 if (argcount == argskip)
1813 // called function doesn't take a submatches argument
1814 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001815
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001816 // Relies on sl_list to be the first item in staticList10_T.
1817 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001818
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001819 // There are always 10 list items in staticList10_T.
1820 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001821 for (i = 0; i < 10; ++i)
1822 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001823 s = rsm.sm_match->startp[i];
1824 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001825 s = NULL;
1826 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001827 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001828 li->li_tv.v_type = VAR_STRING;
1829 li->li_tv.vval.v_string = s;
1830 li = li->li_next;
1831 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001832 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001833}
1834
1835 static void
1836clear_submatch_list(staticList10_T *sl)
1837{
1838 int i;
1839
1840 for (i = 0; i < 10; ++i)
1841 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1842}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001843#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001844
Bram Moolenaar071d4272004-06-13 20:20:40 +00001845/*
1846 * vim_regsub() - perform substitutions after a vim_regexec() or
1847 * vim_regexec_multi() match.
1848 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001849 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1850 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001851 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001852 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1853 *
1854 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1855 * double them to keep them, and insert a backslash before a CR to avoid it
1856 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001857 *
1858 * Note: The matched text must not change between the call of
1859 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1860 * references invalid!
1861 *
1862 * Returns the size of the replacement, including terminating NUL.
1863 */
1864 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001865vim_regsub(
1866 regmatch_T *rmp,
1867 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001868 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001869 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001870 int destlen,
1871 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001872{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001873 int result;
1874 regexec_T rex_save;
1875 int rex_in_use_save = rex_in_use;
1876
1877 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001878 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001879 rex_save = rex;
1880 rex_in_use = TRUE;
1881
1882 rex.reg_match = rmp;
1883 rex.reg_mmatch = NULL;
1884 rex.reg_maxline = 0;
1885 rex.reg_buf = curbuf;
1886 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001887 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001888
1889 rex_in_use = rex_in_use_save;
1890 if (rex_in_use)
1891 rex = rex_save;
1892
1893 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001894}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895
1896 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001897vim_regsub_multi(
1898 regmmatch_T *rmp,
1899 linenr_T lnum,
1900 char_u *source,
1901 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001902 int destlen,
1903 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001904{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001905 int result;
1906 regexec_T rex_save;
1907 int rex_in_use_save = rex_in_use;
1908
1909 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001910 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001911 rex_save = rex;
1912 rex_in_use = TRUE;
1913
1914 rex.reg_match = NULL;
1915 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001916 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001917 rex.reg_firstlnum = lnum;
1918 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1919 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001920 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001921
1922 rex_in_use = rex_in_use_save;
1923 if (rex_in_use)
1924 rex = rex_save;
1925
1926 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001927}
1928
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4

// Result of evaluating the substitute expression, one entry for each
// nesting level of vim_regsub_both().
static char_u *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Free the saved expression results of all nesting levels.
 */
    void
free_resub_eval_result(void)
{
    int         level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
        VIM_CLEAR(eval_result[level]);
        ++level;
    }
}
# endif
#endif
1945
Bram Moolenaar071d4272004-06-13 20:20:40 +00001946 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001947vim_regsub_both(
1948 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001949 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001950 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001951 int destlen,
1952 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001953{
1954 char_u *src;
1955 char_u *dst;
1956 char_u *s;
1957 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001958 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001959 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001960 fptr_T func_all = (fptr_T)NULL;
1961 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001962 linenr_T clnum = 0; // init for GCC
1963 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001964#ifdef FEAT_EVAL
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001965 static int nesting = 0;
1966 int nested;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001967#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001968 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001969
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001970 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001971 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001972 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001973 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001974 return 0;
1975 }
1976 if (prog_magic_wrong())
1977 return 0;
Bram Moolenaar44ddf192022-06-21 22:15:25 +01001978#ifdef FEAT_EVAL
1979 if (nesting == MAX_REGSUB_NESTING)
1980 {
1981 emsg(_(e_substitute_nesting_too_deep));
1982 return 0;
1983 }
1984 nested = nesting;
1985#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001986 src = source;
1987 dst = dest;
1988
1989 /*
1990 * When the substitute part starts with "\=" evaluate it as an expression.
1991 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001992 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001993 {
1994#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001995 // To make sure that the length doesn't change between checking the
1996 // length and copying the string, and to speed up things, the
Paul Ollis65745772022-06-05 16:55:54 +01001997 // resulting string is saved from the call with
1998 // "flags & REGSUB_COPY" == 0 to the call with
1999 // "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002000 if (copy)
2001 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002002 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002004 STRCPY(dest, eval_result[nested]);
2005 dst += STRLEN(eval_result[nested]);
2006 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002007 }
2008 }
2009 else
2010 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002011 int prev_can_f_submatch = can_f_submatch;
2012 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002013
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002014 VIM_CLEAR(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002015
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002016 // The expression may contain substitute(), which calls us
2017 // recursively. Make sure submatch() gets the text from the first
2018 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002019 if (can_f_submatch)
2020 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002022 rsm.sm_match = rex.reg_match;
2023 rsm.sm_mmatch = rex.reg_mmatch;
2024 rsm.sm_firstlnum = rex.reg_firstlnum;
2025 rsm.sm_maxline = rex.reg_maxline;
2026 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002027
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002028 // Although unlikely, it is possible that the expression invokes a
2029 // substitute command (it might fail, but still). Therefore keep
Bram Moolenaarabd56da2022-06-23 20:46:27 +01002030 // an array of eval results.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002031 ++nesting;
2032
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002033 if (expr != NULL)
2034 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002035 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002036 char_u buf[NUMBUFLEN];
2037 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002038 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002039 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002040
2041 rettv.v_type = VAR_STRING;
2042 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002043 argv[0].v_type = VAR_LIST;
2044 argv[0].vval.v_list = &matchList.sl_list;
2045 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02002046 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002047 funcexe.fe_argv_func = fill_submatch_list;
2048 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002049 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002050 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002051 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002052 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002053 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002054 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002055 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002056 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002057
Bram Moolenaar6100d022016-10-02 16:51:57 +02002058 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002059 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002060 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002061 }
LemonBoyf3b48952022-05-05 13:53:03 +01002062 else if (expr->v_type == VAR_INSTR)
2063 {
2064 exe_typval_instr(expr, &rettv);
2065 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002066 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002067 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002068 clear_submatch_list(&matchList);
2069
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002070 if (rettv.v_type == VAR_UNKNOWN)
2071 // something failed, no need to report another error
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002072 eval_result[nested] = NULL;
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002073 else
2074 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002075 eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
2076 if (eval_result[nested] != NULL)
2077 eval_result[nested] = vim_strsave(eval_result[nested]);
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002078 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002079 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002080 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002081 else if (substitute_instr != NULL)
2082 // Execute instructions from ISN_SUBSTITUTE.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002083 eval_result[nested] = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002084 else
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002085 eval_result[nested] = eval_to_string(source + 2, TRUE);
2086 --nesting;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002087
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002088 if (eval_result[nested] != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002089 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002090 int had_backslash = FALSE;
2091
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002092 for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002093 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002094 // Change NL to CR, so that it becomes a line break,
2095 // unless called from vim_regexec_nl().
2096 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002097 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002098 *s = CAR;
2099 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002100 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002102 /* Change NL to CR here too, so that this works:
2103 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2104 * abc\
2105 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002106 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002107 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002108 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002109 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002110 had_backslash = TRUE;
2111 }
2112 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002113 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002114 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002115 // Backslashes will be consumed, need to double them.
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002116 s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
Bram Moolenaar06975a42010-03-23 16:27:22 +01002117 if (s != NULL)
2118 {
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002119 vim_free(eval_result[nested]);
2120 eval_result[nested] = s;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002121 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002122 }
2123
Bram Moolenaar44ddf192022-06-21 22:15:25 +01002124 dst += STRLEN(eval_result[nested]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002125 }
2126
Bram Moolenaar6100d022016-10-02 16:51:57 +02002127 can_f_submatch = prev_can_f_submatch;
2128 if (can_f_submatch)
2129 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002130 }
2131#endif
2132 }
2133 else
2134 while ((c = *src++) != NUL)
2135 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002136 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002137 no = 0;
2138 else if (c == '\\' && *src != NUL)
2139 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002140 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002141 {
2142 ++src;
2143 no = 0;
2144 }
2145 else if ('0' <= *src && *src <= '9')
2146 {
2147 no = *src++ - '0';
2148 }
2149 else if (vim_strchr((char_u *)"uUlLeE", *src))
2150 {
2151 switch (*src++)
2152 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002153 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002154 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002155 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002156 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002157 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002158 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002159 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002160 continue;
2161 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002162 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002163 continue;
2164 }
2165 }
2166 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002167 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002168 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002169 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2170 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002171 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002172 if (copy)
2173 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002174 if (dst + 3 > dest + destlen)
2175 {
2176 iemsg("vim_regsub_both(): not enough space");
2177 return 0;
2178 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002179 *dst++ = c;
2180 *dst++ = *src++;
2181 *dst++ = *src++;
2182 }
2183 else
2184 {
2185 dst += 3;
2186 src += 2;
2187 }
2188 continue;
2189 }
2190
Bram Moolenaar071d4272004-06-13 20:20:40 +00002191 if (c == '\\' && *src != NUL)
2192 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002193 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002194 switch (*src)
2195 {
2196 case 'r': c = CAR; ++src; break;
2197 case 'n': c = NL; ++src; break;
2198 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002199 // Oh no! \e already has meaning in subst pat :-(
2200 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002201 case 'b': c = Ctrl_H; ++src; break;
2202
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002203 // If "backslash" is TRUE the backslash will be removed
2204 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002205 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002206 {
2207 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002208 {
2209 if (dst + 1 > dest + destlen)
2210 {
2211 iemsg("vim_regsub_both(): not enough space");
2212 return 0;
2213 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002214 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002215 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002216 ++dst;
2217 }
2218 c = *src++;
2219 }
2220 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002221 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002222 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002223
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002224 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002225 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002226 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002227 func_one = (fptr_T)(func_one(&cc, c));
2228 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002229 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002230 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002231 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002232 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002233
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002234 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002235 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002236 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002237 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002238
Bram Moolenaar071d4272004-06-13 20:20:40 +00002239 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002240 {
2241 if (dst + charlen > dest + destlen)
2242 {
2243 iemsg("vim_regsub_both(): not enough space");
2244 return 0;
2245 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002246 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002247 }
2248 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002249 if (enc_utf8)
2250 {
2251 int clen = utf_ptr2len(src - 1);
2252
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002253 // If the character length is shorter than "totlen", there
2254 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002255 if (clen < totlen)
2256 {
2257 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002258 {
2259 if (dst + totlen - clen > dest + destlen)
2260 {
2261 iemsg("vim_regsub_both(): not enough space");
2262 return 0;
2263 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002264 mch_memmove(dst + 1, src - 1 + clen,
2265 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002266 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002267 dst += totlen - clen;
2268 }
2269 }
2270 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002271 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002272 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002273 {
2274 if (dst + 1 > dest + destlen)
2275 {
2276 iemsg("vim_regsub_both(): not enough space");
2277 return 0;
2278 }
2279 *dst = cc;
2280 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002281 dst++;
2282 }
2283 else
2284 {
2285 if (REG_MULTI)
2286 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002287 clnum = rex.reg_mmatch->startpos[no].lnum;
2288 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002289 s = NULL;
2290 else
2291 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002292 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2293 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2294 len = rex.reg_mmatch->endpos[no].col
2295 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002296 else
2297 len = (int)STRLEN(s);
2298 }
2299 }
2300 else
2301 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002302 s = rex.reg_match->startp[no];
2303 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002304 s = NULL;
2305 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002306 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002307 }
2308 if (s != NULL)
2309 {
2310 for (;;)
2311 {
2312 if (len == 0)
2313 {
2314 if (REG_MULTI)
2315 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002316 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002317 break;
2318 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002319 {
2320 if (dst + 1 > dest + destlen)
2321 {
2322 iemsg("vim_regsub_both(): not enough space");
2323 return 0;
2324 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002325 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002326 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002327 ++dst;
2328 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002329 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2330 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002331 else
2332 len = (int)STRLEN(s);
2333 }
2334 else
2335 break;
2336 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002337 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002338 {
2339 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002340 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002341 goto exit;
2342 }
2343 else
2344 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002345 if ((flags & REGSUB_BACKSLASH)
2346 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002347 {
2348 /*
2349 * Insert a backslash in front of a CR, otherwise
2350 * it will be replaced by a line break.
2351 * Number of backslashes will be halved later,
2352 * double them here.
2353 */
2354 if (copy)
2355 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002356 if (dst + 2 > dest + destlen)
2357 {
2358 iemsg("vim_regsub_both(): not enough space");
2359 return 0;
2360 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002361 dst[0] = '\\';
2362 dst[1] = *s;
2363 }
2364 dst += 2;
2365 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002366 else
2367 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002368 if (has_mbyte)
2369 c = mb_ptr2char(s);
2370 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002371 c = *s;
2372
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002373 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002374 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002375 func_one = (fptr_T)(func_one(&cc, c));
2376 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002377 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002378 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002379 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002380 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002381
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002382 if (has_mbyte)
2383 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002384 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002385 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002386
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002387 // Copy composing characters separately, one
2388 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002389 if (enc_utf8)
2390 l = utf_ptr2len(s) - 1;
2391 else
2392 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002393
2394 s += l;
2395 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002396 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002397 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002398 {
2399 if (dst + charlen > dest + destlen)
2400 {
2401 iemsg("vim_regsub_both(): not enough space");
2402 return 0;
2403 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002404 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002405 }
2406 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002407 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002408 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002409 {
2410 if (dst + 1 > dest + destlen)
2411 {
2412 iemsg("vim_regsub_both(): not enough space");
2413 return 0;
2414 }
2415 *dst = cc;
2416 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002417 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002418 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002419
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420 ++s;
2421 --len;
2422 }
2423 }
2424 }
2425 no = -1;
2426 }
2427 }
2428 if (copy)
2429 *dst = NUL;
2430
2431exit:
2432 return (int)((dst - dest) + 1);
2433}
2434
2435#ifdef FEAT_EVAL
2436/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002437 * Call reg_getline() with the line numbers from the submatch. If a
2438 * substitute() was used the reg_maxline and other values have been
2439 * overwritten.
2440 */
2441 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002442reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002443{
2444 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002445 linenr_T save_first = rex.reg_firstlnum;
2446 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002447
Bram Moolenaar6100d022016-10-02 16:51:57 +02002448 rex.reg_firstlnum = rsm.sm_firstlnum;
2449 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002450
2451 s = reg_getline(lnum);
2452
Bram Moolenaar6100d022016-10-02 16:51:57 +02002453 rex.reg_firstlnum = save_first;
2454 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002455 return s;
2456}
2457
2458/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002459 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002460 * allocated memory.
2461 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2462 */
2463 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002464reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002465{
2466 char_u *retval = NULL;
2467 char_u *s;
2468 int len;
2469 int round;
2470 linenr_T lnum;
2471
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002472 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002473 return NULL;
2474
Bram Moolenaar6100d022016-10-02 16:51:57 +02002475 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002476 {
2477 /*
2478 * First round: compute the length and allocate memory.
2479 * Second round: copy the text.
2480 */
2481 for (round = 1; round <= 2; ++round)
2482 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002483 lnum = rsm.sm_mmatch->startpos[no].lnum;
2484 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002485 return NULL;
2486
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002487 s = reg_getline_submatch(lnum);
2488 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002489 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002490 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002491 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002492 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002493 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002494 len = rsm.sm_mmatch->endpos[no].col
2495 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002496 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002497 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002498 ++len;
2499 }
2500 else
2501 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002502 // Multiple lines: take start line from start col, middle
2503 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 len = (int)STRLEN(s);
2505 if (round == 2)
2506 {
2507 STRCPY(retval, s);
2508 retval[len] = '\n';
2509 }
2510 ++len;
2511 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002512 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002513 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002514 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 if (round == 2)
2516 STRCPY(retval + len, s);
2517 len += (int)STRLEN(s);
2518 if (round == 2)
2519 retval[len] = '\n';
2520 ++len;
2521 }
2522 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002523 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002524 rsm.sm_mmatch->endpos[no].col);
2525 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002526 if (round == 2)
2527 retval[len] = NUL;
2528 ++len;
2529 }
2530
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002531 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002532 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002533 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002534 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002535 return NULL;
2536 }
2537 }
2538 }
2539 else
2540 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002541 s = rsm.sm_match->startp[no];
2542 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002543 retval = NULL;
2544 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002545 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002546 }
2547
2548 return retval;
2549}
Bram Moolenaar41571762014-04-02 19:00:58 +02002550
2551/*
2552 * Used for the submatch() function with the optional non-zero argument: get
2553 * the list of strings from the n'th submatch in allocated memory with NULs
2554 * represented in NLs.
2555 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2556 * command, for a non-existing submatch and for any error.
2557 */
2558 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002559reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002560{
2561 char_u *s;
2562 linenr_T slnum;
2563 linenr_T elnum;
2564 colnr_T scol;
2565 colnr_T ecol;
2566 int i;
2567 list_T *list;
2568 int error = FALSE;
2569
2570 if (!can_f_submatch || no < 0)
2571 return NULL;
2572
Bram Moolenaar6100d022016-10-02 16:51:57 +02002573 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002574 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002575 slnum = rsm.sm_mmatch->startpos[no].lnum;
2576 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002577 if (slnum < 0 || elnum < 0)
2578 return NULL;
2579
Bram Moolenaar6100d022016-10-02 16:51:57 +02002580 scol = rsm.sm_mmatch->startpos[no].col;
2581 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002582
2583 list = list_alloc();
2584 if (list == NULL)
2585 return NULL;
2586
2587 s = reg_getline_submatch(slnum) + scol;
2588 if (slnum == elnum)
2589 {
2590 if (list_append_string(list, s, ecol - scol) == FAIL)
2591 error = TRUE;
2592 }
2593 else
2594 {
2595 if (list_append_string(list, s, -1) == FAIL)
2596 error = TRUE;
2597 for (i = 1; i < elnum - slnum; i++)
2598 {
2599 s = reg_getline_submatch(slnum + i);
2600 if (list_append_string(list, s, -1) == FAIL)
2601 error = TRUE;
2602 }
2603 s = reg_getline_submatch(elnum);
2604 if (list_append_string(list, s, ecol) == FAIL)
2605 error = TRUE;
2606 }
2607 }
2608 else
2609 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002610 s = rsm.sm_match->startp[no];
2611 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002612 return NULL;
2613 list = list_alloc();
2614 if (list == NULL)
2615 return NULL;
2616 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002617 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002618 error = TRUE;
2619 }
2620
2621 if (error)
2622 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002623 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002624 return NULL;
2625 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002626 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002627 return list;
2628}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002629#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002630
Bram Moolenaarf4140482020-02-15 23:06:45 +01002631/*
2632 * Initialize the values used for matching against multiple lines
2633 */
2634 static void
2635init_regexec_multi(
2636 regmmatch_T *rmp,
2637 win_T *win, // window in which to search or NULL
2638 buf_T *buf, // buffer in which to search
2639 linenr_T lnum) // nr of line to start looking for match
2640{
2641 rex.reg_match = NULL;
2642 rex.reg_mmatch = rmp;
2643 rex.reg_buf = buf;
2644 rex.reg_win = win;
2645 rex.reg_firstlnum = lnum;
2646 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2647 rex.reg_line_lbr = FALSE;
2648 rex.reg_ic = rmp->rmm_ic;
2649 rex.reg_icombine = FALSE;
2650 rex.reg_maxcol = rmp->rmm_maxcol;
2651}
2652
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002653#include "regexp_bt.c"
2654
// Function table for the backtracking engine; the bt_*() functions are
// presumably defined in the "regexp_bt.c" file included just above.
// NOTE(review): positional initializer, assumed to follow the member order
// regcomp, regfree, regexec_nl, regexec_multi of regengine_T - confirm
// against its declaration.
static regengine_T bt_regengine =
{
    bt_regcomp,
    bt_regfree,
    bt_regexec_nl,
    bt_regexec_multi,
};
2662
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002663#include "regexp_nfa.c"
2664
// Function table for the NFA engine; the nfa_*() functions are presumably
// defined in the "regexp_nfa.c" file included just above.
// NOTE(review): positional initializer, assumed to follow the member order
// regcomp, regfree, regexec_nl, regexec_multi of regengine_T - confirm
// against its declaration.
static regengine_T nfa_regengine =
{
    nfa_regcomp,
    nfa_regfree,
    nfa_regexec_nl,
    nfa_regexec_multi,
};
2672
// Which regexp engine to use?  Needed for vim_regcomp().
// Must match with 'regexpengine'.
// vim_regcomp() sets this from "p_re" and overrules it when the pattern
// starts with a valid "\%#=N" prefix.
static int regexp_engine = 0;

#ifdef DEBUG
// Engine names for debugging messages, indexed by the engine number.
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			};
#endif
2684
2685/*
2686 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002687 * Returns the program in allocated memory.
2688 * Use vim_regfree() to free the memory.
2689 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002690 */
2691 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002692vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002693{
2694 regprog_T *prog = NULL;
2695 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002696 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002697
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002698 regexp_engine = p_re;
2699
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002700 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002701 if (STRNCMP(expr, "\\%#=", 4) == 0)
2702 {
2703 int newengine = expr[4] - '0';
2704
2705 if (newengine == AUTOMATIC_ENGINE
2706 || newengine == BACKTRACKING_ENGINE
2707 || newengine == NFA_ENGINE)
2708 {
2709 regexp_engine = expr[4] - '0';
2710 expr += 5;
2711#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002712 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002713 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002714#endif
2715 }
2716 else
2717 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002718 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002719 regexp_engine = AUTOMATIC_ENGINE;
2720 }
2721 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002722#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002723 bt_regengine.expr = expr;
2724 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002725#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002726 // reg_iswordc() uses rex.reg_buf
2727 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002728
2729 /*
2730 * First try the NFA engine, unless backtracking was requested.
2731 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002732 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002733 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002734 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002735 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002736 else
2737 prog = bt_regengine.regcomp(expr, re_flags);
2738
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002739 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002740 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002741 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002742#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002743 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002744 {
2745 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002746 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002747 if (f)
2748 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002749 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002750 fclose(f);
2751 }
2752 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002753 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002754 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002755 }
2756#endif
2757 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002758 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002759 * The NFA engine also fails for patterns that it can't handle well
2760 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002761 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002762 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002763 if (regexp_engine == AUTOMATIC_ENGINE
2764 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002765 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002766 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002767#ifdef FEAT_EVAL
2768 report_re_switch(expr);
2769#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002770 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002771 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002772 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002773
Bram Moolenaarfda37292014-11-05 14:27:36 +01002774 if (prog != NULL)
2775 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002776 // Store the info needed to call regcomp() again when the engine turns
2777 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002778 prog->re_engine = regexp_engine;
2779 prog->re_flags = re_flags;
2780 }
2781
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002782 return prog;
2783}
2784
2785/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002786 * Free a compiled regexp program, returned by vim_regcomp().
2787 */
2788 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002789vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002790{
2791 if (prog != NULL)
2792 prog->engine->regfree(prog);
2793}
2794
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory held in the file-level regexp storage: the "regstack" and
 * "backpos" growarrays and the "reg_tofree" and "reg_prev_sub" pointers.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
2805
#ifdef FEAT_EVAL
/*
 * When 'verbose' is set: give a message that the backtracking engine is going
 * to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2819
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog", which is set while "prog" is being
 * executed.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int	    busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002830
2831/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002832 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002833 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002834 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002835 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002836 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002837 *
2838 * Return TRUE if there is a match, FALSE if not.
2839 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002840 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002841vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002842 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002843 char_u *line, // string to match against
2844 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002845 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002846{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002847 int result;
2848 regexec_T rex_save;
2849 int rex_in_use_save = rex_in_use;
2850
Bram Moolenaar0270f382018-07-17 05:43:58 +02002851 // Cannot use the same prog recursively, it contains state.
2852 if (rmp->regprog->re_in_use)
2853 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002854 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002855 return FALSE;
2856 }
2857 rmp->regprog->re_in_use = TRUE;
2858
Bram Moolenaar6100d022016-10-02 16:51:57 +02002859 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002860 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002861 rex_save = rex;
2862 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002863
Bram Moolenaar6100d022016-10-02 16:51:57 +02002864 rex.reg_startp = NULL;
2865 rex.reg_endp = NULL;
2866 rex.reg_startpos = NULL;
2867 rex.reg_endpos = NULL;
2868
2869 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002870 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002871
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002872 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002873 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2874 && result == NFA_TOO_EXPENSIVE)
2875 {
2876 int save_p_re = p_re;
2877 int re_flags = rmp->regprog->re_flags;
2878 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2879
2880 p_re = BACKTRACKING_ENGINE;
2881 vim_regfree(rmp->regprog);
2882 if (pat != NULL)
2883 {
2884#ifdef FEAT_EVAL
2885 report_re_switch(pat);
2886#endif
2887 rmp->regprog = vim_regcomp(pat, re_flags);
2888 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002889 {
2890 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002891 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002892 rmp->regprog->re_in_use = FALSE;
2893 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002894 vim_free(pat);
2895 }
2896
2897 p_re = save_p_re;
2898 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002899
2900 rex_in_use = rex_in_use_save;
2901 if (rex_in_use)
2902 rex = rex_save;
2903
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002904 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002905}
2906
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002907/*
2908 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002909 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002910 */
2911 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002912vim_regexec_prog(
2913 regprog_T **prog,
2914 int ignore_case,
2915 char_u *line,
2916 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002917{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002918 int r;
2919 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002920
2921 regmatch.regprog = *prog;
2922 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002923 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002924 *prog = regmatch.regprog;
2925 return r;
2926}
2927
2928/*
2929 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002930 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002931 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002932 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002933vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002934{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002935 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002936}
2937
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002938/*
2939 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002940 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002941 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002942 */
2943 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002944vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002945{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002946 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002947}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002948
2949/*
2950 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002951 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2952 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002953 * Uses curbuf for line count and 'iskeyword'.
2954 *
2955 * Return zero if there is no match. Return number of lines contained in the
2956 * match otherwise.
2957 */
2958 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002959vim_regexec_multi(
2960 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002961 win_T *win, // window in which to search or NULL
2962 buf_T *buf, // buffer in which to search
2963 linenr_T lnum, // nr of line to start looking for match
2964 colnr_T col, // column to start looking for match
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002965 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002966{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002967 int result;
2968 regexec_T rex_save;
2969 int rex_in_use_save = rex_in_use;
2970
Bram Moolenaar0270f382018-07-17 05:43:58 +02002971 // Cannot use the same prog recursively, it contains state.
2972 if (rmp->regprog->re_in_use)
2973 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002974 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002975 return FALSE;
2976 }
2977 rmp->regprog->re_in_use = TRUE;
2978
Bram Moolenaar6100d022016-10-02 16:51:57 +02002979 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002980 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002981 rex_save = rex;
2982 rex_in_use = TRUE;
2983
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002984 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01002985 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002986 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002987
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002988 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002989 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2990 && result == NFA_TOO_EXPENSIVE)
2991 {
2992 int save_p_re = p_re;
2993 int re_flags = rmp->regprog->re_flags;
2994 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2995
2996 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002997 if (pat != NULL)
2998 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002999 regprog_T *prev_prog = rmp->regprog;
3000
Bram Moolenaarfda37292014-11-05 14:27:36 +01003001#ifdef FEAT_EVAL
3002 report_re_switch(pat);
3003#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003004#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003005 // checking for \z misuse was already done when compiling for NFA,
3006 // allow all here
3007 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003008#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01003009 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003010#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02003011 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02003012#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003013 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02003014 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01003015 // Somehow compiling the pattern failed now, put back the
3016 // previous one to avoid "regprog" becoming NULL.
3017 rmp->regprog = prev_prog;
3018 }
3019 else
3020 {
3021 vim_regfree(prev_prog);
3022
Bram Moolenaar41499802018-07-18 06:02:09 +02003023 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01003024 result = rmp->regprog->engine->regexec_multi(
Paul Ollis65745772022-06-05 16:55:54 +01003025 rmp, win, buf, lnum, col, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02003026 rmp->regprog->re_in_use = FALSE;
3027 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01003028 vim_free(pat);
3029 }
3030 p_re = save_p_re;
3031 }
3032
Bram Moolenaar6100d022016-10-02 16:51:57 +02003033 rex_in_use = rex_in_use_save;
3034 if (rex_in_use)
3035 rex = rex_save;
3036
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003037 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003038}