/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 */
5
// Regexp debugging is off by default: no debugging logs or files related to
// regular expressions are created, even when compiling with -DDEBUG.
// To enable regexp debugging, uncomment the "#define DEBUG" line below.
#undef DEBUG
// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
#ifdef DEBUG
// Debugging switches for the BT engine; only active when DEBUG is defined.

// show/save debugging data when the BT engine is used
# define BT_REGEXP_DUMP
// save the debugging data to a file instead of displaying it
# define BT_REGEXP_LOG
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
/*
 * "Magic" characters have a special meaning instead of matching literally.
 * They are represented by negative numbers, which separates them from
 * literal characters (possibly multi-byte).  Only ASCII characters can be
 * Magic.
 */
#define Magic(x)    ((int)(x) - 256)	// literal character -> Magic value
#define un_Magic(x) ((x) + 256)		// Magic value -> literal character
#define is_Magic(x) ((x) < 0)		// non-zero when "x" is a Magic value
31
/*
 * Return "x" without magicness: a Magic value is converted back to its
 * literal character, any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
39
/*
 * Return "x" with its magicness flipped: a Magic value becomes the literal
 * character, a literal character becomes the corresponding Magic value.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
47
/*
 * Magic number that occupies the first byte of the BT regexp internal
 * "program"; the start node begins in the second byte.  It is used to catch
 * the most severe mutilation of the program by the caller.
 */
#define REGMAGIC    0234
55
/*
 * Utility definitions.
 */

// The byte at "p", read as an unsigned character and widened to int.
#define UCHARAT(p) ((int)*(char_u *)(p))

// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL or FAIL.  Each macro expands to a complete
// "return" statement.
// The EMSG2/EMSG3 variants pass "" or "\\" as the first message argument,
// depending on whether "c" is non-zero.
#define EMSG_RET_NULL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) \
    return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) \
    return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL \
    EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000070
Bram Moolenaar95f09602016-11-10 20:01:45 +010071
#define MAX_LIMIT	(32767L << 16L)

// Classification of an operator, as returned by re_multi_type().
#define NOT_MULTI	0
#define MULTI_ONE	1
#define MULTI_MULT	2

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020084
Bram Moolenaar071d4272004-06-13 20:20:40 +000085/*
86 * Return NOT_MULTI if c is not a "multi" operator.
87 * Return MULTI_ONE if c is a single "multi" operator.
88 * Return MULTI_MULT if c is a multi "multi" operator.
89 */
90 static int
Bram Moolenaar05540972016-01-30 20:31:25 +010091re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +000092{
93 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
94 return MULTI_ONE;
95 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
96 return MULTI_MULT;
97 return NOT_MULTI;
98}
99
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000100static char_u *reg_prev_sub = NULL;
101
Bram Moolenaar071d4272004-06-13 20:20:40 +0000102/*
103 * REGEXP_INRANGE contains all characters which are always special in a []
104 * range after '\'.
105 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
106 * These are:
107 * \n - New line (NL).
108 * \r - Carriage Return (CR).
109 * \t - Tab (TAB).
110 * \e - Escape (ESC).
111 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000112 * \d - Character code in decimal, eg \d123
113 * \o - Character code in octal, eg \o80
114 * \x - Character code in hex, eg \x4a
115 * \u - Multibyte character code, eg \u20ac
116 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000117 */
118static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000119static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120
Bram Moolenaar071d4272004-06-13 20:20:40 +0000121/*
122 * Translate '\x' to its control character, except "\n", which is Magic.
123 */
124 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100125backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000126{
127 switch (c)
128 {
129 case 'r': return CAR;
130 case 't': return TAB;
131 case 'e': return ESC;
132 case 'b': return BS;
133 }
134 return c;
135}
136
137/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000138 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000139 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
140 * recognized. Otherwise "pp" is advanced to after the item.
141 */
142 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100143get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000144{
145 static const char *(class_names[]) =
146 {
147 "alnum:]",
148#define CLASS_ALNUM 0
149 "alpha:]",
150#define CLASS_ALPHA 1
151 "blank:]",
152#define CLASS_BLANK 2
153 "cntrl:]",
154#define CLASS_CNTRL 3
155 "digit:]",
156#define CLASS_DIGIT 4
157 "graph:]",
158#define CLASS_GRAPH 5
159 "lower:]",
160#define CLASS_LOWER 6
161 "print:]",
162#define CLASS_PRINT 7
163 "punct:]",
164#define CLASS_PUNCT 8
165 "space:]",
166#define CLASS_SPACE 9
167 "upper:]",
168#define CLASS_UPPER 10
169 "xdigit:]",
170#define CLASS_XDIGIT 11
171 "tab:]",
172#define CLASS_TAB 12
173 "return:]",
174#define CLASS_RETURN 13
175 "backspace:]",
176#define CLASS_BACKSPACE 14
177 "escape:]",
178#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100179 "ident:]",
180#define CLASS_IDENT 16
181 "keyword:]",
182#define CLASS_KEYWORD 17
183 "fname:]",
184#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000185 };
186#define CLASS_NONE 99
187 int i;
188
189 if ((*pp)[1] == ':')
190 {
K.Takataeeec2542021-06-02 13:28:16 +0200191 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000192 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
193 {
194 *pp += STRLEN(class_names[i]) + 2;
195 return i;
196 }
197 }
198 return CLASS_NONE;
199}
200
/*
 * Specific version of the character class functions, using a table to keep
 * this fast: class_tab[] holds a mask of RI_ flags for every byte value.
 * The table is filled by init_class_tab().
 */
static short	class_tab[256];

#define RI_DIGIT    0x01
#define RI_HEX	    0x02
#define RI_OCTAL    0x04
#define RI_WORD	    0x08
#define RI_HEAD	    0x10
#define RI_ALPHA    0x20
#define RI_LOWER    0x40
#define RI_UPPER    0x80
#define RI_WHITE    0x100
216
217 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100218init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000219{
220 int i;
221 static int done = FALSE;
222
223 if (done)
224 return;
225
226 for (i = 0; i < 256; ++i)
227 {
228 if (i >= '0' && i <= '7')
229 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
230 else if (i >= '8' && i <= '9')
231 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
232 else if (i >= 'a' && i <= 'f')
233 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000234 else if (i >= 'g' && i <= 'z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000235 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
236 else if (i >= 'A' && i <= 'F')
237 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000238 else if (i >= 'G' && i <= 'Z')
Bram Moolenaar071d4272004-06-13 20:20:40 +0000239 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
240 else if (i == '_')
241 class_tab[i] = RI_WORD + RI_HEAD;
242 else
243 class_tab[i] = 0;
244 }
245 class_tab[' '] |= RI_WHITE;
246 class_tab['\t'] |= RI_WHITE;
247 done = TRUE;
248}
249
kylo252ae6f1d82022-02-16 19:24:07 +0000250#define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
251#define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX))
252#define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
253#define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD))
254#define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD))
255#define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
256#define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER))
257#define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER))
258#define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000259
// Flags for regflags.
#define RF_ICASE	1	// ignore case
#define RF_NOICASE	2	// don't ignore case
#define RF_HASNL	4	// can match a NL
#define RF_ICOMBINE	8	// ignore combining characters
#define RF_LOOKBH	16	// uses "\@<=" or "\@<!"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000266
267/*
268 * Global work variables for vim_regcomp().
269 */
270
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100271static char_u *regparse; // Input-scan pointer.
272static int regnpar; // () count.
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100273static int wants_nfa; // regex should use NFA engine
Bram Moolenaar071d4272004-06-13 20:20:40 +0000274#ifdef FEAT_SYN_HL
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100275static int regnzpar; // \z() count.
276static int re_has_z; // \z item detected
Bram Moolenaar071d4272004-06-13 20:20:40 +0000277#endif
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100278static unsigned regflags; // RF_ flags for prog
Bram Moolenaar071d4272004-06-13 20:20:40 +0000279#if defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100280static int had_eol; // TRUE when EOL found by vim_regcomp()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000281#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000282
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100283static magic_T reg_magic; // magicness of the pattern
Bram Moolenaar071d4272004-06-13 20:20:40 +0000284
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100285static int reg_string; // matching with a string instead of a buffer
286 // line
287static int reg_strict; // "[abc" is illegal
Bram Moolenaar071d4272004-06-13 20:20:40 +0000288
289/*
290 * META contains all characters that may be magic, except '^' and '$'.
291 */
292
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100293// META[] is used often enough to justify turning it into a table.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000294static char_u META_flags[] = {
295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100297// % & ( ) * + .
Bram Moolenaar071d4272004-06-13 20:20:40 +0000298 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100299// 1 2 3 4 5 6 7 8 9 < = > ?
Bram Moolenaar071d4272004-06-13 20:20:40 +0000300 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100301// @ A C D F H I K L M O
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100303// P S U V W X Z [ _
Bram Moolenaar071d4272004-06-13 20:20:40 +0000304 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100305// a c d f h i k l m n o
Bram Moolenaar071d4272004-06-13 20:20:40 +0000306 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100307// p s u v w x z { | ~
Bram Moolenaar071d4272004-06-13 20:20:40 +0000308 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
309};
Bram Moolenaar071d4272004-06-13 20:20:40 +0000310
static int	curchr;		// currently parsed character

// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;

static int	prevprevchr;	// previous-previous character
static int	nextchr;	// used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000318
// Arguments for reg(): the kind of parenthesis that is being parsed.
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000324
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200325typedef struct
326{
327 char_u *regparse;
328 int prevchr_len;
329 int curchr;
330 int prevchr;
331 int prevprevchr;
332 int nextchr;
333 int at_start;
334 int prev_at_start;
335 int regnpar;
336} parse_state_T;
337
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100338static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100339static int getchr(void);
340static void skipchr_keepstart(void);
341static int peekchr(void);
342static void skipchr(void);
343static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100344static long gethexchrs(int maxinputlen);
345static long getoctchrs(void);
346static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100347static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100348static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200349static int cstrncmp(char_u *s1, char_u *s2, int *n);
350static char_u *cstrchr(char_u *, int);
351static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100352static int reg_iswordc(int);
Bram Moolenaar66c50c52021-01-02 17:43:49 +0100353#ifdef FEAT_EVAL
354static void report_re_switch(char_u *pat);
355#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200357static regengine_T bt_regengine;
358static regengine_T nfa_regengine;
359
Bram Moolenaar071d4272004-06-13 20:20:40 +0000360/*
361 * Return TRUE if compiled regular expression "prog" can match a line break.
362 */
363 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100364re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365{
366 return (prog->regflags & RF_HASNL);
367}
368
369/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000370 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
371 * Returns a character representing the class. Zero means that no item was
372 * recognized. Otherwise "pp" is advanced to after the item.
373 */
374 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100375get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000376{
377 int c;
378 int l = 1;
379 char_u *p = *pp;
380
Bram Moolenaar985079c2019-02-16 17:07:47 +0100381 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000382 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000383 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000384 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000385 if (p[l + 2] == '=' && p[l + 3] == ']')
386 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000387 if (has_mbyte)
388 c = mb_ptr2char(p + 2);
389 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000390 c = p[2];
391 *pp += l + 4;
392 return c;
393 }
394 }
395 return 0;
396}
397
398/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000399 * Check for a collating element "[.a.]". "pp" points to the '['.
400 * Returns a character. Zero means that no item was recognized. Otherwise
401 * "pp" is advanced to after the item.
402 * Currently only single characters are recognized!
403 */
404 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100405get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000406{
407 int c;
408 int l = 1;
409 char_u *p = *pp;
410
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100411 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000412 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000413 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000414 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000415 if (p[l + 2] == '.' && p[l + 3] == ']')
416 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000417 if (has_mbyte)
418 c = mb_ptr2char(p + 2);
419 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000420 c = p[2];
421 *pp += l + 4;
422 return c;
423 }
424 }
425 return 0;
426}
427
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100428static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
429static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200430
431 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100432get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200433{
434 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
435 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
436}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000437
438/*
439 * Skip over a "[]" range.
440 * "p" must point to the character after the '['.
441 * The returned pointer is on the matching ']', or the terminating NUL.
442 */
443 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100444skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000445{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000446 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000447
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100448 if (*p == '^') // Complement of range.
Bram Moolenaardf177f62005-02-22 08:39:57 +0000449 ++p;
450 if (*p == ']' || *p == '-')
451 ++p;
452 while (*p != NUL && *p != ']')
453 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000454 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000455 p += l;
456 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000457 if (*p == '-')
458 {
459 ++p;
460 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100461 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000462 }
463 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200464 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200466 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467 p += 2;
468 else if (*p == '[')
469 {
470 if (get_char_class(&p) == CLASS_NONE
471 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200472 && get_coll_element(&p) == 0
473 && *p != NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100474 ++p; // it is not a class name and not NUL
Bram Moolenaardf177f62005-02-22 08:39:57 +0000475 }
476 else
477 ++p;
478 }
479
480 return p;
481}
482
483/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000484 * Skip past regular expression.
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200485 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000486 * Take care of characters with a backslash in front of it.
487 * Skip strings inside [ and ].
Bram Moolenaar071d4272004-06-13 20:20:40 +0000488 */
489 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100490skip_regexp(
491 char_u *startp,
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200492 int delim,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200493 int magic)
494{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100495 return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200496}
497
498/*
499 * Call skip_regexp() and when the delimiter does not match give an error and
500 * return NULL.
501 */
502 char_u *
503skip_regexp_err(
504 char_u *startp,
505 int delim,
506 int magic)
507{
508 char_u *p = skip_regexp(startp, delim, magic);
509
510 if (*p != delim)
511 {
Bram Moolenaara6f79292022-01-04 21:30:47 +0000512 semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
Bram Moolenaar2c5ed4e2020-04-20 19:42:10 +0200513 return NULL;
514 }
515 return p;
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200516}
517
518/*
519 * skip_regexp() with extra arguments:
520 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
521 * expression and change "\?" to "?". If "*newp" is not NULL the expression
522 * is changed in-place.
523 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100524 * If "magic_val" is not NULL, returns the effective magicness of the pattern
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200525 */
526 char_u *
527skip_regexp_ex(
528 char_u *startp,
529 int dirc,
Bram Moolenaar05540972016-01-30 20:31:25 +0100530 int magic,
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200531 char_u **newp,
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100532 int *dropped,
533 magic_T *magic_val)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000534{
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100535 magic_T mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536 char_u *p = startp;
537
538 if (magic)
539 mymagic = MAGIC_ON;
540 else
541 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200542 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000543
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100544 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000545 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100546 if (p[0] == dirc) // found end of regexp
Bram Moolenaar071d4272004-06-13 20:20:40 +0000547 break;
548 if ((p[0] == '[' && mymagic >= MAGIC_ON)
549 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
550 {
551 p = skip_anyof(p + 1);
552 if (p[0] == NUL)
553 break;
554 }
555 else if (p[0] == '\\' && p[1] != NUL)
556 {
557 if (dirc == '?' && newp != NULL && p[1] == '?')
558 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100559 // change "\?" to "?", make a copy first.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000560 if (*newp == NULL)
561 {
562 *newp = vim_strsave(startp);
563 if (*newp != NULL)
564 p = *newp + (p - startp);
565 }
Bram Moolenaare8c4abb2020-04-02 21:13:25 +0200566 if (dropped != NULL)
567 ++*dropped;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000568 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000569 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000570 else
571 ++p;
572 }
573 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100574 ++p; // skip next character
Bram Moolenaar071d4272004-06-13 20:20:40 +0000575 if (*p == 'v')
576 mymagic = MAGIC_ALL;
577 else if (*p == 'V')
578 mymagic = MAGIC_NONE;
579 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000580 }
Bram Moolenaard93a7fc2021-01-04 12:42:13 +0100581 if (magic_val != NULL)
582 *magic_val = mymagic;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000583 return p;
584}
585
/*
 * State for the functions that get characters from the regexp input.
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;	// True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100592
Bram Moolenaar071d4272004-06-13 20:20:40 +0000593/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200594 * Start parsing at "str".
595 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000596 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100597initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000598{
599 regparse = str;
600 prevchr_len = 0;
601 curchr = prevprevchr = prevchr = nextchr = -1;
602 at_start = TRUE;
603 prev_at_start = FALSE;
604}
605
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200606/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200607 * Save the current parse state, so that it can be restored and parsing
608 * starts in the same state again.
609 */
610 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100611save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200612{
613 ps->regparse = regparse;
614 ps->prevchr_len = prevchr_len;
615 ps->curchr = curchr;
616 ps->prevchr = prevchr;
617 ps->prevprevchr = prevprevchr;
618 ps->nextchr = nextchr;
619 ps->at_start = at_start;
620 ps->prev_at_start = prev_at_start;
621 ps->regnpar = regnpar;
622}
623
624/*
625 * Restore a previously saved parse state.
626 */
627 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100628restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200629{
630 regparse = ps->regparse;
631 prevchr_len = ps->prevchr_len;
632 curchr = ps->curchr;
633 prevchr = ps->prevchr;
634 prevprevchr = ps->prevprevchr;
635 nextchr = ps->nextchr;
636 at_start = ps->at_start;
637 prev_at_start = ps->prev_at_start;
638 regnpar = ps->regnpar;
639}
640
641
642/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200643 * Get the next character without advancing.
644 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000645 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100646peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000647{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000648 static int after_slash = FALSE;
649
Bram Moolenaar071d4272004-06-13 20:20:40 +0000650 if (curchr == -1)
651 {
652 switch (curchr = regparse[0])
653 {
654 case '.':
655 case '[':
656 case '~':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100657 // magic when 'magic' is on
Bram Moolenaar071d4272004-06-13 20:20:40 +0000658 if (reg_magic >= MAGIC_ON)
659 curchr = Magic(curchr);
660 break;
661 case '(':
662 case ')':
663 case '{':
664 case '%':
665 case '+':
666 case '=':
667 case '?':
668 case '@':
669 case '!':
670 case '&':
671 case '|':
672 case '<':
673 case '>':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100674 case '#': // future ext.
675 case '"': // future ext.
676 case '\'': // future ext.
677 case ',': // future ext.
678 case '-': // future ext.
679 case ':': // future ext.
680 case ';': // future ext.
681 case '`': // future ext.
682 case '/': // Can't be used in / command
683 // magic only after "\v"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000684 if (reg_magic == MAGIC_ALL)
685 curchr = Magic(curchr);
686 break;
687 case '*':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100688 // * is not magic as the very first character, eg "?*ptr", when
689 // after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
690 // "\(\*" is not magic, thus must be magic if "after_slash"
Bram Moolenaardf177f62005-02-22 08:39:57 +0000691 if (reg_magic >= MAGIC_ON
692 && !at_start
693 && !(prev_at_start && prevchr == Magic('^'))
694 && (after_slash
695 || (prevchr != Magic('(')
696 && prevchr != Magic('&')
697 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000698 curchr = Magic('*');
699 break;
700 case '^':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100701 // '^' is only magic as the very first character and if it's after
702 // "\(", "\|", "\&' or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000703 if (reg_magic >= MAGIC_OFF
704 && (at_start
705 || reg_magic == MAGIC_ALL
706 || prevchr == Magic('(')
707 || prevchr == Magic('|')
708 || prevchr == Magic('&')
709 || prevchr == Magic('n')
710 || (no_Magic(prevchr) == '('
711 && prevprevchr == Magic('%'))))
712 {
713 curchr = Magic('^');
714 at_start = TRUE;
715 prev_at_start = FALSE;
716 }
717 break;
718 case '$':
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100719 // '$' is only magic as the very last char and if it's in front of
720 // either "\|", "\)", "\&", or "\n"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721 if (reg_magic >= MAGIC_OFF)
722 {
723 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200724 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000725
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100726 // ignore \c \C \m \M \v \V and \Z after '$'
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200728 || p[1] == 'm' || p[1] == 'M'
729 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
730 {
731 if (p[1] == 'v')
732 is_magic_all = TRUE;
733 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
734 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000735 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200736 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737 if (p[0] == NUL
738 || (p[0] == '\\'
739 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
740 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200741 || (is_magic_all
742 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000743 || reg_magic == MAGIC_ALL)
744 curchr = Magic('$');
745 }
746 break;
747 case '\\':
748 {
749 int c = regparse[1];
750
751 if (c == NUL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100752 curchr = '\\'; // trailing '\'
Bram Moolenaar424bcae2022-01-31 14:59:41 +0000753 else if (c <= '~' && META_flags[c])
Bram Moolenaar071d4272004-06-13 20:20:40 +0000754 {
755 /*
756 * META contains everything that may be magic sometimes,
757 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200758 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000759 * magicness. Therefore, \ is so meta-magic that it is
760 * not in META.
761 */
762 curchr = -1;
763 prev_at_start = at_start;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100764 at_start = FALSE; // be able to say "/\*ptr"
Bram Moolenaar071d4272004-06-13 20:20:40 +0000765 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000766 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000767 peekchr();
768 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000769 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000770 curchr = toggle_Magic(curchr);
771 }
772 else if (vim_strchr(REGEXP_ABBR, c))
773 {
774 /*
775 * Handle abbreviations, like "\t" for TAB -- webb
776 */
777 curchr = backslash_trans(c);
778 }
779 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
780 curchr = toggle_Magic(c);
781 else
782 {
783 /*
784 * Next character can never be (made) magic?
785 * Then backslashing it won't do anything.
786 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000787 if (has_mbyte)
788 curchr = (*mb_ptr2char)(regparse + 1);
789 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000790 curchr = c;
791 }
792 break;
793 }
794
Bram Moolenaar071d4272004-06-13 20:20:40 +0000795 default:
796 if (has_mbyte)
797 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000798 }
799 }
800
801 return curchr;
802}
803
804/*
805 * Eat one lexed character. Do this in a way that we can undo it.
806 */
807 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100808skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000809{
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100810 // peekchr() eats a backslash, do the same here
Bram Moolenaar071d4272004-06-13 20:20:40 +0000811 if (*regparse == '\\')
812 prevchr_len = 1;
813 else
814 prevchr_len = 0;
815 if (regparse[prevchr_len] != NUL)
816 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000817 if (enc_utf8)
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100818 // exclude composing chars that mb_ptr2len does include
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000819 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000820 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000821 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000822 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000823 ++prevchr_len;
824 }
825 regparse += prevchr_len;
826 prev_at_start = at_start;
827 at_start = FALSE;
828 prevprevchr = prevchr;
829 prevchr = curchr;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100830 curchr = nextchr; // use previously unget char, or -1
Bram Moolenaar071d4272004-06-13 20:20:40 +0000831 nextchr = -1;
832}
833
834/*
835 * Skip a character while keeping the value of prev_at_start for at_start.
836 * prevchr and prevprevchr are also kept.
837 */
838 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100839skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840{
841 int as = prev_at_start;
842 int pr = prevchr;
843 int prpr = prevprevchr;
844
845 skipchr();
846 at_start = as;
847 prevchr = pr;
848 prevprevchr = prpr;
849}
850
/*
 * Return the next lexed character of the pattern and advance past it.
 * This is peekchr() followed by skipchr(); the lexer is needed because of
 * magic characters and the like.
 */
    static int
getchr(void)
{
    int	    c = peekchr();

    skipchr();
    return c;
}
863
864/*
865 * put character back. Works only once!
866 */
867 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100868ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000869{
870 nextchr = curchr;
871 curchr = prevchr;
872 prevchr = prevprevchr;
873 at_start = prev_at_start;
874 prev_at_start = FALSE;
875
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100876 // Backup regparse, so that it's at the same position as before the
877 // getchr().
Bram Moolenaar071d4272004-06-13 20:20:40 +0000878 regparse -= prevchr_len;
879}
880
881/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000882 * Get and return the value of the hex string at the current position.
883 * Return -1 if there is no valid hex number.
884 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000885 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000886 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000887 * The parameter controls the maximum number of input characters. This will be
888 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
889 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100890 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100891gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000892{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100893 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000894 int c;
895 int i;
896
897 for (i = 0; i < maxinputlen; ++i)
898 {
899 c = regparse[0];
900 if (!vim_isxdigit(c))
901 break;
902 nr <<= 4;
903 nr |= hex2nr(c);
904 ++regparse;
905 }
906
907 if (i == 0)
908 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100909 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000910}
911
912/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200913 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000914 * current position. Return -1 for invalid. Consumes all digits.
915 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100916 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100917getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000918{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100919 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000920 int c;
921 int i;
922
923 for (i = 0; ; ++i)
924 {
925 c = regparse[0];
926 if (c < '0' || c > '9')
927 break;
928 nr *= 10;
929 nr += c - '0';
930 ++regparse;
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100931 curchr = -1; // no longer valid
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000932 }
933
934 if (i == 0)
935 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100936 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000937}
938
939/*
940 * get and return the value of the octal string immediately after the current
941 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
942 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
943 * treat 8 or 9 as recognised characters. Position is updated:
944 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000945 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000946 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100947 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100948getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000949{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100950 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000951 int c;
952 int i;
953
954 for (i = 0; i < 3 && nr < 040; ++i)
955 {
956 c = regparse[0];
957 if (c < '0' || c > '7')
958 break;
959 nr <<= 3;
960 nr |= hex2nr(c);
961 ++regparse;
962 }
963
964 if (i == 0)
965 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100966 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000967}
968
969/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000970 * read_limits - Read two integers to be taken as a minimum and maximum.
971 * If the first character is '-', then the range is reversed.
972 * Should end with 'end'. If minval is missing, zero is default, if maxval is
973 * missing, a very big number is the default.
974 */
975 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100976read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000977{
978 int reverse = FALSE;
979 char_u *first_char;
980 long tmp;
981
982 if (*regparse == '-')
983 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100984 // Starts with '-', so reverse the range later
Bram Moolenaar071d4272004-06-13 20:20:40 +0000985 regparse++;
986 reverse = TRUE;
987 }
988 first_char = regparse;
989 *minval = getdigits(&regparse);
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100990 if (*regparse == ',') // There is a comma
Bram Moolenaar071d4272004-06-13 20:20:40 +0000991 {
992 if (vim_isdigit(*++regparse))
993 *maxval = getdigits(&regparse);
994 else
995 *maxval = MAX_LIMIT;
996 }
997 else if (VIM_ISDIGIT(*first_char))
Bram Moolenaar63d9e732019-12-05 21:10:38 +0100998 *maxval = *minval; // It was \{n} or \{-n}
Bram Moolenaar071d4272004-06-13 20:20:40 +0000999 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001000 *maxval = MAX_LIMIT; // It was \{} or \{-}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001001 if (*regparse == '\\')
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001002 regparse++; // Allow either \{...} or \{...\}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001003 if (*regparse != '}')
Bram Moolenaar1d423ef2022-01-02 21:26:16 +00001004 EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001005 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001006
1007 /*
1008 * Reverse the range if there was a '-', or make sure it is in the right
1009 * order otherwise.
1010 */
1011 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1012 {
1013 tmp = *minval;
1014 *minval = *maxval;
1015 *maxval = tmp;
1016 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001017 skipchr(); // let's be friends with the lexer again
Bram Moolenaar071d4272004-06-13 20:20:40 +00001018 return OK;
1019}
1020
1021/*
1022 * vim_regexec and friends
1023 */
1024
1025/*
1026 * Global work variables for vim_regexec().
1027 */
1028
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001029static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001030#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001031static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001032#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001033static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001034static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001035
1036/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001037 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1038 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001039 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001040 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001041static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001042static unsigned reg_tofreelen;
1043
1044/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001045 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001046 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001047 * done:
1048 * single-line multi-line
1049 * reg_match &regmatch_T NULL
1050 * reg_mmatch NULL &regmmatch_T
1051 * reg_startp reg_match->startp <invalid>
1052 * reg_endp reg_match->endp <invalid>
1053 * reg_startpos <invalid> reg_mmatch->startpos
1054 * reg_endpos <invalid> reg_mmatch->endpos
1055 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001056 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001057 * reg_firstlnum <invalid> first line in which to search
1058 * reg_maxline 0 last line nr
1059 * reg_line_lbr FALSE or TRUE FALSE
1060 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001061typedef struct {
1062 regmatch_T *reg_match;
1063 regmmatch_T *reg_mmatch;
1064 char_u **reg_startp;
1065 char_u **reg_endp;
1066 lpos_T *reg_startpos;
1067 lpos_T *reg_endpos;
1068 win_T *reg_win;
1069 buf_T *reg_buf;
1070 linenr_T reg_firstlnum;
1071 linenr_T reg_maxline;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001072 int reg_line_lbr; // "\n" in string is line break
Bram Moolenaar6100d022016-10-02 16:51:57 +02001073
Bram Moolenaar0270f382018-07-17 05:43:58 +02001074 // The current match-position is stord in these variables:
1075 linenr_T lnum; // line number, relative to first line
1076 char_u *line; // start of current line
Bram Moolenaar64066b92021-11-17 18:22:56 +00001077 char_u *input; // current input, points into "line"
Bram Moolenaar0270f382018-07-17 05:43:58 +02001078
1079 int need_clear_subexpr; // subexpressions still need to be cleared
1080#ifdef FEAT_SYN_HL
1081 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1082 // cleared
1083#endif
1084
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001085 // Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1086 // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1087 // contains '\c' or '\C' the value is overruled.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001088 int reg_ic;
1089
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001090 // Similar to "reg_ic", but only for 'combining' characters. Set with \Z
1091 // flag in the regexp. Defaults to false, always.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001092 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001093
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001094 // Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1095 // there is no maximum.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001096 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001097
1098 // State for the NFA engine regexec.
1099 int nfa_has_zend; // NFA regexp \ze operator encountered.
1100 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1101 int nfa_nsubexpr; // Number of sub expressions actually being used
1102 // during execution. 1 if only the whole match
1103 // (subexpr 0) is used.
1104 // listid is global, so that it increases on recursive calls to
1105 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1106 // all the states.
1107 int nfa_listid;
1108 int nfa_alt_listid;
1109
1110#ifdef FEAT_SYN_HL
1111 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1112#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001113} regexec_T;
1114
1115static regexec_T rex;
1116static int rex_in_use = FALSE;
1117
Bram Moolenaar071d4272004-06-13 20:20:40 +00001118/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001119 * Return TRUE if character 'c' is included in 'iskeyword' option for
1120 * "reg_buf" buffer.
1121 */
1122 static int
1123reg_iswordc(int c)
1124{
1125 return vim_iswordc_buf(c, rex.reg_buf);
1126}
1127
1128/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001129 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1130 */
1131 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001132reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001133{
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001134 // when looking behind for a match/no-match lnum is negative. But we
1135 // can't go before line 1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001136 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001138 if (lnum > rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001139 // Must have matched the "\n" in the last line.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001140 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001141 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001142}
1143
#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches, one entry per subexpression.
static char_u	*reg_startzp[NSUBEXP];	// start of match, as a pointer
static char_u	*reg_endzp[NSUBEXP];	// end of match, as a pointer
static lpos_T	reg_startzpos[NSUBEXP];	// start of match, as a position
static lpos_T	reg_endzpos[NSUBEXP];	// end of match, as a position
#endif

// TRUE if using multi-line regexp: "rex.reg_match" is not set then.
#define REG_MULTI	(rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001153
Bram Moolenaar071d4272004-06-13 20:20:40 +00001154#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001155/*
1156 * Create a new extmatch and mark it as referenced once.
1157 */
1158 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001159make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001160{
1161 reg_extmatch_T *em;
1162
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001163 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001164 if (em != NULL)
1165 em->refcnt = 1;
1166 return em;
1167}
1168
1169/*
1170 * Add a reference to an extmatch.
1171 */
1172 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001173ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001174{
1175 if (em != NULL)
1176 em->refcnt++;
1177 return em;
1178}
1179
1180/*
1181 * Remove a reference to an extmatch. If there are no references left, free
1182 * the info.
1183 */
1184 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001185unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001186{
1187 int i;
1188
1189 if (em != NULL && --em->refcnt <= 0)
1190 {
1191 for (i = 0; i < NSUBEXP; ++i)
1192 vim_free(em->matches[i]);
1193 vim_free(em);
1194 }
1195}
1196#endif
1197
1198/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001199 * Get class of previous character.
1200 */
1201 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001202reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001203{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001204 if (rex.input > rex.line)
1205 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001206 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001207 return -1;
1208}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001209
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001210/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001211 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001212 */
1213 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001214reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001215{
1216 pos_T top, bot;
1217 linenr_T lnum;
1218 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001219 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001220 int mode;
1221 colnr_T start, end;
1222 colnr_T start2, end2;
1223 colnr_T cols;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001224 colnr_T curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001225
Bram Moolenaar679d66c2022-01-30 16:42:56 +00001226 // Check if the buffer is the current buffer and not using a string.
Bram Moolenaar44a4d942022-01-30 17:17:41 +00001227 if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001228 return FALSE;
1229
1230 if (VIsual_active)
1231 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001232 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001233 {
1234 top = VIsual;
1235 bot = wp->w_cursor;
1236 }
1237 else
1238 {
1239 top = wp->w_cursor;
1240 bot = VIsual;
1241 }
1242 mode = VIsual_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001243 curswant = wp->w_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001244 }
1245 else
1246 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001247 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001248 {
1249 top = curbuf->b_visual.vi_start;
1250 bot = curbuf->b_visual.vi_end;
1251 }
1252 else
1253 {
1254 top = curbuf->b_visual.vi_end;
1255 bot = curbuf->b_visual.vi_start;
1256 }
1257 mode = curbuf->b_visual.vi_mode;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001258 curswant = curbuf->b_visual.vi_curswant;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001259 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001260 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001261 if (lnum < top.lnum || lnum > bot.lnum)
1262 return FALSE;
1263
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001264 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001265 if (mode == 'v')
1266 {
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001267 if ((lnum == top.lnum && col < top.col)
1268 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1269 return FALSE;
1270 }
1271 else if (mode == Ctrl_V)
1272 {
1273 getvvcol(wp, &top, &start, NULL, &end);
1274 getvvcol(wp, &bot, &start2, NULL, &end2);
1275 if (start2 < start)
1276 start = start2;
1277 if (end2 > end)
1278 end = end2;
Bram Moolenaare71c0eb2021-05-30 16:43:11 +02001279 if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001280 end = MAXCOL;
Bram Moolenaar4c13e5e2021-12-30 14:49:43 +00001281
1282 // getvvcol() flushes rex.line, need to get it again
1283 rex.line = reg_getline(rex.lnum);
1284 rex.input = rex.line + col;
1285
1286 cols = win_linetabsize(wp, rex.line, col);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001287 if (cols < start || cols > end - (*p_sel == 'e'))
1288 return FALSE;
1289 }
1290 return TRUE;
1291}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001292
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293/*
1294 * Check the regexp program for its magic number.
1295 * Return TRUE if it's wrong.
1296 */
1297 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001298prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001299{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001300 regprog_T *prog;
1301
Bram Moolenaar6100d022016-10-02 16:51:57 +02001302 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001303 if (prog->engine == &nfa_regengine)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001304 // For NFA matcher we don't check the magic
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001305 return FALSE;
1306
1307 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001309 emsg(_(e_corrupted_regexp_program));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001310 return TRUE;
1311 }
1312 return FALSE;
1313}
1314
1315/*
1316 * Cleanup the subexpressions, if this wasn't done yet.
1317 * This construction is used to clear the subexpressions only when they are
1318 * used (to increase speed).
1319 */
1320 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001321cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001322{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001323 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001324 {
1325 if (REG_MULTI)
1326 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001327 // Use 0xff to set lnum to -1
Bram Moolenaar6100d022016-10-02 16:51:57 +02001328 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1329 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 }
1331 else
1332 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001333 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1334 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001335 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001336 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001337 }
1338}
1339
1340#ifdef FEAT_SYN_HL
1341 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001342cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001343{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001344 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001345 {
1346 if (REG_MULTI)
1347 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001348 // Use 0xff to set lnum to -1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1350 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1351 }
1352 else
1353 {
1354 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1355 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1356 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001357 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358 }
1359}
1360#endif
1361
1362/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001363 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001364 */
1365 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001366reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001367{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001368 rex.line = reg_getline(++rex.lnum);
1369 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370 fast_breakcheck();
1371}
1372
1373/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001374 * Check whether a backreference matches.
1375 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001376 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1377 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001378 */
1379 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001380match_with_backref(
1381 linenr_T start_lnum,
1382 colnr_T start_col,
1383 linenr_T end_lnum,
1384 colnr_T end_col,
1385 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001386{
1387 linenr_T clnum = start_lnum;
1388 colnr_T ccol = start_col;
1389 int len;
1390 char_u *p;
1391
1392 if (bytelen != NULL)
1393 *bytelen = 0;
1394 for (;;)
1395 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001396 // Since getting one line may invalidate the other, need to make copy.
1397 // Slow!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001398 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001399 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001400 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001401 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1402 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001403 len += 50; // get some extra
Bram Moolenaar580abea2013-06-14 20:31:28 +02001404 vim_free(reg_tofree);
1405 reg_tofree = alloc(len);
1406 if (reg_tofree == NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001407 return RA_FAIL; // out of memory!
Bram Moolenaar580abea2013-06-14 20:31:28 +02001408 reg_tofreelen = len;
1409 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001410 STRCPY(reg_tofree, rex.line);
1411 rex.input = reg_tofree + (rex.input - rex.line);
1412 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001413 }
1414
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001415 // Get the line to compare with.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001416 p = reg_getline(clnum);
1417 if (clnum == end_lnum)
1418 len = end_col - ccol;
1419 else
1420 len = (int)STRLEN(p + ccol);
1421
Bram Moolenaar0270f382018-07-17 05:43:58 +02001422 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001423 return RA_NOMATCH; // doesn't match
Bram Moolenaar580abea2013-06-14 20:31:28 +02001424 if (bytelen != NULL)
1425 *bytelen += len;
1426 if (clnum == end_lnum)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001427 break; // match and at end!
Bram Moolenaar0270f382018-07-17 05:43:58 +02001428 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001429 return RA_NOMATCH; // text too short
Bram Moolenaar580abea2013-06-14 20:31:28 +02001430
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001431 // Advance to next line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001432 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001433 if (bytelen != NULL)
1434 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001435 ++clnum;
1436 ccol = 0;
1437 if (got_int)
1438 return RA_FAIL;
1439 }
1440
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001441 // found a match! Note that rex.line may now point to a copy of the line,
1442 // that should not matter.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001443 return RA_MATCH;
1444}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001445
Bram Moolenaarfb031402014-09-09 17:18:49 +02001446/*
1447 * Used in a place where no * or \+ can follow.
1448 */
1449 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001450re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001451{
1452 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001453 {
Bram Moolenaard82a47d2022-01-05 20:24:39 +00001454 semsg(_(e_nfa_regexp_cannot_repeat_str), what);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001455 rc_did_emsg = TRUE;
1456 return FAIL;
1457 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001458 return OK;
1459}
1460
// One entry of the decomposition table: up to three characters, unused
// positions are zero.
typedef struct
{
    int a, b, c;
} decomp_T;

// First and last character that have an entry in decomp_table[].
#define DECOMP_FIRST	0xfb20
#define DECOMP_LAST	0xfb4f

// Decomposition of the characters 0xfb20 - 0xfb4f.  The table is only read,
// never written.  An unused code point maps to itself.
static const decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},		// 0xfb20	alt ayin
    {0x5d0,0,0},		// 0xfb21	alt alef
    {0x5d3,0,0},		// 0xfb22	alt dalet
    {0x5d4,0,0},		// 0xfb23	alt he
    {0x5db,0,0},		// 0xfb24	alt kaf
    {0x5dc,0,0},		// 0xfb25	alt lamed
    {0x5dd,0,0},		// 0xfb26	alt mem-sofit
    {0x5e8,0,0},		// 0xfb27	alt resh
    {0x5ea,0,0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc,0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "c1", "c2" and "c3".  Positions that are not used are set to zero.
 * A character outside of the range of decomp_table[] is not decomposed: "c1"
 * is set to "c" and "c2" and "c3" to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= DECOMP_FIRST && c <= DECOMP_LAST)
    {
	// Point at the entry, no need to make a copy of it.
	const decomp_T *d = &decomp_table[c - DECOMP_FIRST];

	*c1 = d->a;
	*c2 = d->b;
	*c3 = d->c;
    }
    else
    {
	*c1 = c;
	*c2 = *c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001538
1539/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001540 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001541 * Return 0 if strings match, non-zero otherwise.
1542 * Correct the length "*n" when composing characters are ignored.
1543 */
1544 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001545cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001546{
1547 int result;
1548
Bram Moolenaar6100d022016-10-02 16:51:57 +02001549 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001550 result = STRNCMP(s1, s2, *n);
1551 else
1552 result = MB_STRNICMP(s1, s2, *n);
1553
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001554 // if it failed and it's utf8 and we want to combineignore:
Bram Moolenaar6100d022016-10-02 16:51:57 +02001555 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001556 {
1557 char_u *str1, *str2;
1558 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001559 int junk;
1560
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001561 // we have to handle the strcmp ourselves, since it is necessary to
1562 // deal with the composing characters by ignoring them:
Bram Moolenaar071d4272004-06-13 20:20:40 +00001563 str1 = s1;
1564 str2 = s2;
1565 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001566 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001567 {
1568 c1 = mb_ptr2char_adv(&str1);
1569 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001571 // Decompose the character if necessary, into 'base' characters.
1572 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001573 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001574 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001575 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001576 mb_decompose(c1, &c11, &junk, &junk);
1577 mb_decompose(c2, &c12, &junk, &junk);
1578 c1 = c11;
1579 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001580 if (c11 != c12
1581 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001582 break;
1583 }
1584 }
1585 result = c2 - c1;
1586 if (result == 0)
1587 *n = (int)(str2 - s2);
1588 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001589
1590 return result;
1591}
1592
1593/*
1594 * cstrchr: This function is used a lot for simple searches, keep it fast!
1595 */
1596 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001597cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001598{
1599 char_u *p;
1600 int cc;
1601
Bram Moolenaara12a1612019-01-24 16:39:02 +01001602 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001603 return vim_strchr(s, c);
1604
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001605 // tolower() and toupper() can be slow, comparing twice should be a lot
1606 // faster (esp. when using MS Visual C++!).
1607 // For UTF-8 need to use folded case.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001608 if (enc_utf8 && c > 0x80)
1609 cc = utf_fold(c);
1610 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001611 if (MB_ISUPPER(c))
1612 cc = MB_TOLOWER(c);
1613 else if (MB_ISLOWER(c))
1614 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001615 else
1616 return vim_strchr(s, c);
1617
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618 if (has_mbyte)
1619 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001620 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001621 {
1622 if (enc_utf8 && c > 0x80)
1623 {
1624 if (utf_fold(utf_ptr2char(p)) == cc)
1625 return p;
1626 }
1627 else if (*p == c || *p == cc)
1628 return p;
1629 }
1630 }
1631 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001632 // Faster version for when there are no multi-byte characters.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001633 for (p = s; *p != NUL; ++p)
1634 if (*p == c || *p == cc)
1635 return p;
1636
1637 return NULL;
1638}
1639
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001640////////////////////////////////////////////////////////////////
1641// regsub stuff //
1642////////////////////////////////////////////////////////////////
Bram Moolenaar071d4272004-06-13 20:20:40 +00001643
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * void pointer.  This should work for all compilers.
 * Code that calls through an fptr_T casts the result back to fptr_T.
 */
typedef void *(*fptr_T)(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001651
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001652static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001653
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001654 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001655do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001656{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001657 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001658
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001659 return (fptr_T)NULL;
1660}
1661
1662 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001663do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001664{
1665 *d = MB_TOUPPER(c);
1666
1667 return (fptr_T)do_Upper;
1668}
1669
1670 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001671do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001672{
1673 *d = MB_TOLOWER(c);
1674
1675 return (fptr_T)NULL;
1676}
1677
1678 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001679do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001680{
1681 *d = MB_TOLOWER(c);
1682
1683 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001684}
1685
1686/*
1687 * regtilde(): Replace tildes in the pattern by the old pattern.
1688 *
1689 * Short explanation of the tilde: It stands for the previous replacement
1690 * pattern. If that previous pattern also contains a ~ we should go back a
1691 * step further... But we insert the previous pattern into the current one
1692 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001693 * This still does not handle the case where "magic" changes. So require the
1694 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001695 *
1696 * The tildes are parsed once before the first call to vim_regsub().
1697 */
1698 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001699regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001700{
1701 char_u *newsub = source;
1702 char_u *tmpsub;
1703 char_u *p;
1704 int len;
1705 int prevlen;
1706
1707 for (p = newsub; *p; ++p)
1708 {
1709 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1710 {
1711 if (reg_prev_sub != NULL)
1712 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001713 // length = len(newsub) - 1 + len(prev_sub) + 1
Bram Moolenaar071d4272004-06-13 20:20:40 +00001714 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001715 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001716 if (tmpsub != NULL)
1717 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001718 // copy prefix
1719 len = (int)(p - newsub); // not including ~
Bram Moolenaar071d4272004-06-13 20:20:40 +00001720 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001721 // interpret tilde
Bram Moolenaar071d4272004-06-13 20:20:40 +00001722 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001723 // copy postfix
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724 if (!magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001725 ++p; // back off backslash
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726 STRCPY(tmpsub + len + prevlen, p + 1);
1727
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001728 if (newsub != source) // already allocated newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001729 vim_free(newsub);
1730 newsub = tmpsub;
1731 p = newsub + len + prevlen;
1732 }
1733 }
1734 else if (magic)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001735 STRMOVE(p, p + 1); // remove '~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001736 else
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001737 STRMOVE(p, p + 2); // remove '\~'
Bram Moolenaar071d4272004-06-13 20:20:40 +00001738 --p;
1739 }
1740 else
1741 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001742 if (*p == '\\' && p[1]) // skip escaped characters
Bram Moolenaar071d4272004-06-13 20:20:40 +00001743 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001745 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 }
1747 }
1748
1749 vim_free(reg_prev_sub);
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001750 if (newsub != source) // newsub was allocated, just keep it
Bram Moolenaar071d4272004-06-13 20:20:40 +00001751 reg_prev_sub = newsub;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001752 else // no ~ found, need to save newsub
Bram Moolenaar071d4272004-06-13 20:20:40 +00001753 reg_prev_sub = vim_strsave(newsub);
1754 return newsub;
1755}
1756
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// These pointers are used for reg_submatch().  Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// vim_regsub_both() fills the fields from the matching fields of "rex" before
// evaluating the expression.
typedef struct {
    regmatch_T	*sm_match;	// from rex.reg_match
    regmmatch_T	*sm_mmatch;	// from rex.reg_mmatch
    linenr_T	sm_firstlnum;	// from rex.reg_firstlnum
    linenr_T	sm_maxline;	// from rex.reg_maxline
    int		sm_line_lbr;	// from rex.reg_line_lbr
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif
1773
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001774#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001775
1776/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001777 * Put the submatches in "argv[argskip]" which is a list passed into
1778 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001779 */
1780 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001781fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001782{
1783 listitem_T *li;
1784 int i;
1785 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001786 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001787
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001788 if (argcount == argskip)
1789 // called function doesn't take a submatches argument
1790 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001791
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001792 // Relies on sl_list to be the first item in staticList10_T.
1793 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001794
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001795 // There are always 10 list items in staticList10_T.
1796 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001797 for (i = 0; i < 10; ++i)
1798 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001799 s = rsm.sm_match->startp[i];
1800 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001801 s = NULL;
1802 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02001803 s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001804 li->li_tv.v_type = VAR_STRING;
1805 li->li_tv.vval.v_string = s;
1806 li = li->li_next;
1807 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001808 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001809}
1810
1811 static void
1812clear_submatch_list(staticList10_T *sl)
1813{
1814 int i;
1815
1816 for (i = 0; i < 10; ++i)
1817 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1818}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001819#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001820
Bram Moolenaar071d4272004-06-13 20:20:40 +00001821/*
1822 * vim_regsub() - perform substitutions after a vim_regexec() or
1823 * vim_regexec_multi() match.
1824 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001825 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1826 * Oterwise nothing is copied, only compue the length of the result.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 *
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001828 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1829 *
1830 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1831 * double them to keep them, and insert a backslash before a CR to avoid it
1832 * being replaced with a line break later.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001833 *
1834 * Note: The matched text must not change between the call of
1835 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1836 * references invalid!
1837 *
1838 * Returns the size of the replacement, including terminating NUL.
1839 */
1840 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001841vim_regsub(
1842 regmatch_T *rmp,
1843 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001844 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001845 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001846 int destlen,
1847 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001848{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001849 int result;
1850 regexec_T rex_save;
1851 int rex_in_use_save = rex_in_use;
1852
1853 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001854 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001855 rex_save = rex;
1856 rex_in_use = TRUE;
1857
1858 rex.reg_match = rmp;
1859 rex.reg_mmatch = NULL;
1860 rex.reg_maxline = 0;
1861 rex.reg_buf = curbuf;
1862 rex.reg_line_lbr = TRUE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001863 result = vim_regsub_both(source, expr, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001864
1865 rex_in_use = rex_in_use_save;
1866 if (rex_in_use)
1867 rex = rex_save;
1868
1869 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001870}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871
1872 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001873vim_regsub_multi(
1874 regmmatch_T *rmp,
1875 linenr_T lnum,
1876 char_u *source,
1877 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001878 int destlen,
1879 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001881 int result;
1882 regexec_T rex_save;
1883 int rex_in_use_save = rex_in_use;
1884
1885 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001886 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001887 rex_save = rex;
1888 rex_in_use = TRUE;
1889
1890 rex.reg_match = NULL;
1891 rex.reg_mmatch = rmp;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001892 rex.reg_buf = curbuf; // always works on the current buffer!
Bram Moolenaar6100d022016-10-02 16:51:57 +02001893 rex.reg_firstlnum = lnum;
1894 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1895 rex.reg_line_lbr = FALSE;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001896 result = vim_regsub_both(source, NULL, dest, destlen, flags);
Bram Moolenaar6100d022016-10-02 16:51:57 +02001897
1898 rex_in_use = rex_in_use_save;
1899 if (rex_in_use)
1900 rex = rex_save;
1901
1902 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903}
1904
1905 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001906vim_regsub_both(
1907 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001908 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001909 char_u *dest,
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001910 int destlen,
1911 int flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001912{
1913 char_u *src;
1914 char_u *dst;
1915 char_u *s;
1916 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001917 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001918 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001919 fptr_T func_all = (fptr_T)NULL;
1920 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001921 linenr_T clnum = 0; // init for GCC
1922 int len = 0; // init for GCC
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001924 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001925#endif
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001926 int copy = flags & REGSUB_COPY;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001927
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001928 // Be paranoid...
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001929 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930 {
Bram Moolenaare29a27f2021-07-20 21:07:36 +02001931 emsg(_(e_null_argument));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932 return 0;
1933 }
1934 if (prog_magic_wrong())
1935 return 0;
1936 src = source;
1937 dst = dest;
1938
1939 /*
1940 * When the substitute part starts with "\=" evaluate it as an expression.
1941 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001942 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001943 {
1944#ifdef FEAT_EVAL
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001945 // To make sure that the length doesn't change between checking the
1946 // length and copying the string, and to speed up things, the
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01001947 // resulting string is saved from the call with "flags & REGSUB_COPY"
1948 // == 0 to the // call with "flags & REGSUB_COPY" != 0.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001949 if (copy)
1950 {
1951 if (eval_result != NULL)
1952 {
1953 STRCPY(dest, eval_result);
1954 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01001955 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001956 }
1957 }
1958 else
1959 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001960 int prev_can_f_submatch = can_f_submatch;
1961 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001962
1963 vim_free(eval_result);
1964
Bram Moolenaar63d9e732019-12-05 21:10:38 +01001965 // The expression may contain substitute(), which calls us
1966 // recursively. Make sure submatch() gets the text from the first
1967 // level.
Bram Moolenaar6100d022016-10-02 16:51:57 +02001968 if (can_f_submatch)
1969 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001970 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001971 rsm.sm_match = rex.reg_match;
1972 rsm.sm_mmatch = rex.reg_mmatch;
1973 rsm.sm_firstlnum = rex.reg_firstlnum;
1974 rsm.sm_maxline = rex.reg_maxline;
1975 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001976
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001977 if (expr != NULL)
1978 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001979 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001980 char_u buf[NUMBUFLEN];
1981 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001982 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02001983 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001984
1985 rettv.v_type = VAR_STRING;
1986 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001987 argv[0].v_type = VAR_LIST;
1988 argv[0].vval.v_list = &matchList.sl_list;
1989 matchList.sl_list.lv_len = 0;
Bram Moolenaara80faa82020-04-12 19:37:17 +02001990 CLEAR_FIELD(funcexe);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00001991 funcexe.fe_argv_func = fill_submatch_list;
1992 funcexe.fe_evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001993 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001994 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001995 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02001996 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001997 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02001998 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001999 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002000 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002001
Bram Moolenaar6100d022016-10-02 16:51:57 +02002002 s = partial_name(partial);
Bram Moolenaar851f86b2021-12-13 14:26:44 +00002003 funcexe.fe_partial = partial;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002004 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002005 }
LemonBoyf3b48952022-05-05 13:53:03 +01002006 else if (expr->v_type == VAR_INSTR)
2007 {
2008 exe_typval_instr(expr, &rettv);
2009 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002010 if (matchList.sl_list.lv_len > 0)
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002011 // fill_submatch_list() was called
Bram Moolenaar6100d022016-10-02 16:51:57 +02002012 clear_submatch_list(&matchList);
2013
Bram Moolenaar4c054e92019-11-10 00:13:50 +01002014 if (rettv.v_type == VAR_UNKNOWN)
2015 // something failed, no need to report another error
2016 eval_result = NULL;
2017 else
2018 {
2019 eval_result = tv_get_string_buf_chk(&rettv, buf);
2020 if (eval_result != NULL)
2021 eval_result = vim_strsave(eval_result);
2022 }
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002023 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002024 }
Bram Moolenaar4c137212021-04-19 16:48:48 +02002025 else if (substitute_instr != NULL)
2026 // Execute instructions from ISN_SUBSTITUTE.
2027 eval_result = exe_substitute_instr();
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002028 else
Bram Moolenaarb171fb12020-06-24 20:34:03 +02002029 eval_result = eval_to_string(source + 2, TRUE);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002030
Bram Moolenaar071d4272004-06-13 20:20:40 +00002031 if (eval_result != NULL)
2032 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002033 int had_backslash = FALSE;
2034
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002035 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002036 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002037 // Change NL to CR, so that it becomes a line break,
2038 // unless called from vim_regexec_nl().
2039 // Skip over a backslashed character.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002040 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002041 *s = CAR;
2042 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002043 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002044 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002045 /* Change NL to CR here too, so that this works:
2046 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2047 * abc\
2048 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002049 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002050 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002051 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002052 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002053 had_backslash = TRUE;
2054 }
2055 }
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002056 if (had_backslash && (flags & REGSUB_BACKSLASH))
Bram Moolenaar06975a42010-03-23 16:27:22 +01002057 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002058 // Backslashes will be consumed, need to double them.
Bram Moolenaar06975a42010-03-23 16:27:22 +01002059 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2060 if (s != NULL)
2061 {
2062 vim_free(eval_result);
2063 eval_result = s;
2064 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002065 }
2066
2067 dst += STRLEN(eval_result);
2068 }
2069
Bram Moolenaar6100d022016-10-02 16:51:57 +02002070 can_f_submatch = prev_can_f_submatch;
2071 if (can_f_submatch)
2072 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002073 }
2074#endif
2075 }
2076 else
2077 while ((c = *src++) != NUL)
2078 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002079 if (c == '&' && (flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002080 no = 0;
2081 else if (c == '\\' && *src != NUL)
2082 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002083 if (*src == '&' && !(flags & REGSUB_MAGIC))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002084 {
2085 ++src;
2086 no = 0;
2087 }
2088 else if ('0' <= *src && *src <= '9')
2089 {
2090 no = *src++ - '0';
2091 }
2092 else if (vim_strchr((char_u *)"uUlLeE", *src))
2093 {
2094 switch (*src++)
2095 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002096 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002097 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002098 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002099 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002100 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002102 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002103 continue;
2104 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002105 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002106 continue;
2107 }
2108 }
2109 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002110 if (no < 0) // Ordinary character.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002111 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002112 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2113 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002114 // Copy a special key as-is.
Bram Moolenaardb552d602006-03-23 22:59:57 +00002115 if (copy)
2116 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002117 if (dst + 3 > dest + destlen)
2118 {
2119 iemsg("vim_regsub_both(): not enough space");
2120 return 0;
2121 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002122 *dst++ = c;
2123 *dst++ = *src++;
2124 *dst++ = *src++;
2125 }
2126 else
2127 {
2128 dst += 3;
2129 src += 2;
2130 }
2131 continue;
2132 }
2133
Bram Moolenaar071d4272004-06-13 20:20:40 +00002134 if (c == '\\' && *src != NUL)
2135 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002136 // Check for abbreviations -- webb
Bram Moolenaar071d4272004-06-13 20:20:40 +00002137 switch (*src)
2138 {
2139 case 'r': c = CAR; ++src; break;
2140 case 'n': c = NL; ++src; break;
2141 case 't': c = TAB; ++src; break;
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002142 // Oh no! \e already has meaning in subst pat :-(
2143 // case 'e': c = ESC; ++src; break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002144 case 'b': c = Ctrl_H; ++src; break;
2145
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002146 // If "backslash" is TRUE the backslash will be removed
2147 // later. Used to insert a literal CR.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002148 default: if (flags & REGSUB_BACKSLASH)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002149 {
2150 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002151 {
2152 if (dst + 1 > dest + destlen)
2153 {
2154 iemsg("vim_regsub_both(): not enough space");
2155 return 0;
2156 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002157 *dst = '\\';
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002158 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002159 ++dst;
2160 }
2161 c = *src++;
2162 }
2163 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002164 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002165 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002166
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002167 // Write to buffer, if copy is set.
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002168 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002169 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002170 func_one = (fptr_T)(func_one(&cc, c));
2171 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002172 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002173 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002174 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002175 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002176
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002177 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002178 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002179 int totlen = mb_ptr2len(src - 1);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002180 int charlen = mb_char2len(cc);
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002181
Bram Moolenaar071d4272004-06-13 20:20:40 +00002182 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002183 {
2184 if (dst + charlen > dest + destlen)
2185 {
2186 iemsg("vim_regsub_both(): not enough space");
2187 return 0;
2188 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002189 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002190 }
2191 dst += charlen - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002192 if (enc_utf8)
2193 {
2194 int clen = utf_ptr2len(src - 1);
2195
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002196 // If the character length is shorter than "totlen", there
2197 // are composing characters; copy them as-is.
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002198 if (clen < totlen)
2199 {
2200 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002201 {
2202 if (dst + totlen - clen > dest + destlen)
2203 {
2204 iemsg("vim_regsub_both(): not enough space");
2205 return 0;
2206 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002207 mch_memmove(dst + 1, src - 1 + clen,
2208 (size_t)(totlen - clen));
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002209 }
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002210 dst += totlen - clen;
2211 }
2212 }
2213 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002214 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002215 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002216 {
2217 if (dst + 1 > dest + destlen)
2218 {
2219 iemsg("vim_regsub_both(): not enough space");
2220 return 0;
2221 }
2222 *dst = cc;
2223 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002224 dst++;
2225 }
2226 else
2227 {
2228 if (REG_MULTI)
2229 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002230 clnum = rex.reg_mmatch->startpos[no].lnum;
2231 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002232 s = NULL;
2233 else
2234 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002235 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2236 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2237 len = rex.reg_mmatch->endpos[no].col
2238 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002239 else
2240 len = (int)STRLEN(s);
2241 }
2242 }
2243 else
2244 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002245 s = rex.reg_match->startp[no];
2246 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002247 s = NULL;
2248 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002249 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002250 }
2251 if (s != NULL)
2252 {
2253 for (;;)
2254 {
2255 if (len == 0)
2256 {
2257 if (REG_MULTI)
2258 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002259 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002260 break;
2261 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002262 {
2263 if (dst + 1 > dest + destlen)
2264 {
2265 iemsg("vim_regsub_both(): not enough space");
2266 return 0;
2267 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002268 *dst = CAR;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002269 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002270 ++dst;
2271 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002272 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2273 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002274 else
2275 len = (int)STRLEN(s);
2276 }
2277 else
2278 break;
2279 }
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002280 else if (*s == NUL) // we hit NUL.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002281 {
2282 if (copy)
Bram Moolenaare29a27f2021-07-20 21:07:36 +02002283 iemsg(_(e_damaged_match_string));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002284 goto exit;
2285 }
2286 else
2287 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002288 if ((flags & REGSUB_BACKSLASH)
2289 && (*s == CAR || *s == '\\'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002290 {
2291 /*
2292 * Insert a backslash in front of a CR, otherwise
2293 * it will be replaced by a line break.
2294 * Number of backslashes will be halved later,
2295 * double them here.
2296 */
2297 if (copy)
2298 {
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002299 if (dst + 2 > dest + destlen)
2300 {
2301 iemsg("vim_regsub_both(): not enough space");
2302 return 0;
2303 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002304 dst[0] = '\\';
2305 dst[1] = *s;
2306 }
2307 dst += 2;
2308 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002309 else
2310 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002311 if (has_mbyte)
2312 c = mb_ptr2char(s);
2313 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002314 c = *s;
2315
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002316 if (func_one != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002317 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002318 func_one = (fptr_T)(func_one(&cc, c));
2319 else if (func_all != (fptr_T)NULL)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002320 // Turbo C complains without the typecast
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002321 func_all = (fptr_T)(func_all(&cc, c));
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002322 else // just copy
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002323 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002324
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002325 if (has_mbyte)
2326 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002327 int l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002328 int charlen;
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002329
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002330 // Copy composing characters separately, one
2331 // at a time.
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002332 if (enc_utf8)
2333 l = utf_ptr2len(s) - 1;
2334 else
2335 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002336
2337 s += l;
2338 len -= l;
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002339 charlen = mb_char2len(cc);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002340 if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002341 {
2342 if (dst + charlen > dest + destlen)
2343 {
2344 iemsg("vim_regsub_both(): not enough space");
2345 return 0;
2346 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002347 mb_char2bytes(cc, dst);
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002348 }
2349 dst += charlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002350 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002351 else if (copy)
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002352 {
2353 if (dst + 1 > dest + destlen)
2354 {
2355 iemsg("vim_regsub_both(): not enough space");
2356 return 0;
2357 }
2358 *dst = cc;
2359 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002360 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002361 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002362
Bram Moolenaar071d4272004-06-13 20:20:40 +00002363 ++s;
2364 --len;
2365 }
2366 }
2367 }
2368 no = -1;
2369 }
2370 }
2371 if (copy)
2372 *dst = NUL;
2373
2374exit:
2375 return (int)((dst - dest) + 1);
2376}
2377
2378#ifdef FEAT_EVAL
2379/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002380 * Call reg_getline() with the line numbers from the submatch. If a
2381 * substitute() was used the reg_maxline and other values have been
2382 * overwritten.
2383 */
2384 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002385reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002386{
2387 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002388 linenr_T save_first = rex.reg_firstlnum;
2389 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002390
Bram Moolenaar6100d022016-10-02 16:51:57 +02002391 rex.reg_firstlnum = rsm.sm_firstlnum;
2392 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002393
2394 s = reg_getline(lnum);
2395
Bram Moolenaar6100d022016-10-02 16:51:57 +02002396 rex.reg_firstlnum = save_first;
2397 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002398 return s;
2399}
2400
2401/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002402 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002403 * allocated memory.
2404 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2405 */
2406 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002407reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002408{
2409 char_u *retval = NULL;
2410 char_u *s;
2411 int len;
2412 int round;
2413 linenr_T lnum;
2414
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002415 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002416 return NULL;
2417
Bram Moolenaar6100d022016-10-02 16:51:57 +02002418 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002419 {
2420 /*
2421 * First round: compute the length and allocate memory.
2422 * Second round: copy the text.
2423 */
2424 for (round = 1; round <= 2; ++round)
2425 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002426 lnum = rsm.sm_mmatch->startpos[no].lnum;
2427 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002428 return NULL;
2429
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002430 s = reg_getline_submatch(lnum);
2431 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002432 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002433 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002434 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002435 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002436 // Within one line: take form start to end col.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002437 len = rsm.sm_mmatch->endpos[no].col
2438 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002439 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002440 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002441 ++len;
2442 }
2443 else
2444 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002445 // Multiple lines: take start line from start col, middle
2446 // lines completely and end line up to end col.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002447 len = (int)STRLEN(s);
2448 if (round == 2)
2449 {
2450 STRCPY(retval, s);
2451 retval[len] = '\n';
2452 }
2453 ++len;
2454 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002455 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002456 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002457 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002458 if (round == 2)
2459 STRCPY(retval + len, s);
2460 len += (int)STRLEN(s);
2461 if (round == 2)
2462 retval[len] = '\n';
2463 ++len;
2464 }
2465 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002466 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002467 rsm.sm_mmatch->endpos[no].col);
2468 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002469 if (round == 2)
2470 retval[len] = NUL;
2471 ++len;
2472 }
2473
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002474 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002476 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002477 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 return NULL;
2479 }
2480 }
2481 }
2482 else
2483 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002484 s = rsm.sm_match->startp[no];
2485 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002486 retval = NULL;
2487 else
Bram Moolenaar71ccd032020-06-12 22:59:11 +02002488 retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002489 }
2490
2491 return retval;
2492}
Bram Moolenaar41571762014-04-02 19:00:58 +02002493
2494/*
2495 * Used for the submatch() function with the optional non-zero argument: get
2496 * the list of strings from the n'th submatch in allocated memory with NULs
2497 * represented in NLs.
2498 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2499 * command, for a non-existing submatch and for any error.
2500 */
2501 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002502reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002503{
2504 char_u *s;
2505 linenr_T slnum;
2506 linenr_T elnum;
2507 colnr_T scol;
2508 colnr_T ecol;
2509 int i;
2510 list_T *list;
2511 int error = FALSE;
2512
2513 if (!can_f_submatch || no < 0)
2514 return NULL;
2515
Bram Moolenaar6100d022016-10-02 16:51:57 +02002516 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002517 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002518 slnum = rsm.sm_mmatch->startpos[no].lnum;
2519 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002520 if (slnum < 0 || elnum < 0)
2521 return NULL;
2522
Bram Moolenaar6100d022016-10-02 16:51:57 +02002523 scol = rsm.sm_mmatch->startpos[no].col;
2524 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002525
2526 list = list_alloc();
2527 if (list == NULL)
2528 return NULL;
2529
2530 s = reg_getline_submatch(slnum) + scol;
2531 if (slnum == elnum)
2532 {
2533 if (list_append_string(list, s, ecol - scol) == FAIL)
2534 error = TRUE;
2535 }
2536 else
2537 {
2538 if (list_append_string(list, s, -1) == FAIL)
2539 error = TRUE;
2540 for (i = 1; i < elnum - slnum; i++)
2541 {
2542 s = reg_getline_submatch(slnum + i);
2543 if (list_append_string(list, s, -1) == FAIL)
2544 error = TRUE;
2545 }
2546 s = reg_getline_submatch(elnum);
2547 if (list_append_string(list, s, ecol) == FAIL)
2548 error = TRUE;
2549 }
2550 }
2551 else
2552 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002553 s = rsm.sm_match->startp[no];
2554 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002555 return NULL;
2556 list = list_alloc();
2557 if (list == NULL)
2558 return NULL;
2559 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002560 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002561 error = TRUE;
2562 }
2563
2564 if (error)
2565 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002566 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002567 return NULL;
2568 }
Bram Moolenaar8a0dcf42020-09-06 15:14:45 +02002569 ++list->lv_refcount;
Bram Moolenaar41571762014-04-02 19:00:58 +02002570 return list;
2571}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002572#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002573
Bram Moolenaarf4140482020-02-15 23:06:45 +01002574/*
2575 * Initialize the values used for matching against multiple lines
2576 */
2577 static void
2578init_regexec_multi(
2579 regmmatch_T *rmp,
2580 win_T *win, // window in which to search or NULL
2581 buf_T *buf, // buffer in which to search
2582 linenr_T lnum) // nr of line to start looking for match
2583{
2584 rex.reg_match = NULL;
2585 rex.reg_mmatch = rmp;
2586 rex.reg_buf = buf;
2587 rex.reg_win = win;
2588 rex.reg_firstlnum = lnum;
2589 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
2590 rex.reg_line_lbr = FALSE;
2591 rex.reg_ic = rmp->rmm_ic;
2592 rex.reg_icombine = FALSE;
2593 rex.reg_maxcol = rmp->rmm_maxcol;
2594}
2595
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002596#include "regexp_bt.c"
2597
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002598static regengine_T bt_regengine =
2599{
2600 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002601 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002602 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002603 bt_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002604};
2605
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002606#include "regexp_nfa.c"
2607
2608static regengine_T nfa_regengine =
2609{
2610 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002611 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002612 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002613 nfa_regexec_multi,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002614};
2615
// The regexp engine that vim_regcomp() is to use.  Set from 'regexpengine'
// and must match its values; a "\%#=" prefix in the pattern overrules it.
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002619
#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number.
static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
                    "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                };
#endif
2627
2628/*
2629 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002630 * Returns the program in allocated memory.
2631 * Use vim_regfree() to free the memory.
2632 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002633 */
2634 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002635vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002636{
2637 regprog_T *prog = NULL;
2638 char_u *expr = expr_arg;
Bram Moolenaar53989552019-12-23 22:59:18 +01002639 int called_emsg_before;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002640
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002641 regexp_engine = p_re;
2642
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002643 // Check for prefix "\%#=", that sets the regexp engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002644 if (STRNCMP(expr, "\\%#=", 4) == 0)
2645 {
2646 int newengine = expr[4] - '0';
2647
2648 if (newengine == AUTOMATIC_ENGINE
2649 || newengine == BACKTRACKING_ENGINE
2650 || newengine == NFA_ENGINE)
2651 {
2652 regexp_engine = expr[4] - '0';
2653 expr += 5;
2654#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002655 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002656 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002657#endif
2658 }
2659 else
2660 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002661 emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002662 regexp_engine = AUTOMATIC_ENGINE;
2663 }
2664 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002665#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002666 bt_regengine.expr = expr;
2667 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002668#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002669 // reg_iswordc() uses rex.reg_buf
2670 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002671
2672 /*
2673 * First try the NFA engine, unless backtracking was requested.
2674 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002675 called_emsg_before = called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002676 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002677 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002678 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002679 else
2680 prog = bt_regengine.regcomp(expr, re_flags);
2681
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002682 // Check for error compiling regexp with initial engine.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002683 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002684 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002685#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002686 if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002687 {
2688 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002689 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002690 if (f)
2691 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002692 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002693 fclose(f);
2694 }
2695 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002696 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002697 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002698 }
2699#endif
2700 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002701 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002702 * The NFA engine also fails for patterns that it can't handle well
2703 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002704 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002705 */
Bram Moolenaar53989552019-12-23 22:59:18 +01002706 if (regexp_engine == AUTOMATIC_ENGINE
2707 && called_emsg == called_emsg_before)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002708 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002709 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaar66c50c52021-01-02 17:43:49 +01002710#ifdef FEAT_EVAL
2711 report_re_switch(expr);
2712#endif
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002713 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002714 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002715 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002716
Bram Moolenaarfda37292014-11-05 14:27:36 +01002717 if (prog != NULL)
2718 {
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002719 // Store the info needed to call regcomp() again when the engine turns
2720 // out to be very slow when executing it.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002721 prog->re_engine = regexp_engine;
2722 prog->re_flags = re_flags;
2723 }
2724
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002725 return prog;
2726}
2727
2728/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002729 * Free a compiled regexp program, returned by vim_regcomp().
2730 */
2731 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002732vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002733{
2734 if (prog != NULL)
2735 prog->engine->regfree(prog);
2736}
2737
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release what the regexp code keeps around: "regstack" and "backpos" are
 * cleared with ga_clear(), "reg_tofree" and "reg_prev_sub" are freed.
 */
    void
free_regexp_stuff(void)
{
    // cleared with ga_clear()
    ga_clear(&regstack);
    ga_clear(&backpos);

    // allocated memory
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
2748
#ifdef FEAT_EVAL
/*
 * When 'verbose' is above zero, give a message that the backtracking engine
 * is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
        return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2762
#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog": non-zero while "prog" is being
 * executed.
 * "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    return prog->re_in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002773
2774/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002775 * Match a regexp against a string.
Bram Moolenaar4aaf3e72022-05-30 20:58:55 +01002776 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002777 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002778 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002779 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002780 *
2781 * Return TRUE if there is a match, FALSE if not.
2782 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002783 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002784vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002785 regmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002786 char_u *line, // string to match against
2787 colnr_T col, // column to start looking for match
Bram Moolenaar05540972016-01-30 20:31:25 +01002788 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002789{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002790 int result;
2791 regexec_T rex_save;
2792 int rex_in_use_save = rex_in_use;
2793
Bram Moolenaar0270f382018-07-17 05:43:58 +02002794 // Cannot use the same prog recursively, it contains state.
2795 if (rmp->regprog->re_in_use)
2796 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002797 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002798 return FALSE;
2799 }
2800 rmp->regprog->re_in_use = TRUE;
2801
Bram Moolenaar6100d022016-10-02 16:51:57 +02002802 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002803 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002804 rex_save = rex;
2805 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002806
Bram Moolenaar6100d022016-10-02 16:51:57 +02002807 rex.reg_startp = NULL;
2808 rex.reg_endp = NULL;
2809 rex.reg_startpos = NULL;
2810 rex.reg_endpos = NULL;
2811
2812 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002813 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002814
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002815 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002816 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2817 && result == NFA_TOO_EXPENSIVE)
2818 {
2819 int save_p_re = p_re;
2820 int re_flags = rmp->regprog->re_flags;
2821 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2822
2823 p_re = BACKTRACKING_ENGINE;
2824 vim_regfree(rmp->regprog);
2825 if (pat != NULL)
2826 {
2827#ifdef FEAT_EVAL
2828 report_re_switch(pat);
2829#endif
2830 rmp->regprog = vim_regcomp(pat, re_flags);
2831 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002832 {
2833 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002834 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002835 rmp->regprog->re_in_use = FALSE;
2836 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002837 vim_free(pat);
2838 }
2839
2840 p_re = save_p_re;
2841 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002842
2843 rex_in_use = rex_in_use_save;
2844 if (rex_in_use)
2845 rex = rex_save;
2846
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002847 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002848}
2849
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002850/*
2851 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002852 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002853 */
2854 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002855vim_regexec_prog(
2856 regprog_T **prog,
2857 int ignore_case,
2858 char_u *line,
2859 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002860{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002861 int r;
2862 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002863
2864 regmatch.regprog = *prog;
2865 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002866 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002867 *prog = regmatch.regprog;
2868 return r;
2869}
2870
2871/*
2872 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002873 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002874 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002875 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002876vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002877{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002878 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002879}
2880
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002881/*
2882 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002883 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002884 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002885 */
2886 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002887vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002888{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002889 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002890}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002891
2892/*
2893 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002894 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2895 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002896 * Uses curbuf for line count and 'iskeyword'.
2897 *
2898 * Return zero if there is no match. Return number of lines contained in the
2899 * match otherwise.
2900 */
2901 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002902vim_regexec_multi(
2903 regmmatch_T *rmp,
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002904 win_T *win, // window in which to search or NULL
2905 buf_T *buf, // buffer in which to search
2906 linenr_T lnum, // nr of line to start looking for match
2907 colnr_T col, // column to start looking for match
2908 proftime_T *tm, // timeout limit or NULL
2909 int *timed_out) // flag is set when timeout limit reached
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002910{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002911 int result;
2912 regexec_T rex_save;
2913 int rex_in_use_save = rex_in_use;
2914
Bram Moolenaar0270f382018-07-17 05:43:58 +02002915 // Cannot use the same prog recursively, it contains state.
2916 if (rmp->regprog->re_in_use)
2917 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00002918 emsg(_(e_cannot_use_pattern_recursively));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002919 return FALSE;
2920 }
2921 rmp->regprog->re_in_use = TRUE;
2922
Bram Moolenaar6100d022016-10-02 16:51:57 +02002923 if (rex_in_use)
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002924 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002925 rex_save = rex;
2926 rex_in_use = TRUE;
2927
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002928 result = rmp->regprog->engine->regexec_multi(
2929 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002930 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002931
Bram Moolenaar63d9e732019-12-05 21:10:38 +01002932 // NFA engine aborted because it's very slow.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002933 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2934 && result == NFA_TOO_EXPENSIVE)
2935 {
2936 int save_p_re = p_re;
2937 int re_flags = rmp->regprog->re_flags;
2938 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2939
2940 p_re = BACKTRACKING_ENGINE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002941 if (pat != NULL)
2942 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002943 regprog_T *prev_prog = rmp->regprog;
2944
Bram Moolenaarfda37292014-11-05 14:27:36 +01002945#ifdef FEAT_EVAL
2946 report_re_switch(pat);
2947#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002948#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002949 // checking for \z misuse was already done when compiling for NFA,
2950 // allow all here
2951 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002952#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01002953 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002954#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002955 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002956#endif
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002957 if (rmp->regprog == NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002958 {
Bram Moolenaare8a4c0d2022-04-04 18:14:34 +01002959 // Somehow compiling the pattern failed now, put back the
2960 // previous one to avoid "regprog" becoming NULL.
2961 rmp->regprog = prev_prog;
2962 }
2963 else
2964 {
2965 vim_regfree(prev_prog);
2966
Bram Moolenaar41499802018-07-18 06:02:09 +02002967 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002968 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002969 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002970 rmp->regprog->re_in_use = FALSE;
2971 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002972 vim_free(pat);
2973 }
2974 p_re = save_p_re;
2975 }
2976
Bram Moolenaar6100d022016-10-02 16:51:57 +02002977 rex_in_use = rex_in_use_save;
2978 if (rex_in_use)
2979 rex = rex_save;
2980
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002981 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002982}