/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 */
5
// By default: do not create debugging logs or files related to regular
// expressions, even when compiling with -DDEBUG.
// Uncomment the second line to get the regexp debugging.
#undef DEBUG
// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
#include "vim.h"
13
#ifdef DEBUG
// show/save debugging data when BT engine is used
# define BT_REGEXP_DUMP
// save the debugging data to a file instead of displaying it
# define BT_REGEXP_LOG
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 *
 * Magic(x)	the Magic (negative) value for character "x": x - 256
 * un_Magic(x)	the inverse of Magic(): x + 256
 * is_Magic(x)	non-zero when "x" is negative, thus a Magic value
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)
31
/*
 * Return "x" without its magicness: a Magic (negative) value is converted
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    if (is_Magic(x))
        return un_Magic(x);
    return x;
}
39
/*
 * Flip the magicness of "x": a Magic (negative) value is converted with
 * un_Magic(), any other value is converted with Magic().
 */
    static int
toggle_Magic(int x)
{
    if (is_Magic(x))
        return un_Magic(x);
    return Magic(x);
}
47
/*
 * The first byte of the BT regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 */

#define REGMAGIC	0234
55
/*
 * Utility definitions.
 *
 * UCHARAT(p) reads the byte at "p" as a char_u and yields it as an int.
 */
#define UCHARAT(p)	((int)*(char_u *)(p))
60
/*
 * Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL (the *_RET_NULL variants) or FAIL (the
 * *_RET_FAIL variants).
 * Every macro expands to a complete "return" statement, thus it leaves the
 * function it is used in.
 * The EMSG2/EMSG3 variants pass "" for the first "%s" in the message when "c"
 * is non-zero and "\\" otherwise; EMSG3 additionally passes "a".
 */
#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000070
Bram Moolenaar95f09602016-11-10 20:01:45 +010071
// Value 32767 shifted left by 16 bits (0x7fff0000).
// NOTE(review): presumably the upper bound for a multi count; its users are
// outside this part of the file.
#define MAX_LIMIT	(32767L << 16L)
73
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020074static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +020075static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
76static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020077static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
78static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
79static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +020080#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +020081static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
Bram Moolenaarbcf94422018-06-23 14:21:42 +020082static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +020083#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +020084static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +020085static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar0270f382018-07-17 05:43:58 +020086static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
87
// return values for re_multi_type()
#define NOT_MULTI	0
#define MULTI_ONE	1
#define MULTI_MULT	2

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match
98
Bram Moolenaar071d4272004-06-13 20:20:40 +000099/*
100 * Return NOT_MULTI if c is not a "multi" operator.
101 * Return MULTI_ONE if c is a single "multi" operator.
102 * Return MULTI_MULT if c is a multi "multi" operator.
103 */
104 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100105re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106{
107 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
108 return MULTI_ONE;
109 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
110 return MULTI_MULT;
111 return NOT_MULTI;
112}
113
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000114static char_u *reg_prev_sub = NULL;
115
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116/*
117 * REGEXP_INRANGE contains all characters which are always special in a []
118 * range after '\'.
119 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
120 * These are:
121 * \n - New line (NL).
122 * \r - Carriage Return (CR).
123 * \t - Tab (TAB).
124 * \e - Escape (ESC).
125 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000126 * \d - Character code in decimal, eg \d123
127 * \o - Character code in octal, eg \o80
128 * \x - Character code in hex, eg \x4a
129 * \u - Multibyte character code, eg \u20ac
130 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000131 */
132static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000133static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000134
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135/*
136 * Translate '\x' to its control character, except "\n", which is Magic.
137 */
138 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100139backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140{
141 switch (c)
142 {
143 case 'r': return CAR;
144 case 't': return TAB;
145 case 'e': return ESC;
146 case 'b': return BS;
147 }
148 return c;
149}
150
151/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000152 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000153 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
154 * recognized. Otherwise "pp" is advanced to after the item.
155 */
156 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100157get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000158{
159 static const char *(class_names[]) =
160 {
161 "alnum:]",
162#define CLASS_ALNUM 0
163 "alpha:]",
164#define CLASS_ALPHA 1
165 "blank:]",
166#define CLASS_BLANK 2
167 "cntrl:]",
168#define CLASS_CNTRL 3
169 "digit:]",
170#define CLASS_DIGIT 4
171 "graph:]",
172#define CLASS_GRAPH 5
173 "lower:]",
174#define CLASS_LOWER 6
175 "print:]",
176#define CLASS_PRINT 7
177 "punct:]",
178#define CLASS_PUNCT 8
179 "space:]",
180#define CLASS_SPACE 9
181 "upper:]",
182#define CLASS_UPPER 10
183 "xdigit:]",
184#define CLASS_XDIGIT 11
185 "tab:]",
186#define CLASS_TAB 12
187 "return:]",
188#define CLASS_RETURN 13
189 "backspace:]",
190#define CLASS_BACKSPACE 14
191 "escape:]",
192#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100193 "ident:]",
194#define CLASS_IDENT 16
195 "keyword:]",
196#define CLASS_KEYWORD 17
197 "fname:]",
198#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000199 };
200#define CLASS_NONE 99
201 int i;
202
203 if ((*pp)[1] == ':')
204 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000205 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000206 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
207 {
208 *pp += STRLEN(class_names[i]) + 2;
209 return i;
210 }
211 }
212 return CLASS_NONE;
213}
214
/*
 * Specific version of character class functions.
 * Using a table to keep this fast.
 * class_tab[] has one entry for every byte value; an entry is a combination
 * of the RI_ flags below.  It is filled by init_class_tab().
 */
static short	class_tab[256];

#define	    RI_DIGIT	0x01
#define	    RI_HEX	0x02
#define	    RI_OCTAL	0x04
#define	    RI_WORD	0x08
#define	    RI_HEAD	0x10
#define	    RI_ALPHA	0x20
#define	    RI_LOWER	0x40
#define	    RI_UPPER	0x80
#define	    RI_WHITE	0x100
230
231 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100232init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233{
234 int i;
235 static int done = FALSE;
236
237 if (done)
238 return;
239
240 for (i = 0; i < 256; ++i)
241 {
242 if (i >= '0' && i <= '7')
243 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
244 else if (i >= '8' && i <= '9')
245 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
246 else if (i >= 'a' && i <= 'f')
247 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
248#ifdef EBCDIC
249 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
250 || (i >= 's' && i <= 'z'))
251#else
252 else if (i >= 'g' && i <= 'z')
253#endif
254 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
255 else if (i >= 'A' && i <= 'F')
256 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
257#ifdef EBCDIC
258 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
259 || (i >= 'S' && i <= 'Z'))
260#else
261 else if (i >= 'G' && i <= 'Z')
262#endif
263 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
264 else if (i == '_')
265 class_tab[i] = RI_WORD + RI_HEAD;
266 else
267 class_tab[i] = 0;
268 }
269 class_tab[' '] |= RI_WHITE;
270 class_tab['\t'] |= RI_WHITE;
271 done = TRUE;
272}
273
/*
 * Character class tests using class_tab[].  A character of 0x100 or above
 * never matches.
 * The argument is parenthesized in the range check, so that an expression
 * such as "x & 0xff" can be passed safely.
 * "c" is evaluated twice: do not pass an expression with side effects.
 * "c" must not be negative, it is used as an index in class_tab[].
 */
#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283
// flags for regflags
#define RF_ICASE    1	// ignore case
#define RF_NOICASE  2	// don't ignore case
#define RF_HASNL    4	// can match a NL
#define RF_ICOMBINE 8	// ignore combining characters
#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"
290
291/*
292 * Global work variables for vim_regcomp().
293 */
294
295static char_u *regparse; /* Input-scan pointer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296static int regnpar; /* () count. */
297#ifdef FEAT_SYN_HL
298static int regnzpar; /* \z() count. */
299static int re_has_z; /* \z item detected */
300#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301static unsigned regflags; /* RF_ flags for prog */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302#if defined(FEAT_SYN_HL) || defined(PROTO)
303static int had_eol; /* TRUE when EOL found by vim_regcomp() */
304#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305
306static int reg_magic; /* magicness of the pattern: */
307#define MAGIC_NONE 1 /* "\V" very unmagic */
308#define MAGIC_OFF 2 /* "\M" or 'magic' off */
309#define MAGIC_ON 3 /* "\m" or 'magic' */
310#define MAGIC_ALL 4 /* "\v" very magic */
311
312static int reg_string; /* matching with a string instead of a buffer
313 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000314static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000315
316/*
317 * META contains all characters that may be magic, except '^' and '$'.
318 */
319
320#ifdef EBCDIC
321static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
322#else
323/* META[] is used often enough to justify turning it into a table. */
324static char_u META_flags[] = {
325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327/* % & ( ) * + . */
328 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
329/* 1 2 3 4 5 6 7 8 9 < = > ? */
330 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
331/* @ A C D F H I K L M O */
332 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
333/* P S U V W X Z [ _ */
334 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
335/* a c d f h i k l m n o */
336 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
337/* p s u v w x z { | ~ */
338 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
339};
340#endif
341
static int	curchr;		// currently parsed character
// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;
static int	prevprevchr;	// previous-previous character
static int	nextchr;	// used for ungetchr()
Bram Moolenaar071d4272004-06-13 20:20:40 +0000349
// arguments for reg()
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)
355
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200356typedef struct
357{
358 char_u *regparse;
359 int prevchr_len;
360 int curchr;
361 int prevchr;
362 int prevprevchr;
363 int nextchr;
364 int at_start;
365 int prev_at_start;
366 int regnpar;
367} parse_state_T;
368
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100369static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100370static int getchr(void);
371static void skipchr_keepstart(void);
372static int peekchr(void);
373static void skipchr(void);
374static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100375static long gethexchrs(int maxinputlen);
376static long getoctchrs(void);
377static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100378static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100379static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200380static int cstrncmp(char_u *s1, char_u *s2, int *n);
381static char_u *cstrchr(char_u *, int);
382static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100383static int reg_iswordc(int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200385static regengine_T bt_regengine;
386static regengine_T nfa_regengine;
387
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388/*
389 * Return TRUE if compiled regular expression "prog" can match a line break.
390 */
391 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100392re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 return (prog->regflags & RF_HASNL);
395}
396
397/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000398 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
399 * Returns a character representing the class. Zero means that no item was
400 * recognized. Otherwise "pp" is advanced to after the item.
401 */
402 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100403get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000404{
405 int c;
406 int l = 1;
407 char_u *p = *pp;
408
Bram Moolenaar985079c2019-02-16 17:07:47 +0100409 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000410 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000411 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000412 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000413 if (p[l + 2] == '=' && p[l + 3] == ']')
414 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000415 if (has_mbyte)
416 c = mb_ptr2char(p + 2);
417 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000418 c = p[2];
419 *pp += l + 4;
420 return c;
421 }
422 }
423 return 0;
424}
425
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 * Every entry starts with an ASCII letter; the other bytes of the entry are
 * presumably its accented variants in IBM-1047 -- TODO confirm where the
 * table is used.
 */
static char *EQUIVAL_CLASS_C[16] = {
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF\x80",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF\x70",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
449
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000451 * Check for a collating element "[.a.]". "pp" points to the '['.
452 * Returns a character. Zero means that no item was recognized. Otherwise
453 * "pp" is advanced to after the item.
454 * Currently only single characters are recognized!
455 */
456 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100457get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000458{
459 int c;
460 int l = 1;
461 char_u *p = *pp;
462
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100463 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000464 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000466 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467 if (p[l + 2] == '.' && p[l + 3] == ']')
468 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 if (has_mbyte)
470 c = mb_ptr2char(p + 2);
471 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000472 c = p[2];
473 *pp += l + 4;
474 return c;
475 }
476 }
477 return 0;
478}
479
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200480static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
481static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
482
483 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100484get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200485{
486 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
487 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
488}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000489
490/*
491 * Skip over a "[]" range.
492 * "p" must point to the character after the '['.
493 * The returned pointer is on the matching ']', or the terminating NUL.
494 */
495 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100496skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000497{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000498 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000499
Bram Moolenaardf177f62005-02-22 08:39:57 +0000500 if (*p == '^') /* Complement of range. */
501 ++p;
502 if (*p == ']' || *p == '-')
503 ++p;
504 while (*p != NUL && *p != ']')
505 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000506 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000507 p += l;
508 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000509 if (*p == '-')
510 {
511 ++p;
512 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100513 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000514 }
515 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200516 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000517 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200518 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000519 p += 2;
520 else if (*p == '[')
521 {
522 if (get_char_class(&p) == CLASS_NONE
523 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200524 && get_coll_element(&p) == 0
525 && *p != NUL)
526 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +0000527 }
528 else
529 ++p;
530 }
531
532 return p;
533}
534
535/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +0000537 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000538 * Take care of characters with a backslash in front of it.
539 * Skip strings inside [ and ].
540 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
541 * expression and change "\?" to "?". If "*newp" is not NULL the expression
542 * is changed in-place.
543 */
544 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100545skip_regexp(
546 char_u *startp,
547 int dirc,
548 int magic,
549 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550{
551 int mymagic;
552 char_u *p = startp;
553
554 if (magic)
555 mymagic = MAGIC_ON;
556 else
557 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200558 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000559
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100560 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561 {
562 if (p[0] == dirc) /* found end of regexp */
563 break;
564 if ((p[0] == '[' && mymagic >= MAGIC_ON)
565 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
566 {
567 p = skip_anyof(p + 1);
568 if (p[0] == NUL)
569 break;
570 }
571 else if (p[0] == '\\' && p[1] != NUL)
572 {
573 if (dirc == '?' && newp != NULL && p[1] == '?')
574 {
575 /* change "\?" to "?", make a copy first. */
576 if (*newp == NULL)
577 {
578 *newp = vim_strsave(startp);
579 if (*newp != NULL)
580 p = *newp + (p - startp);
581 }
582 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000583 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000584 else
585 ++p;
586 }
587 else
588 ++p; /* skip next character */
589 if (*p == 'v')
590 mymagic = MAGIC_ALL;
591 else if (*p == 'V')
592 mymagic = MAGIC_NONE;
593 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594 }
595 return p;
596}
597
/*
 * Functions for getting characters from the regexp input.
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;  // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100604
Bram Moolenaar071d4272004-06-13 20:20:40 +0000605/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200606 * Start parsing at "str".
607 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100609initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610{
611 regparse = str;
612 prevchr_len = 0;
613 curchr = prevprevchr = prevchr = nextchr = -1;
614 at_start = TRUE;
615 prev_at_start = FALSE;
616}
617
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200618/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200619 * Save the current parse state, so that it can be restored and parsing
620 * starts in the same state again.
621 */
622 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100623save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200624{
625 ps->regparse = regparse;
626 ps->prevchr_len = prevchr_len;
627 ps->curchr = curchr;
628 ps->prevchr = prevchr;
629 ps->prevprevchr = prevprevchr;
630 ps->nextchr = nextchr;
631 ps->at_start = at_start;
632 ps->prev_at_start = prev_at_start;
633 ps->regnpar = regnpar;
634}
635
636/*
637 * Restore a previously saved parse state.
638 */
639 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100640restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200641{
642 regparse = ps->regparse;
643 prevchr_len = ps->prevchr_len;
644 curchr = ps->curchr;
645 prevchr = ps->prevchr;
646 prevprevchr = ps->prevprevchr;
647 nextchr = ps->nextchr;
648 at_start = ps->at_start;
649 prev_at_start = ps->prev_at_start;
650 regnpar = ps->regnpar;
651}
652
653
654/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200655 * Get the next character without advancing.
656 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100658peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000659{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000660 static int after_slash = FALSE;
661
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 if (curchr == -1)
663 {
664 switch (curchr = regparse[0])
665 {
666 case '.':
667 case '[':
668 case '~':
669 /* magic when 'magic' is on */
670 if (reg_magic >= MAGIC_ON)
671 curchr = Magic(curchr);
672 break;
673 case '(':
674 case ')':
675 case '{':
676 case '%':
677 case '+':
678 case '=':
679 case '?':
680 case '@':
681 case '!':
682 case '&':
683 case '|':
684 case '<':
685 case '>':
686 case '#': /* future ext. */
687 case '"': /* future ext. */
688 case '\'': /* future ext. */
689 case ',': /* future ext. */
690 case '-': /* future ext. */
691 case ':': /* future ext. */
692 case ';': /* future ext. */
693 case '`': /* future ext. */
694 case '/': /* Can't be used in / command */
695 /* magic only after "\v" */
696 if (reg_magic == MAGIC_ALL)
697 curchr = Magic(curchr);
698 break;
699 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +0000700 /* * is not magic as the very first character, eg "?*ptr", when
701 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
702 * "\(\*" is not magic, thus must be magic if "after_slash" */
703 if (reg_magic >= MAGIC_ON
704 && !at_start
705 && !(prev_at_start && prevchr == Magic('^'))
706 && (after_slash
707 || (prevchr != Magic('(')
708 && prevchr != Magic('&')
709 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710 curchr = Magic('*');
711 break;
712 case '^':
713 /* '^' is only magic as the very first character and if it's after
714 * "\(", "\|", "\&' or "\n" */
715 if (reg_magic >= MAGIC_OFF
716 && (at_start
717 || reg_magic == MAGIC_ALL
718 || prevchr == Magic('(')
719 || prevchr == Magic('|')
720 || prevchr == Magic('&')
721 || prevchr == Magic('n')
722 || (no_Magic(prevchr) == '('
723 && prevprevchr == Magic('%'))))
724 {
725 curchr = Magic('^');
726 at_start = TRUE;
727 prev_at_start = FALSE;
728 }
729 break;
730 case '$':
731 /* '$' is only magic as the very last char and if it's in front of
732 * either "\|", "\)", "\&", or "\n" */
733 if (reg_magic >= MAGIC_OFF)
734 {
735 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200736 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200738 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000739 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200740 || p[1] == 'm' || p[1] == 'M'
741 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
742 {
743 if (p[1] == 'v')
744 is_magic_all = TRUE;
745 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
746 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200748 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749 if (p[0] == NUL
750 || (p[0] == '\\'
751 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
752 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200753 || (is_magic_all
754 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000755 || reg_magic == MAGIC_ALL)
756 curchr = Magic('$');
757 }
758 break;
759 case '\\':
760 {
761 int c = regparse[1];
762
763 if (c == NUL)
764 curchr = '\\'; /* trailing '\' */
765 else if (
766#ifdef EBCDIC
767 vim_strchr(META, c)
768#else
769 c <= '~' && META_flags[c]
770#endif
771 )
772 {
773 /*
774 * META contains everything that may be magic sometimes,
775 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200776 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777 * magicness. Therefore, \ is so meta-magic that it is
778 * not in META.
779 */
780 curchr = -1;
781 prev_at_start = at_start;
782 at_start = FALSE; /* be able to say "/\*ptr" */
783 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000784 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 peekchr();
786 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000787 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000788 curchr = toggle_Magic(curchr);
789 }
790 else if (vim_strchr(REGEXP_ABBR, c))
791 {
792 /*
793 * Handle abbreviations, like "\t" for TAB -- webb
794 */
795 curchr = backslash_trans(c);
796 }
797 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
798 curchr = toggle_Magic(c);
799 else
800 {
801 /*
802 * Next character can never be (made) magic?
803 * Then backslashing it won't do anything.
804 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000805 if (has_mbyte)
806 curchr = (*mb_ptr2char)(regparse + 1);
807 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000808 curchr = c;
809 }
810 break;
811 }
812
Bram Moolenaar071d4272004-06-13 20:20:40 +0000813 default:
814 if (has_mbyte)
815 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000816 }
817 }
818
819 return curchr;
820}
821
822/*
823 * Eat one lexed character. Do this in a way that we can undo it.
824 */
825 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100826skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000827{
828 /* peekchr() eats a backslash, do the same here */
829 if (*regparse == '\\')
830 prevchr_len = 1;
831 else
832 prevchr_len = 0;
833 if (regparse[prevchr_len] != NUL)
834 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000835 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000836 /* exclude composing chars that mb_ptr2len does include */
837 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000838 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000839 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000841 ++prevchr_len;
842 }
843 regparse += prevchr_len;
844 prev_at_start = at_start;
845 at_start = FALSE;
846 prevprevchr = prevchr;
847 prevchr = curchr;
848 curchr = nextchr; /* use previously unget char, or -1 */
849 nextchr = -1;
850}
851
852/*
853 * Skip a character while keeping the value of prev_at_start for at_start.
854 * prevchr and prevprevchr are also kept.
855 */
856 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100857skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858{
859 int as = prev_at_start;
860 int pr = prevchr;
861 int prpr = prevprevchr;
862
863 skipchr();
864 at_start = as;
865 prevchr = pr;
866 prevprevchr = prpr;
867}
868
/*
 * Return the next character of the pattern, as lexed by peekchr(), and
 * consume it with skipchr().  Magic and such is handled by peekchr(), which
 * is why a lexical analyzer is needed here.
 */
    static int
getchr(void)
{
    int     c;

    c = peekchr();
    skipchr();

    return c;
}
881
882/*
883 * put character back. Works only once!
884 */
885 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100886ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887{
888 nextchr = curchr;
889 curchr = prevchr;
890 prevchr = prevprevchr;
891 at_start = prev_at_start;
892 prev_at_start = FALSE;
893
894 /* Backup regparse, so that it's at the same position as before the
895 * getchr(). */
896 regparse -= prevchr_len;
897}
898
899/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000900 * Get and return the value of the hex string at the current position.
901 * Return -1 if there is no valid hex number.
902 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000903 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000904 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000905 * The parameter controls the maximum number of input characters. This will be
906 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
907 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100908 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100909gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000910{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100911 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000912 int c;
913 int i;
914
915 for (i = 0; i < maxinputlen; ++i)
916 {
917 c = regparse[0];
918 if (!vim_isxdigit(c))
919 break;
920 nr <<= 4;
921 nr |= hex2nr(c);
922 ++regparse;
923 }
924
925 if (i == 0)
926 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100927 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000928}
929
930/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200931 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000932 * current position. Return -1 for invalid. Consumes all digits.
933 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100934 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100935getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000936{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100937 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000938 int c;
939 int i;
940
941 for (i = 0; ; ++i)
942 {
943 c = regparse[0];
944 if (c < '0' || c > '9')
945 break;
946 nr *= 10;
947 nr += c - '0';
948 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200949 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000950 }
951
952 if (i == 0)
953 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100954 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000955}
956
957/*
958 * get and return the value of the octal string immediately after the current
959 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
960 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
961 * treat 8 or 9 as recognised characters. Position is updated:
962 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000963 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000964 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100965 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100966getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000967{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100968 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000969 int c;
970 int i;
971
972 for (i = 0; i < 3 && nr < 040; ++i)
973 {
974 c = regparse[0];
975 if (c < '0' || c > '7')
976 break;
977 nr <<= 3;
978 nr |= hex2nr(c);
979 ++regparse;
980 }
981
982 if (i == 0)
983 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100984 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000985}
986
987/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 * read_limits - Read two integers to be taken as a minimum and maximum.
989 * If the first character is '-', then the range is reversed.
990 * Should end with 'end'. If minval is missing, zero is default, if maxval is
991 * missing, a very big number is the default.
992 */
993 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100994read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996 int reverse = FALSE;
997 char_u *first_char;
998 long tmp;
999
1000 if (*regparse == '-')
1001 {
1002 /* Starts with '-', so reverse the range later */
1003 regparse++;
1004 reverse = TRUE;
1005 }
1006 first_char = regparse;
1007 *minval = getdigits(&regparse);
1008 if (*regparse == ',') /* There is a comma */
1009 {
1010 if (vim_isdigit(*++regparse))
1011 *maxval = getdigits(&regparse);
1012 else
1013 *maxval = MAX_LIMIT;
1014 }
1015 else if (VIM_ISDIGIT(*first_char))
1016 *maxval = *minval; /* It was \{n} or \{-n} */
1017 else
1018 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
1019 if (*regparse == '\\')
1020 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001021 if (*regparse != '}')
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001022 EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"),
1023 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024
1025 /*
1026 * Reverse the range if there was a '-', or make sure it is in the right
1027 * order otherwise.
1028 */
1029 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1030 {
1031 tmp = *minval;
1032 *minval = *maxval;
1033 *maxval = tmp;
1034 }
1035 skipchr(); /* let's be friends with the lexer again */
1036 return OK;
1037}
1038
1039/*
1040 * vim_regexec and friends
1041 */
1042
1043/*
1044 * Global work variables for vim_regexec().
1045 */
1046
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001047static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001048#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001049static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001051static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001052static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001053
1054/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1056 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001057 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001058 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001059static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060static unsigned reg_tofreelen;
1061
/*
 * Structure used to store the execution state of the regex engine.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              curbuf                  buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 */
typedef struct {
    regmatch_T          *reg_match;
    regmmatch_T         *reg_mmatch;
    char_u              **reg_startp;
    char_u              **reg_endp;
    lpos_T              *reg_startpos;
    lpos_T              *reg_endpos;
    win_T               *reg_win;
    buf_T               *reg_buf;
    linenr_T            reg_firstlnum;
    linenr_T            reg_maxline;
    int                 reg_line_lbr;       /* "\n" in string is line break */

    // The current match-position is stored in these variables:
    linenr_T    lnum;           // line number, relative to first line
    char_u      *line;          // start of current line
    char_u      *input;         // current input, points into "line"

    int need_clear_subexpr;     // subexpressions still need to be cleared
#ifdef FEAT_SYN_HL
    int need_clear_zsubexpr;    // extmatch subexpressions still need to be
                                // cleared
#endif

    /* Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
     * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
     * contains '\c' or '\C' the value is overruled. */
    int                 reg_ic;

    /* Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
     * flag in the regexp.  Defaults to false, always. */
    int                 reg_icombine;

    /* Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
     * there is no maximum. */
    colnr_T             reg_maxcol;

    // State for the NFA engine regexec.
    int nfa_has_zend;       // NFA regexp \ze operator encountered.
    int nfa_has_backref;    // NFA regexp \1 .. \9 encountered.
    int nfa_nsubexpr;       // Number of sub expressions actually being used
                            // during execution. 1 if only the whole match
                            // (subexpr 0) is used.
    // listid is global, so that it increases on recursive calls to
    // nfa_regmatch(), which means we don't have to clear the lastlist field of
    // all the states.
    int nfa_listid;
    int nfa_alt_listid;

#ifdef FEAT_SYN_HL
    int nfa_has_zsubexpr;   // NFA regexp has \z( ), set zsubexpr.
#endif
} regexec_T;

// The one execution state shared by the functions in this file.
static regexec_T        rex;
// NOTE(review): presumably TRUE while "rex" is in use by a match, to detect
// recursive use -- it is not referenced in this part of the file, confirm.
static int              rex_in_use = FALSE;
1135
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001137 * Return TRUE if character 'c' is included in 'iskeyword' option for
1138 * "reg_buf" buffer.
1139 */
1140 static int
1141reg_iswordc(int c)
1142{
1143 return vim_iswordc_buf(c, rex.reg_buf);
1144}
1145
1146/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001147 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1148 */
1149 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001150reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001151{
1152 /* when looking behind for a match/no-match lnum is negative. But we
1153 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001154 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001155 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001157 /* Must have matched the "\n" in the last line. */
1158 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001159 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001160}
1161
Bram Moolenaar071d4272004-06-13 20:20:40 +00001162#ifdef FEAT_SYN_HL
1163static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
1164static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
1165static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
1166static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
1167#endif
1168
1169/* TRUE if using multi-line regexp. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001170#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001171
Bram Moolenaar071d4272004-06-13 20:20:40 +00001172#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173/*
1174 * Create a new extmatch and mark it as referenced once.
1175 */
1176 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001177make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001178{
1179 reg_extmatch_T *em;
1180
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001181 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182 if (em != NULL)
1183 em->refcnt = 1;
1184 return em;
1185}
1186
1187/*
1188 * Add a reference to an extmatch.
1189 */
1190 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001191ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
1193 if (em != NULL)
1194 em->refcnt++;
1195 return em;
1196}
1197
1198/*
1199 * Remove a reference to an extmatch. If there are no references left, free
1200 * the info.
1201 */
1202 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001203unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204{
1205 int i;
1206
1207 if (em != NULL && --em->refcnt <= 0)
1208 {
1209 for (i = 0; i < NSUBEXP; ++i)
1210 vim_free(em->matches[i]);
1211 vim_free(em);
1212 }
1213}
1214#endif
1215
1216/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001217 * Get class of previous character.
1218 */
1219 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001220reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001221{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001222 if (rex.input > rex.line)
1223 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001224 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001225 return -1;
1226}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001227
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001228/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001229 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001230 */
1231 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001232reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001233{
1234 pos_T top, bot;
1235 linenr_T lnum;
1236 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001237 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001238 int mode;
1239 colnr_T start, end;
1240 colnr_T start2, end2;
1241 colnr_T cols;
1242
1243 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001244 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245 return FALSE;
1246
1247 if (VIsual_active)
1248 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001249 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001250 {
1251 top = VIsual;
1252 bot = wp->w_cursor;
1253 }
1254 else
1255 {
1256 top = wp->w_cursor;
1257 bot = VIsual;
1258 }
1259 mode = VIsual_mode;
1260 }
1261 else
1262 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001263 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001264 {
1265 top = curbuf->b_visual.vi_start;
1266 bot = curbuf->b_visual.vi_end;
1267 }
1268 else
1269 {
1270 top = curbuf->b_visual.vi_end;
1271 bot = curbuf->b_visual.vi_start;
1272 }
1273 mode = curbuf->b_visual.vi_mode;
1274 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001275 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001276 if (lnum < top.lnum || lnum > bot.lnum)
1277 return FALSE;
1278
1279 if (mode == 'v')
1280 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001281 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001282 if ((lnum == top.lnum && col < top.col)
1283 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1284 return FALSE;
1285 }
1286 else if (mode == Ctrl_V)
1287 {
1288 getvvcol(wp, &top, &start, NULL, &end);
1289 getvvcol(wp, &bot, &start2, NULL, &end2);
1290 if (start2 < start)
1291 start = start2;
1292 if (end2 > end)
1293 end = end2;
1294 if (top.col == MAXCOL || bot.col == MAXCOL)
1295 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001296 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001297 if (cols < start || cols > end - (*p_sel == 'e'))
1298 return FALSE;
1299 }
1300 return TRUE;
1301}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001302
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303/*
1304 * Check the regexp program for its magic number.
1305 * Return TRUE if it's wrong.
1306 */
1307 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001308prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001309{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001310 regprog_T *prog;
1311
Bram Moolenaar6100d022016-10-02 16:51:57 +02001312 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001313 if (prog->engine == &nfa_regengine)
1314 /* For NFA matcher we don't check the magic */
1315 return FALSE;
1316
1317 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001318 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001319 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320 return TRUE;
1321 }
1322 return FALSE;
1323}
1324
1325/*
1326 * Cleanup the subexpressions, if this wasn't done yet.
1327 * This construction is used to clear the subexpressions only when they are
1328 * used (to increase speed).
1329 */
1330 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001331cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001332{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001333 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 {
1335 if (REG_MULTI)
1336 {
1337 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001338 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1339 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001340 }
1341 else
1342 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001343 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1344 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001345 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001346 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001347 }
1348}
1349
1350#ifdef FEAT_SYN_HL
1351 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001352cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001353{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001354 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 {
1356 if (REG_MULTI)
1357 {
1358 /* Use 0xff to set lnum to -1 */
1359 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1360 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1361 }
1362 else
1363 {
1364 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1365 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1366 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001367 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 }
1369}
1370#endif
1371
1372/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001373 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374 */
1375 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001376reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001378 rex.line = reg_getline(++rex.lnum);
1379 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001380 fast_breakcheck();
1381}
1382
/*
 * Check whether a backreference matches: compare the text from
 * ("start_lnum", "start_col") up to ("end_lnum", "end_col") with the text at
 * rex.input, using cstrncmp().  Line numbers are as used by reg_getline().
 * When the referenced text spans lines, reg_nextline() is used to advance
 * the input to the next line for every following line.
 *
 * Returns RA_FAIL (out of memory or interrupted), RA_NOMATCH or RA_MATCH.
 * If "bytelen" is not NULL, it is set to the byte length of the match in the
 * last line.
 */
    static int
match_with_backref(
    linenr_T start_lnum,
    colnr_T  start_col,
    linenr_T end_lnum,
    colnr_T  end_col,
    int      *bytelen)
{
    linenr_T    clnum = start_lnum;     // line being compared with
    colnr_T     ccol = start_col;       // column in that line
    int         len;
    char_u      *p;

    if (bytelen != NULL)
        *bytelen = 0;
    for (;;)
    {
        // Since getting one line may invalidate the other, need to make copy.
        // Slow!
        if (rex.line != reg_tofree)
        {
            len = (int)STRLEN(rex.line);
            // Only re-allocate the shared buffer when the line, including
            // its NUL, does not fit.
            if (reg_tofree == NULL || len >= (int)reg_tofreelen)
            {
                len += 50;      // get some extra
                vim_free(reg_tofree);
                reg_tofree = alloc(len);
                if (reg_tofree == NULL)
                    return RA_FAIL;     // out of memory!
                reg_tofreelen = len;
            }
            STRCPY(reg_tofree, rex.line);
            // Keep rex.input at the same offset, now inside the copy.
            rex.input = reg_tofree + (rex.input - rex.line);
            rex.line = reg_tofree;
        }

        // Get the line to compare with.
        p = reg_getline(clnum);
        // In the last line compare up to "end_col", otherwise to the end of
        // the line.
        if (clnum == end_lnum)
            len = end_col - ccol;
        else
            len = (int)STRLEN(p + ccol);

        // cstrncmp() may correct "len" when composing characters are ignored.
        if (cstrncmp(p + ccol, rex.input, &len) != 0)
            return RA_NOMATCH;  // doesn't match
        if (bytelen != NULL)
            *bytelen += len;
        if (clnum == end_lnum)
            break;              // match and at end!
        if (rex.lnum >= rex.reg_maxline)
            return RA_NOMATCH;  // text too short

        // Advance to next line.
        reg_nextline();
        if (bytelen != NULL)
            *bytelen = 0;       // only the length in the last line counts
        ++clnum;
        ccol = 0;
        if (got_int)
            return RA_FAIL;
    }

    // found a match!  Note that rex.line may now point to a copy of the line,
    // that should not matter.
    return RA_MATCH;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001455
Bram Moolenaarfb031402014-09-09 17:18:49 +02001456/*
1457 * Used in a place where no * or \+ can follow.
1458 */
1459 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001460re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001461{
1462 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001463 {
1464 semsg(_("E888: (NFA regexp) cannot repeat %s"), what);
1465 rc_did_emsg = TRUE;
1466 return FAIL;
1467 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001468 return OK;
1469}
1470
/*
 * Decomposition of one character into up to three characters; unused
 * positions are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the characters 0xfb20 - 0xfb4f, indexed by the
 * character minus 0xfb20.  Entries marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into "*c1", "*c2" and "*c3".
 * Only the characters 0xfb20 - 0xfb4f are decomposed, using decomp_table.
 * For any other character "*c1" is set to "c" and "*c2" and "*c3" to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    decomp_T    *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        // Not in the table: the character is its own decomposition.
        *c1 = c;
        *c3 = 0;
        *c2 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001548
1549/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001550 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001551 * Return 0 if strings match, non-zero otherwise.
1552 * Correct the length "*n" when composing characters are ignored.
1553 */
1554 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001555cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001556{
1557 int result;
1558
Bram Moolenaar6100d022016-10-02 16:51:57 +02001559 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560 result = STRNCMP(s1, s2, *n);
1561 else
1562 result = MB_STRNICMP(s1, s2, *n);
1563
Bram Moolenaar071d4272004-06-13 20:20:40 +00001564 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001565 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566 {
1567 char_u *str1, *str2;
1568 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001569 int junk;
1570
1571 /* we have to handle the strcmp ourselves, since it is necessary to
1572 * deal with the composing characters by ignoring them: */
1573 str1 = s1;
1574 str2 = s2;
1575 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001576 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001577 {
1578 c1 = mb_ptr2char_adv(&str1);
1579 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001581 // Decompose the character if necessary, into 'base' characters.
1582 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001583 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001584 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001585 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001586 mb_decompose(c1, &c11, &junk, &junk);
1587 mb_decompose(c2, &c12, &junk, &junk);
1588 c1 = c11;
1589 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001590 if (c11 != c12
1591 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001592 break;
1593 }
1594 }
1595 result = c2 - c1;
1596 if (result == 0)
1597 *n = (int)(str2 - s2);
1598 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001599
1600 return result;
1601}
1602
1603/*
1604 * cstrchr: This function is used a lot for simple searches, keep it fast!
1605 */
1606 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001607cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001608{
1609 char_u *p;
1610 int cc;
1611
Bram Moolenaara12a1612019-01-24 16:39:02 +01001612 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001613 return vim_strchr(s, c);
1614
1615 /* tolower() and toupper() can be slow, comparing twice should be a lot
1616 * faster (esp. when using MS Visual C++!).
1617 * For UTF-8 need to use folded case. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618 if (enc_utf8 && c > 0x80)
1619 cc = utf_fold(c);
1620 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001621 if (MB_ISUPPER(c))
1622 cc = MB_TOLOWER(c);
1623 else if (MB_ISLOWER(c))
1624 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001625 else
1626 return vim_strchr(s, c);
1627
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 if (has_mbyte)
1629 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001630 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001631 {
1632 if (enc_utf8 && c > 0x80)
1633 {
1634 if (utf_fold(utf_ptr2char(p)) == cc)
1635 return p;
1636 }
1637 else if (*p == c || *p == cc)
1638 return p;
1639 }
1640 }
1641 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001642 /* Faster version for when there are no multi-byte characters. */
1643 for (p = s; *p != NUL; ++p)
1644 if (*p == c || *p == cc)
1645 return p;
1646
1647 return NULL;
1648}
1649
1650/***************************************************************
1651 * regsub stuff *
1652 ***************************************************************/
1653
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
typedef void (*(*fptr_T)(int *, int))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001661
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001662static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001663
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001664 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001665do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001666{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001667 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001668
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001669 return (fptr_T)NULL;
1670}
1671
1672 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001673do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001674{
1675 *d = MB_TOUPPER(c);
1676
1677 return (fptr_T)do_Upper;
1678}
1679
1680 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001681do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001682{
1683 *d = MB_TOLOWER(c);
1684
1685 return (fptr_T)NULL;
1686}
1687
1688 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001689do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001690{
1691 *d = MB_TOLOWER(c);
1692
1693 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001694}
1695
1696/*
1697 * regtilde(): Replace tildes in the pattern by the old pattern.
1698 *
1699 * Short explanation of the tilde: It stands for the previous replacement
1700 * pattern. If that previous pattern also contains a ~ we should go back a
1701 * step further... But we insert the previous pattern into the current one
1702 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001703 * This still does not handle the case where "magic" changes. So require the
1704 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001705 *
1706 * The tildes are parsed once before the first call to vim_regsub().
1707 */
1708 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001709regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001710{
1711 char_u *newsub = source;
1712 char_u *tmpsub;
1713 char_u *p;
1714 int len;
1715 int prevlen;
1716
1717 for (p = newsub; *p; ++p)
1718 {
1719 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1720 {
1721 if (reg_prev_sub != NULL)
1722 {
1723 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
1724 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001725 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726 if (tmpsub != NULL)
1727 {
1728 /* copy prefix */
1729 len = (int)(p - newsub); /* not including ~ */
1730 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001731 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
1733 /* copy postfix */
1734 if (!magic)
1735 ++p; /* back off \ */
1736 STRCPY(tmpsub + len + prevlen, p + 1);
1737
1738 if (newsub != source) /* already allocated newsub */
1739 vim_free(newsub);
1740 newsub = tmpsub;
1741 p = newsub + len + prevlen;
1742 }
1743 }
1744 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001745 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00001747 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748 --p;
1749 }
1750 else
1751 {
1752 if (*p == '\\' && p[1]) /* skip escaped characters */
1753 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001754 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001755 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 }
1757 }
1758
1759 vim_free(reg_prev_sub);
1760 if (newsub != source) /* newsub was allocated, just keep it */
1761 reg_prev_sub = newsub;
1762 else /* no ~ found, need to save newsub */
1763 reg_prev_sub = vim_strsave(newsub);
1764 return newsub;
1765}
1766
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used for reg_submatch().  Needed for when the
 * substitution string is an expression that contains a call to substitute()
 * and submatch(). */
typedef struct {
    regmatch_T  *sm_match;      // copy of rex.reg_match; NULL selects sm_mmatch
    regmmatch_T *sm_mmatch;     // copy of rex.reg_mmatch
    linenr_T    sm_firstlnum;   // copy of rex.reg_firstlnum
    linenr_T    sm_maxline;     // copy of rex.reg_maxline
    int         sm_line_lbr;    // copy of rex.reg_line_lbr
} regsubmatch_T;

static regsubmatch_T rsm;  /* can only be used when can_f_submatch is TRUE */
#endif
1783
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001784#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001785
1786/*
1787 * Put the submatches in "argv[0]" which is a list passed into call_func() by
1788 * vim_regsub_both().
1789 */
1790 static int
1791fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
1792{
1793 listitem_T *li;
1794 int i;
1795 char_u *s;
1796
1797 if (argcount == 0)
1798 /* called function doesn't take an argument */
1799 return 0;
1800
1801 /* Relies on sl_list to be the first item in staticList10_T. */
1802 init_static_list((staticList10_T *)(argv->vval.v_list));
1803
1804 /* There are always 10 list items in staticList10_T. */
1805 li = argv->vval.v_list->lv_first;
1806 for (i = 0; i < 10; ++i)
1807 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001808 s = rsm.sm_match->startp[i];
1809 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001810 s = NULL;
1811 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02001812 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001813 li->li_tv.v_type = VAR_STRING;
1814 li->li_tv.vval.v_string = s;
1815 li = li->li_next;
1816 }
1817 return 1;
1818}
1819
1820 static void
1821clear_submatch_list(staticList10_T *sl)
1822{
1823 int i;
1824
1825 for (i = 0; i < 10; ++i)
1826 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1827}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001828#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001829
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830/*
1831 * vim_regsub() - perform substitutions after a vim_regexec() or
1832 * vim_regexec_multi() match.
1833 *
1834 * If "copy" is TRUE really copy into "dest".
1835 * If "copy" is FALSE nothing is copied, this is just to find out the length
1836 * of the result.
1837 *
1838 * If "backslash" is TRUE, a backslash will be removed later, need to double
1839 * them to keep them, and insert a backslash before a CR to avoid it being
1840 * replaced with a line break later.
1841 *
1842 * Note: The matched text must not change between the call of
1843 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1844 * references invalid!
1845 *
1846 * Returns the size of the replacement, including terminating NUL.
1847 */
1848 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001849vim_regsub(
1850 regmatch_T *rmp,
1851 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001852 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001853 char_u *dest,
1854 int copy,
1855 int magic,
1856 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001857{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001858 int result;
1859 regexec_T rex_save;
1860 int rex_in_use_save = rex_in_use;
1861
1862 if (rex_in_use)
1863 /* Being called recursively, save the state. */
1864 rex_save = rex;
1865 rex_in_use = TRUE;
1866
1867 rex.reg_match = rmp;
1868 rex.reg_mmatch = NULL;
1869 rex.reg_maxline = 0;
1870 rex.reg_buf = curbuf;
1871 rex.reg_line_lbr = TRUE;
1872 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
1873
1874 rex_in_use = rex_in_use_save;
1875 if (rex_in_use)
1876 rex = rex_save;
1877
1878 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001879}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880
1881 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001882vim_regsub_multi(
1883 regmmatch_T *rmp,
1884 linenr_T lnum,
1885 char_u *source,
1886 char_u *dest,
1887 int copy,
1888 int magic,
1889 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001890{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001891 int result;
1892 regexec_T rex_save;
1893 int rex_in_use_save = rex_in_use;
1894
1895 if (rex_in_use)
1896 /* Being called recursively, save the state. */
1897 rex_save = rex;
1898 rex_in_use = TRUE;
1899
1900 rex.reg_match = NULL;
1901 rex.reg_mmatch = rmp;
1902 rex.reg_buf = curbuf; /* always works on the current buffer! */
1903 rex.reg_firstlnum = lnum;
1904 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1905 rex.reg_line_lbr = FALSE;
1906 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
1907
1908 rex_in_use = rex_in_use_save;
1909 if (rex_in_use)
1910 rex = rex_save;
1911
1912 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001913}
1914
1915 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001916vim_regsub_both(
1917 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001918 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001919 char_u *dest,
1920 int copy,
1921 int magic,
1922 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923{
1924 char_u *src;
1925 char_u *dst;
1926 char_u *s;
1927 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001928 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001929 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001930 fptr_T func_all = (fptr_T)NULL;
1931 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932 linenr_T clnum = 0; /* init for GCC */
1933 int len = 0; /* init for GCC */
1934#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001935 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937
1938 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001939 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001940 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001941 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942 return 0;
1943 }
1944 if (prog_magic_wrong())
1945 return 0;
1946 src = source;
1947 dst = dest;
1948
1949 /*
1950 * When the substitute part starts with "\=" evaluate it as an expression.
1951 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001952 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001953 {
1954#ifdef FEAT_EVAL
1955 /* To make sure that the length doesn't change between checking the
1956 * length and copying the string, and to speed up things, the
1957 * resulting string is saved from the call with "copy" == FALSE to the
1958 * call with "copy" == TRUE. */
1959 if (copy)
1960 {
1961 if (eval_result != NULL)
1962 {
1963 STRCPY(dest, eval_result);
1964 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01001965 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001966 }
1967 }
1968 else
1969 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001970 int prev_can_f_submatch = can_f_submatch;
1971 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001972
1973 vim_free(eval_result);
1974
1975 /* The expression may contain substitute(), which calls us
1976 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02001977 * level. */
1978 if (can_f_submatch)
1979 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001980 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001981 rsm.sm_match = rex.reg_match;
1982 rsm.sm_mmatch = rex.reg_mmatch;
1983 rsm.sm_firstlnum = rex.reg_firstlnum;
1984 rsm.sm_maxline = rex.reg_maxline;
1985 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001986
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001987 if (expr != NULL)
1988 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001989 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001990 char_u buf[NUMBUFLEN];
1991 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001992 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02001993 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001994
1995 rettv.v_type = VAR_STRING;
1996 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001997 argv[0].v_type = VAR_LIST;
1998 argv[0].vval.v_list = &matchList.sl_list;
1999 matchList.sl_list.lv_len = 0;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002000 vim_memset(&funcexe, 0, sizeof(funcexe));
2001 funcexe.argv_func = fill_submatch_list;
2002 funcexe.evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002003 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002004 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002005 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002006 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002007 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002008 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002009 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002010 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002011
Bram Moolenaar6100d022016-10-02 16:51:57 +02002012 s = partial_name(partial);
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002013 funcexe.partial = partial;
2014 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002015 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002016 if (matchList.sl_list.lv_len > 0)
2017 /* fill_submatch_list() was called */
2018 clear_submatch_list(&matchList);
2019
Bram Moolenaard155d7a2018-12-21 16:04:21 +01002020 eval_result = tv_get_string_buf_chk(&rettv, buf);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002021 if (eval_result != NULL)
2022 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002023 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002024 }
2025 else
2026 eval_result = eval_to_string(source + 2, NULL, TRUE);
2027
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028 if (eval_result != NULL)
2029 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002030 int had_backslash = FALSE;
2031
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002032 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002033 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02002034 /* Change NL to CR, so that it becomes a line break,
2035 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00002036 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002037 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002038 *s = CAR;
2039 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002040 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002041 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002042 /* Change NL to CR here too, so that this works:
2043 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2044 * abc\
2045 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002046 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002047 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002048 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002049 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002050 had_backslash = TRUE;
2051 }
2052 }
2053 if (had_backslash && backslash)
2054 {
2055 /* Backslashes will be consumed, need to double them. */
2056 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2057 if (s != NULL)
2058 {
2059 vim_free(eval_result);
2060 eval_result = s;
2061 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002062 }
2063
2064 dst += STRLEN(eval_result);
2065 }
2066
Bram Moolenaar6100d022016-10-02 16:51:57 +02002067 can_f_submatch = prev_can_f_submatch;
2068 if (can_f_submatch)
2069 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002070 }
2071#endif
2072 }
2073 else
2074 while ((c = *src++) != NUL)
2075 {
2076 if (c == '&' && magic)
2077 no = 0;
2078 else if (c == '\\' && *src != NUL)
2079 {
2080 if (*src == '&' && !magic)
2081 {
2082 ++src;
2083 no = 0;
2084 }
2085 else if ('0' <= *src && *src <= '9')
2086 {
2087 no = *src++ - '0';
2088 }
2089 else if (vim_strchr((char_u *)"uUlLeE", *src))
2090 {
2091 switch (*src++)
2092 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002093 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002094 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002095 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002096 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002097 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002098 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002099 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002100 continue;
2101 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002102 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002103 continue;
2104 }
2105 }
2106 }
2107 if (no < 0) /* Ordinary character. */
2108 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002109 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2110 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002111 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00002112 if (copy)
2113 {
2114 *dst++ = c;
2115 *dst++ = *src++;
2116 *dst++ = *src++;
2117 }
2118 else
2119 {
2120 dst += 3;
2121 src += 2;
2122 }
2123 continue;
2124 }
2125
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126 if (c == '\\' && *src != NUL)
2127 {
2128 /* Check for abbreviations -- webb */
2129 switch (*src)
2130 {
2131 case 'r': c = CAR; ++src; break;
2132 case 'n': c = NL; ++src; break;
2133 case 't': c = TAB; ++src; break;
2134 /* Oh no! \e already has meaning in subst pat :-( */
2135 /* case 'e': c = ESC; ++src; break; */
2136 case 'b': c = Ctrl_H; ++src; break;
2137
2138 /* If "backslash" is TRUE the backslash will be removed
2139 * later. Used to insert a literal CR. */
2140 default: if (backslash)
2141 {
2142 if (copy)
2143 *dst = '\\';
2144 ++dst;
2145 }
2146 c = *src++;
2147 }
2148 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002149 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002150 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002151
Bram Moolenaardb552d602006-03-23 22:59:57 +00002152 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002153 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002154 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002155 func_one = (fptr_T)(func_one(&cc, c));
2156 else if (func_all != (fptr_T)NULL)
2157 /* Turbo C complains without the typecast */
2158 func_all = (fptr_T)(func_all(&cc, c));
2159 else /* just copy */
2160 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002161
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002162 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002163 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002164 int totlen = mb_ptr2len(src - 1);
2165
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002167 mb_char2bytes(cc, dst);
2168 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002169 if (enc_utf8)
2170 {
2171 int clen = utf_ptr2len(src - 1);
2172
2173 /* If the character length is shorter than "totlen", there
2174 * are composing characters; copy them as-is. */
2175 if (clen < totlen)
2176 {
2177 if (copy)
2178 mch_memmove(dst + 1, src - 1 + clen,
2179 (size_t)(totlen - clen));
2180 dst += totlen - clen;
2181 }
2182 }
2183 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002184 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002185 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002186 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002187 dst++;
2188 }
2189 else
2190 {
2191 if (REG_MULTI)
2192 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002193 clnum = rex.reg_mmatch->startpos[no].lnum;
2194 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002195 s = NULL;
2196 else
2197 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002198 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2199 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2200 len = rex.reg_mmatch->endpos[no].col
2201 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002202 else
2203 len = (int)STRLEN(s);
2204 }
2205 }
2206 else
2207 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002208 s = rex.reg_match->startp[no];
2209 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002210 s = NULL;
2211 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002212 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002213 }
2214 if (s != NULL)
2215 {
2216 for (;;)
2217 {
2218 if (len == 0)
2219 {
2220 if (REG_MULTI)
2221 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002222 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002223 break;
2224 if (copy)
2225 *dst = CAR;
2226 ++dst;
2227 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002228 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2229 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002230 else
2231 len = (int)STRLEN(s);
2232 }
2233 else
2234 break;
2235 }
2236 else if (*s == NUL) /* we hit NUL. */
2237 {
2238 if (copy)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002239 emsg(_(e_re_damg));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002240 goto exit;
2241 }
2242 else
2243 {
2244 if (backslash && (*s == CAR || *s == '\\'))
2245 {
2246 /*
2247 * Insert a backslash in front of a CR, otherwise
2248 * it will be replaced by a line break.
2249 * Number of backslashes will be halved later,
2250 * double them here.
2251 */
2252 if (copy)
2253 {
2254 dst[0] = '\\';
2255 dst[1] = *s;
2256 }
2257 dst += 2;
2258 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002259 else
2260 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002261 if (has_mbyte)
2262 c = mb_ptr2char(s);
2263 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002264 c = *s;
2265
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002266 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002267 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002268 func_one = (fptr_T)(func_one(&cc, c));
2269 else if (func_all != (fptr_T)NULL)
2270 /* Turbo C complains without the typecast */
2271 func_all = (fptr_T)(func_all(&cc, c));
2272 else /* just copy */
2273 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002274
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002275 if (has_mbyte)
2276 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002277 int l;
2278
2279 /* Copy composing characters separately, one
2280 * at a time. */
2281 if (enc_utf8)
2282 l = utf_ptr2len(s) - 1;
2283 else
2284 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002285
2286 s += l;
2287 len -= l;
2288 if (copy)
2289 mb_char2bytes(cc, dst);
2290 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002291 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002292 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002293 *dst = cc;
2294 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002295 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002296
Bram Moolenaar071d4272004-06-13 20:20:40 +00002297 ++s;
2298 --len;
2299 }
2300 }
2301 }
2302 no = -1;
2303 }
2304 }
2305 if (copy)
2306 *dst = NUL;
2307
2308exit:
2309 return (int)((dst - dest) + 1);
2310}
2311
2312#ifdef FEAT_EVAL
2313/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002314 * Call reg_getline() with the line numbers from the submatch. If a
2315 * substitute() was used the reg_maxline and other values have been
2316 * overwritten.
2317 */
2318 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002319reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002320{
2321 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002322 linenr_T save_first = rex.reg_firstlnum;
2323 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002324
Bram Moolenaar6100d022016-10-02 16:51:57 +02002325 rex.reg_firstlnum = rsm.sm_firstlnum;
2326 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002327
2328 s = reg_getline(lnum);
2329
Bram Moolenaar6100d022016-10-02 16:51:57 +02002330 rex.reg_firstlnum = save_first;
2331 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002332 return s;
2333}
2334
2335/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002336 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 * allocated memory.
2338 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2339 */
2340 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002341reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002342{
2343 char_u *retval = NULL;
2344 char_u *s;
2345 int len;
2346 int round;
2347 linenr_T lnum;
2348
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002349 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002350 return NULL;
2351
Bram Moolenaar6100d022016-10-02 16:51:57 +02002352 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002353 {
2354 /*
2355 * First round: compute the length and allocate memory.
2356 * Second round: copy the text.
2357 */
2358 for (round = 1; round <= 2; ++round)
2359 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002360 lnum = rsm.sm_mmatch->startpos[no].lnum;
2361 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002362 return NULL;
2363
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002364 s = reg_getline_submatch(lnum);
2365 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002366 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002367 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002368 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002369 {
2370 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002371 len = rsm.sm_mmatch->endpos[no].col
2372 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002373 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002374 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002375 ++len;
2376 }
2377 else
2378 {
2379 /* Multiple lines: take start line from start col, middle
2380 * lines completely and end line up to end col. */
2381 len = (int)STRLEN(s);
2382 if (round == 2)
2383 {
2384 STRCPY(retval, s);
2385 retval[len] = '\n';
2386 }
2387 ++len;
2388 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002389 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002390 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002391 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002392 if (round == 2)
2393 STRCPY(retval + len, s);
2394 len += (int)STRLEN(s);
2395 if (round == 2)
2396 retval[len] = '\n';
2397 ++len;
2398 }
2399 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002400 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002401 rsm.sm_mmatch->endpos[no].col);
2402 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002403 if (round == 2)
2404 retval[len] = NUL;
2405 ++len;
2406 }
2407
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002408 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002409 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002410 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002411 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002412 return NULL;
2413 }
2414 }
2415 }
2416 else
2417 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002418 s = rsm.sm_match->startp[no];
2419 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420 retval = NULL;
2421 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002422 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002423 }
2424
2425 return retval;
2426}
Bram Moolenaar41571762014-04-02 19:00:58 +02002427
2428/*
2429 * Used for the submatch() function with the optional non-zero argument: get
2430 * the list of strings from the n'th submatch in allocated memory with NULs
2431 * represented in NLs.
2432 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2433 * command, for a non-existing submatch and for any error.
2434 */
2435 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002436reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002437{
2438 char_u *s;
2439 linenr_T slnum;
2440 linenr_T elnum;
2441 colnr_T scol;
2442 colnr_T ecol;
2443 int i;
2444 list_T *list;
2445 int error = FALSE;
2446
2447 if (!can_f_submatch || no < 0)
2448 return NULL;
2449
Bram Moolenaar6100d022016-10-02 16:51:57 +02002450 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002451 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002452 slnum = rsm.sm_mmatch->startpos[no].lnum;
2453 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002454 if (slnum < 0 || elnum < 0)
2455 return NULL;
2456
Bram Moolenaar6100d022016-10-02 16:51:57 +02002457 scol = rsm.sm_mmatch->startpos[no].col;
2458 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002459
2460 list = list_alloc();
2461 if (list == NULL)
2462 return NULL;
2463
2464 s = reg_getline_submatch(slnum) + scol;
2465 if (slnum == elnum)
2466 {
2467 if (list_append_string(list, s, ecol - scol) == FAIL)
2468 error = TRUE;
2469 }
2470 else
2471 {
2472 if (list_append_string(list, s, -1) == FAIL)
2473 error = TRUE;
2474 for (i = 1; i < elnum - slnum; i++)
2475 {
2476 s = reg_getline_submatch(slnum + i);
2477 if (list_append_string(list, s, -1) == FAIL)
2478 error = TRUE;
2479 }
2480 s = reg_getline_submatch(elnum);
2481 if (list_append_string(list, s, ecol) == FAIL)
2482 error = TRUE;
2483 }
2484 }
2485 else
2486 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002487 s = rsm.sm_match->startp[no];
2488 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002489 return NULL;
2490 list = list_alloc();
2491 if (list == NULL)
2492 return NULL;
2493 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002494 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002495 error = TRUE;
2496 }
2497
2498 if (error)
2499 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002500 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002501 return NULL;
2502 }
2503 return list;
2504}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002505#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002506
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002507#include "regexp_bt.c"
2508
/*
 * Table with the entry points of the backtracking regexp engine.
 * The bt_*() functions are presumably defined in regexp_bt.c, which is
 * included just above.
 */
static regengine_T bt_regengine =
{
    bt_regcomp,
    bt_regfree,
    bt_regexec_nl,
    bt_regexec_multi,
    (char_u *)""
};
2517
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002518#include "regexp_nfa.c"
2519
2520static regengine_T nfa_regengine =
2521{
2522 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002523 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002524 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002525 nfa_regexec_multi,
2526 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002527};
2528
// Which regexp engine to use?  Set by vim_regcomp() from 'regexpengine' or
// from a "\%#=" prefix in the pattern.
// Must match with 'regexpengine'.
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002532
#ifdef DEBUG
// Engine names used in debug messages, indexed by the engine number.
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2540
2541/*
2542 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002543 * Returns the program in allocated memory.
2544 * Use vim_regfree() to free the memory.
2545 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002546 */
2547 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002548vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002549{
2550 regprog_T *prog = NULL;
2551 char_u *expr = expr_arg;
Bram Moolenaarcd625122019-02-22 17:29:43 +01002552 int save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002553
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002554 regexp_engine = p_re;
2555
2556 /* Check for prefix "\%#=", that sets the regexp engine */
2557 if (STRNCMP(expr, "\\%#=", 4) == 0)
2558 {
2559 int newengine = expr[4] - '0';
2560
2561 if (newengine == AUTOMATIC_ENGINE
2562 || newengine == BACKTRACKING_ENGINE
2563 || newengine == NFA_ENGINE)
2564 {
2565 regexp_engine = expr[4] - '0';
2566 expr += 5;
2567#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002568 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002569 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002570#endif
2571 }
2572 else
2573 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002574 emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002575 regexp_engine = AUTOMATIC_ENGINE;
2576 }
2577 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002578#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002579 bt_regengine.expr = expr;
2580 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002581#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002582 // reg_iswordc() uses rex.reg_buf
2583 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002584
2585 /*
2586 * First try the NFA engine, unless backtracking was requested.
2587 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01002588 save_called_emsg = called_emsg;
2589 called_emsg = FALSE;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002590 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002591 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002592 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002593 else
2594 prog = bt_regengine.regcomp(expr, re_flags);
2595
Bram Moolenaarfda37292014-11-05 14:27:36 +01002596 /* Check for error compiling regexp with initial engine. */
2597 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002598 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002599#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002600 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
2601 {
2602 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002603 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002604 if (f)
2605 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002606 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002607 fclose(f);
2608 }
2609 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002610 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002611 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002612 }
2613#endif
2614 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002615 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002616 * The NFA engine also fails for patterns that it can't handle well
2617 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002618 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002619 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01002620 if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002621 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002622 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002623 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002624 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002625 }
Bram Moolenaarcd625122019-02-22 17:29:43 +01002626 called_emsg |= save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002627
Bram Moolenaarfda37292014-11-05 14:27:36 +01002628 if (prog != NULL)
2629 {
2630 /* Store the info needed to call regcomp() again when the engine turns
2631 * out to be very slow when executing it. */
2632 prog->re_engine = regexp_engine;
2633 prog->re_flags = re_flags;
2634 }
2635
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002636 return prog;
2637}
2638
2639/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002640 * Free a compiled regexp program, returned by vim_regcomp().
2641 */
2642 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002643vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002644{
2645 if (prog != NULL)
2646 prog->engine->regfree(prog);
2647}
2648
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the file-level regexp storage: the "reg_tofree" and
 * "reg_prev_sub" allocations and the "regstack" and "backpos" growarrays.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&regstack);
    ga_clear(&backpos);
}
#endif
2659
#ifdef FEAT_EVAL
/*
 * When 'verbose' is set, give a message that the backtracking engine is
 * going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2673
#if (defined(FEAT_X11) && (defined(FEAT_TITLE) || defined(FEAT_XCLIPBOARD))) \
	|| defined(PROTO)
/*
 * Return whether "prog" is currently being executed, which is when its
 * "re_in_use" flag is set.  "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002685
2686/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002687 * Match a regexp against a string.
2688 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002689 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002690 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002691 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002692 *
2693 * Return TRUE if there is a match, FALSE if not.
2694 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002695 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002696vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002697 regmatch_T *rmp,
2698 char_u *line, /* string to match against */
2699 colnr_T col, /* column to start looking for match */
2700 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002701{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002702 int result;
2703 regexec_T rex_save;
2704 int rex_in_use_save = rex_in_use;
2705
Bram Moolenaar0270f382018-07-17 05:43:58 +02002706 // Cannot use the same prog recursively, it contains state.
2707 if (rmp->regprog->re_in_use)
2708 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002709 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002710 return FALSE;
2711 }
2712 rmp->regprog->re_in_use = TRUE;
2713
Bram Moolenaar6100d022016-10-02 16:51:57 +02002714 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002715 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002716 rex_save = rex;
2717 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002718
Bram Moolenaar6100d022016-10-02 16:51:57 +02002719 rex.reg_startp = NULL;
2720 rex.reg_endp = NULL;
2721 rex.reg_startpos = NULL;
2722 rex.reg_endpos = NULL;
2723
2724 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002725 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002726
2727 /* NFA engine aborted because it's very slow. */
2728 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2729 && result == NFA_TOO_EXPENSIVE)
2730 {
2731 int save_p_re = p_re;
2732 int re_flags = rmp->regprog->re_flags;
2733 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2734
2735 p_re = BACKTRACKING_ENGINE;
2736 vim_regfree(rmp->regprog);
2737 if (pat != NULL)
2738 {
2739#ifdef FEAT_EVAL
2740 report_re_switch(pat);
2741#endif
2742 rmp->regprog = vim_regcomp(pat, re_flags);
2743 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002744 {
2745 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002746 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002747 rmp->regprog->re_in_use = FALSE;
2748 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002749 vim_free(pat);
2750 }
2751
2752 p_re = save_p_re;
2753 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002754
2755 rex_in_use = rex_in_use_save;
2756 if (rex_in_use)
2757 rex = rex_save;
2758
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002759 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002760}
2761
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002762/*
2763 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002764 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002765 */
2766 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002767vim_regexec_prog(
2768 regprog_T **prog,
2769 int ignore_case,
2770 char_u *line,
2771 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002772{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002773 int r;
2774 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002775
2776 regmatch.regprog = *prog;
2777 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002778 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002779 *prog = regmatch.regprog;
2780 return r;
2781}
2782
2783/*
2784 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002785 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002786 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002787 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002788vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002789{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002790 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002791}
2792
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002793/*
2794 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002795 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002796 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002797 */
2798 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002799vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002800{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002801 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002802}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002803
2804/*
2805 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002806 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2807 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002808 * Uses curbuf for line count and 'iskeyword'.
2809 *
2810 * Return zero if there is no match. Return number of lines contained in the
2811 * match otherwise.
2812 */
2813 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002814vim_regexec_multi(
2815 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01002816 win_T *win, /* window in which to search or NULL */
2817 buf_T *buf, /* buffer in which to search */
2818 linenr_T lnum, /* nr of line to start looking for match */
2819 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002820 proftime_T *tm, /* timeout limit or NULL */
2821 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002822{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002823 int result;
2824 regexec_T rex_save;
2825 int rex_in_use_save = rex_in_use;
2826
Bram Moolenaar0270f382018-07-17 05:43:58 +02002827 // Cannot use the same prog recursively, it contains state.
2828 if (rmp->regprog->re_in_use)
2829 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002830 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002831 return FALSE;
2832 }
2833 rmp->regprog->re_in_use = TRUE;
2834
Bram Moolenaar6100d022016-10-02 16:51:57 +02002835 if (rex_in_use)
2836 /* Being called recursively, save the state. */
2837 rex_save = rex;
2838 rex_in_use = TRUE;
2839
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002840 result = rmp->regprog->engine->regexec_multi(
2841 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002842 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002843
2844 /* NFA engine aborted because it's very slow. */
2845 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2846 && result == NFA_TOO_EXPENSIVE)
2847 {
2848 int save_p_re = p_re;
2849 int re_flags = rmp->regprog->re_flags;
2850 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2851
2852 p_re = BACKTRACKING_ENGINE;
2853 vim_regfree(rmp->regprog);
2854 if (pat != NULL)
2855 {
2856#ifdef FEAT_EVAL
2857 report_re_switch(pat);
2858#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002859#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002860 // checking for \z misuse was already done when compiling for NFA,
2861 // allow all here
2862 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002863#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01002864 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002865#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002866 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002867#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002868
Bram Moolenaarfda37292014-11-05 14:27:36 +01002869 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002870 {
2871 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002872 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002873 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002874 rmp->regprog->re_in_use = FALSE;
2875 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002876 vim_free(pat);
2877 }
2878 p_re = save_p_re;
2879 }
2880
Bram Moolenaar6100d022016-10-02 16:51:57 +02002881 rex_in_use = rex_in_use_save;
2882 if (rex_in_use)
2883 rex = rex_save;
2884
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002885 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002886}