blob: 42f34c2f9957622856c7b6045c18bb8f1979ef9f [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
Bram Moolenaar071d4272004-06-13 20:20:40 +00004 */
5
Bram Moolenaarc2d09c92019-04-25 20:07:51 +02006// By default: do not create debugging logs or files related to regular
7// expressions, even when compiling with -DDEBUG.
8// Uncomment the second line to get the regexp debugging.
9#undef DEBUG
10// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020011
Bram Moolenaar071d4272004-06-13 20:20:40 +000012#include "vim.h"
13
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020014#ifdef DEBUG
15/* show/save debugging data when BT engine is used */
16# define BT_REGEXP_DUMP
17/* save the debugging data to a file instead of displaying it */
18# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020019# define BT_REGEXP_DEBUG_LOG
20# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020021#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000022
23/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000024 * Magic characters have a special meaning, they don't match literally.
25 * Magic characters are negative. This separates them from literal characters
26 * (possibly multi-byte). Only ASCII characters can be Magic.
27 */
28#define Magic(x) ((int)(x) - 256)
29#define un_Magic(x) ((x) + 256)
30#define is_Magic(x) ((x) < 0)
31
/*
 * Return "x" without its magicness: a Magic (negative) value is converted
 * back with un_Magic(), any other value is returned unchanged.
 */
static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
39
/*
 * Flip the magicness of "x": a Magic value becomes its literal character and
 * a literal character becomes Magic.
 */
static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
47
48/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020049 * The first byte of the BT regexp internal "program" is actually this magic
Bram Moolenaar071d4272004-06-13 20:20:40 +000050 * number; the start node begins in the second byte. It's used to catch the
51 * most severe mutilation of the program by the caller.
52 */
53
54#define REGMAGIC 0234
55
56/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000057 * Utility definitions.
58 */
59#define UCHARAT(p) ((int)*(char_u *)(p))
60
61/* Used for an error (down from) vim_regcomp(): give the error message, set
62 * rc_did_emsg and return NULL */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010063#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
64#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
65#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
66#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +010067#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +010068#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020069#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +000070
Bram Moolenaar95f09602016-11-10 20:01:45 +010071
Bram Moolenaar071d4272004-06-13 20:20:40 +000072#define MAX_LIMIT (32767L << 16L)
73
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020074static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +020075static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
76static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020077static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
78static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
79static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +020080#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +020081static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
Bram Moolenaarbcf94422018-06-23 14:21:42 +020082static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +020083#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +020084static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +020085static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar0270f382018-07-17 05:43:58 +020086static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
87
Bram Moolenaar071d4272004-06-13 20:20:40 +000088#define NOT_MULTI 0
89#define MULTI_ONE 1
90#define MULTI_MULT 2
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +020091
92// return values for regmatch()
93#define RA_FAIL 1 /* something failed, abort */
94#define RA_CONT 2 /* continue in inner loop */
95#define RA_BREAK 3 /* break inner loop */
96#define RA_MATCH 4 /* successful match */
97#define RA_NOMATCH 5 /* didn't match */
98
Bram Moolenaar071d4272004-06-13 20:20:40 +000099/*
100 * Return NOT_MULTI if c is not a "multi" operator.
101 * Return MULTI_ONE if c is a single "multi" operator.
102 * Return MULTI_MULT if c is a multi "multi" operator.
103 */
104 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100105re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000106{
107 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
108 return MULTI_ONE;
109 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
110 return MULTI_MULT;
111 return NOT_MULTI;
112}
113
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000114static char_u *reg_prev_sub = NULL;
115
Bram Moolenaar071d4272004-06-13 20:20:40 +0000116/*
117 * REGEXP_INRANGE contains all characters which are always special in a []
118 * range after '\'.
119 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
120 * These are:
121 * \n - New line (NL).
122 * \r - Carriage Return (CR).
123 * \t - Tab (TAB).
124 * \e - Escape (ESC).
125 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000126 * \d - Character code in decimal, eg \d123
 *	\o	- Character code in octal, eg \o40
128 * \x - Character code in hex, eg \x4a
129 * \u - Multibyte character code, eg \u20ac
130 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000131 */
132static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000133static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000134
Bram Moolenaar071d4272004-06-13 20:20:40 +0000135/*
136 * Translate '\x' to its control character, except "\n", which is Magic.
137 */
138 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100139backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000140{
141 switch (c)
142 {
143 case 'r': return CAR;
144 case 't': return TAB;
145 case 'e': return ESC;
146 case 'b': return BS;
147 }
148 return c;
149}
150
151/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000152 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000153 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
154 * recognized. Otherwise "pp" is advanced to after the item.
155 */
156 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100157get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000158{
159 static const char *(class_names[]) =
160 {
161 "alnum:]",
162#define CLASS_ALNUM 0
163 "alpha:]",
164#define CLASS_ALPHA 1
165 "blank:]",
166#define CLASS_BLANK 2
167 "cntrl:]",
168#define CLASS_CNTRL 3
169 "digit:]",
170#define CLASS_DIGIT 4
171 "graph:]",
172#define CLASS_GRAPH 5
173 "lower:]",
174#define CLASS_LOWER 6
175 "print:]",
176#define CLASS_PRINT 7
177 "punct:]",
178#define CLASS_PUNCT 8
179 "space:]",
180#define CLASS_SPACE 9
181 "upper:]",
182#define CLASS_UPPER 10
183 "xdigit:]",
184#define CLASS_XDIGIT 11
185 "tab:]",
186#define CLASS_TAB 12
187 "return:]",
188#define CLASS_RETURN 13
189 "backspace:]",
190#define CLASS_BACKSPACE 14
191 "escape:]",
192#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100193 "ident:]",
194#define CLASS_IDENT 16
195 "keyword:]",
196#define CLASS_KEYWORD 17
197 "fname:]",
198#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000199 };
200#define CLASS_NONE 99
201 int i;
202
203 if ((*pp)[1] == ':')
204 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000205 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000206 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
207 {
208 *pp += STRLEN(class_names[i]) + 2;
209 return i;
210 }
211 }
212 return CLASS_NONE;
213}
214
215/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000216 * Specific version of character class functions.
217 * Using a table to keep this fast.
218 */
219static short class_tab[256];
220
221#define RI_DIGIT 0x01
222#define RI_HEX 0x02
223#define RI_OCTAL 0x04
224#define RI_WORD 0x08
225#define RI_HEAD 0x10
226#define RI_ALPHA 0x20
227#define RI_LOWER 0x40
228#define RI_UPPER 0x80
229#define RI_WHITE 0x100
230
231 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100232init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000233{
234 int i;
235 static int done = FALSE;
236
237 if (done)
238 return;
239
240 for (i = 0; i < 256; ++i)
241 {
242 if (i >= '0' && i <= '7')
243 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
244 else if (i >= '8' && i <= '9')
245 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
246 else if (i >= 'a' && i <= 'f')
247 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
248#ifdef EBCDIC
249 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
250 || (i >= 's' && i <= 'z'))
251#else
252 else if (i >= 'g' && i <= 'z')
253#endif
254 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
255 else if (i >= 'A' && i <= 'F')
256 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
257#ifdef EBCDIC
258 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
259 || (i >= 'S' && i <= 'Z'))
260#else
261 else if (i >= 'G' && i <= 'Z')
262#endif
263 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
264 else if (i == '_')
265 class_tab[i] = RI_WORD + RI_HEAD;
266 else
267 class_tab[i] = 0;
268 }
269 class_tab[' '] |= RI_WHITE;
270 class_tab['\t'] |= RI_WHITE;
271 done = TRUE;
272}
273
/*
 * Character class tests using class_tab[].  Each macro is non-zero when "c"
 * is below 0x100 and has the corresponding RI_ flag set in class_tab[].
 * The argument is parenthesized so that an expression such as "a | b" or
 * "x ? y : z" is compared and indexed as a whole.
 * "c" is evaluated more than once and is used directly as an array index, so
 * pass a side-effect-free, non-negative value.
 */
#define ri_digit(c)	((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[(c)] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000283
284/* flags for regflags */
285#define RF_ICASE 1 /* ignore case */
286#define RF_NOICASE 2 /* don't ignore case */
287#define RF_HASNL 4 /* can match a NL */
288#define RF_ICOMBINE 8 /* ignore combining characters */
289#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
290
291/*
292 * Global work variables for vim_regcomp().
293 */
294
295static char_u *regparse; /* Input-scan pointer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000296static int regnpar; /* () count. */
297#ifdef FEAT_SYN_HL
298static int regnzpar; /* \z() count. */
299static int re_has_z; /* \z item detected */
300#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000301static unsigned regflags; /* RF_ flags for prog */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000302#if defined(FEAT_SYN_HL) || defined(PROTO)
303static int had_eol; /* TRUE when EOL found by vim_regcomp() */
304#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000305
306static int reg_magic; /* magicness of the pattern: */
307#define MAGIC_NONE 1 /* "\V" very unmagic */
308#define MAGIC_OFF 2 /* "\M" or 'magic' off */
309#define MAGIC_ON 3 /* "\m" or 'magic' */
310#define MAGIC_ALL 4 /* "\v" very magic */
311
312static int reg_string; /* matching with a string instead of a buffer
313 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000314static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000315
316/*
317 * META contains all characters that may be magic, except '^' and '$'.
318 */
319
320#ifdef EBCDIC
321static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
322#else
323/* META[] is used often enough to justify turning it into a table. */
324static char_u META_flags[] = {
325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327/* % & ( ) * + . */
328 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
329/* 1 2 3 4 5 6 7 8 9 < = > ? */
330 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
331/* @ A C D F H I K L M O */
332 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
333/* P S U V W X Z [ _ */
334 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
335/* a c d f h i k l m n o */
336 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
337/* p s u v w x z { | ~ */
338 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
339};
340#endif
341
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200342static int curchr; /* currently parsed character */
343/* Previous character. Note: prevchr is sometimes -1 when we are not at the
344 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
345 * because ^ was taken to be magic -- webb */
346static int prevchr;
347static int prevprevchr; /* previous-previous character */
348static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000349
350/* arguments for reg() */
351#define REG_NOPAREN 0 /* toplevel reg() */
352#define REG_PAREN 1 /* \(\) */
353#define REG_ZPAREN 2 /* \z(\) */
354#define REG_NPAREN 3 /* \%(\) */
355
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200356typedef struct
357{
358 char_u *regparse;
359 int prevchr_len;
360 int curchr;
361 int prevchr;
362 int prevprevchr;
363 int nextchr;
364 int at_start;
365 int prev_at_start;
366 int regnpar;
367} parse_state_T;
368
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100369static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100370static int getchr(void);
371static void skipchr_keepstart(void);
372static int peekchr(void);
373static void skipchr(void);
374static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100375static long gethexchrs(int maxinputlen);
376static long getoctchrs(void);
377static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100378static int coll_get_char(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100379static int prog_magic_wrong(void);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200380static int cstrncmp(char_u *s1, char_u *s2, int *n);
381static char_u *cstrchr(char_u *, int);
382static int re_mult_next(char *what);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100383static int reg_iswordc(int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200385static regengine_T bt_regengine;
386static regengine_T nfa_regengine;
387
Bram Moolenaar071d4272004-06-13 20:20:40 +0000388/*
389 * Return TRUE if compiled regular expression "prog" can match a line break.
390 */
391 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100392re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000393{
394 return (prog->regflags & RF_HASNL);
395}
396
397/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000398 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
399 * Returns a character representing the class. Zero means that no item was
400 * recognized. Otherwise "pp" is advanced to after the item.
401 */
402 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100403get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000404{
405 int c;
406 int l = 1;
407 char_u *p = *pp;
408
Bram Moolenaar985079c2019-02-16 17:07:47 +0100409 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000410 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000411 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000412 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000413 if (p[l + 2] == '=' && p[l + 3] == ']')
414 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000415 if (has_mbyte)
416 c = mb_ptr2char(p + 2);
417 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000418 c = p[2];
419 *pp += l + 4;
420 return c;
421 }
422 }
423 return 0;
424}
425
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200426#ifdef EBCDIC
427/*
428 * Table for equivalence class "c". (IBM-1047)
429 */
Bram Moolenaar5843f5f2019-08-20 20:13:45 +0200430static char *EQUIVAL_CLASS_C[16] = {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200431 "A\x62\x63\x64\x65\x66\x67",
432 "C\x68",
433 "E\x71\x72\x73\x74",
434 "I\x75\x76\x77\x78",
435 "N\x69",
Bram Moolenaar22e42152016-04-03 14:02:02 +0200436 "O\xEB\xEC\xED\xEE\xEF\x80",
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200437 "U\xFB\xFC\xFD\xFE",
438 "Y\xBA",
439 "a\x42\x43\x44\x45\x46\x47",
440 "c\x48",
441 "e\x51\x52\x53\x54",
442 "i\x55\x56\x57\x58",
443 "n\x49",
Bram Moolenaar22e42152016-04-03 14:02:02 +0200444 "o\xCB\xCC\xCD\xCE\xCF\x70",
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200445 "u\xDB\xDC\xDD\xDE",
446 "y\x8D\xDF",
447};
448#endif
449
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000451 * Check for a collating element "[.a.]". "pp" points to the '['.
452 * Returns a character. Zero means that no item was recognized. Otherwise
453 * "pp" is advanced to after the item.
454 * Currently only single characters are recognized!
455 */
456 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100457get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000458{
459 int c;
460 int l = 1;
461 char_u *p = *pp;
462
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +0100463 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000464 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000465 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000466 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000467 if (p[l + 2] == '.' && p[l + 3] == ']')
468 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000469 if (has_mbyte)
470 c = mb_ptr2char(p + 2);
471 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000472 c = p[2];
473 *pp += l + 4;
474 return c;
475 }
476 }
477 return 0;
478}
479
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200480static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
481static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
482
483 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100484get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200485{
486 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
487 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
488}
Bram Moolenaardf177f62005-02-22 08:39:57 +0000489
490/*
491 * Skip over a "[]" range.
492 * "p" must point to the character after the '['.
493 * The returned pointer is on the matching ']', or the terminating NUL.
494 */
495 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100496skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000497{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000498 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000499
Bram Moolenaardf177f62005-02-22 08:39:57 +0000500 if (*p == '^') /* Complement of range. */
501 ++p;
502 if (*p == ']' || *p == '-')
503 ++p;
504 while (*p != NUL && *p != ']')
505 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000506 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000507 p += l;
508 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000509 if (*p == '-')
510 {
511 ++p;
512 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100513 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000514 }
515 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200516 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +0000517 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200518 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +0000519 p += 2;
520 else if (*p == '[')
521 {
522 if (get_char_class(&p) == CLASS_NONE
523 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200524 && get_coll_element(&p) == 0
525 && *p != NUL)
526 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +0000527 }
528 else
529 ++p;
530 }
531
532 return p;
533}
534
535/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000536 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +0000537 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000538 * Take care of characters with a backslash in front of it.
539 * Skip strings inside [ and ].
540 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
541 * expression and change "\?" to "?". If "*newp" is not NULL the expression
542 * is changed in-place.
543 */
544 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +0100545skip_regexp(
546 char_u *startp,
547 int dirc,
548 int magic,
549 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000550{
551 int mymagic;
552 char_u *p = startp;
553
554 if (magic)
555 mymagic = MAGIC_ON;
556 else
557 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200558 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +0000559
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100560 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000561 {
562 if (p[0] == dirc) /* found end of regexp */
563 break;
564 if ((p[0] == '[' && mymagic >= MAGIC_ON)
565 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
566 {
567 p = skip_anyof(p + 1);
568 if (p[0] == NUL)
569 break;
570 }
571 else if (p[0] == '\\' && p[1] != NUL)
572 {
573 if (dirc == '?' && newp != NULL && p[1] == '?')
574 {
575 /* change "\?" to "?", make a copy first. */
576 if (*newp == NULL)
577 {
578 *newp = vim_strsave(startp);
579 if (*newp != NULL)
580 p = *newp + (p - startp);
581 }
582 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000583 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000584 else
585 ++p;
586 }
587 else
588 ++p; /* skip next character */
589 if (*p == 'v')
590 mymagic = MAGIC_ALL;
591 else if (*p == 'V')
592 mymagic = MAGIC_NONE;
593 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594 }
595 return p;
596}
597
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200598/*
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200599 * Functions for getting characters from the regexp input.
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +0200600 */
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200601static int prevchr_len; /* byte length of previous char */
Bram Moolenaar0270f382018-07-17 05:43:58 +0200602static int at_start; // True when on the first character
603static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +0100604
Bram Moolenaar071d4272004-06-13 20:20:40 +0000605/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200606 * Start parsing at "str".
607 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100609initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610{
611 regparse = str;
612 prevchr_len = 0;
613 curchr = prevprevchr = prevchr = nextchr = -1;
614 at_start = TRUE;
615 prev_at_start = FALSE;
616}
617
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200618/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200619 * Save the current parse state, so that it can be restored and parsing
620 * starts in the same state again.
621 */
622 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100623save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200624{
625 ps->regparse = regparse;
626 ps->prevchr_len = prevchr_len;
627 ps->curchr = curchr;
628 ps->prevchr = prevchr;
629 ps->prevprevchr = prevprevchr;
630 ps->nextchr = nextchr;
631 ps->at_start = at_start;
632 ps->prev_at_start = prev_at_start;
633 ps->regnpar = regnpar;
634}
635
636/*
637 * Restore a previously saved parse state.
638 */
639 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100640restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200641{
642 regparse = ps->regparse;
643 prevchr_len = ps->prevchr_len;
644 curchr = ps->curchr;
645 prevchr = ps->prevchr;
646 prevprevchr = ps->prevprevchr;
647 nextchr = ps->nextchr;
648 at_start = ps->at_start;
649 prev_at_start = ps->prev_at_start;
650 regnpar = ps->regnpar;
651}
652
653
654/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200655 * Get the next character without advancing.
656 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100658peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000659{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000660 static int after_slash = FALSE;
661
Bram Moolenaar071d4272004-06-13 20:20:40 +0000662 if (curchr == -1)
663 {
664 switch (curchr = regparse[0])
665 {
666 case '.':
667 case '[':
668 case '~':
669 /* magic when 'magic' is on */
670 if (reg_magic >= MAGIC_ON)
671 curchr = Magic(curchr);
672 break;
673 case '(':
674 case ')':
675 case '{':
676 case '%':
677 case '+':
678 case '=':
679 case '?':
680 case '@':
681 case '!':
682 case '&':
683 case '|':
684 case '<':
685 case '>':
686 case '#': /* future ext. */
687 case '"': /* future ext. */
688 case '\'': /* future ext. */
689 case ',': /* future ext. */
690 case '-': /* future ext. */
691 case ':': /* future ext. */
692 case ';': /* future ext. */
693 case '`': /* future ext. */
694 case '/': /* Can't be used in / command */
695 /* magic only after "\v" */
696 if (reg_magic == MAGIC_ALL)
697 curchr = Magic(curchr);
698 break;
699 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +0000700 /* * is not magic as the very first character, eg "?*ptr", when
701 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
702 * "\(\*" is not magic, thus must be magic if "after_slash" */
703 if (reg_magic >= MAGIC_ON
704 && !at_start
705 && !(prev_at_start && prevchr == Magic('^'))
706 && (after_slash
707 || (prevchr != Magic('(')
708 && prevchr != Magic('&')
709 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710 curchr = Magic('*');
711 break;
712 case '^':
713 /* '^' is only magic as the very first character and if it's after
714 * "\(", "\|", "\&' or "\n" */
715 if (reg_magic >= MAGIC_OFF
716 && (at_start
717 || reg_magic == MAGIC_ALL
718 || prevchr == Magic('(')
719 || prevchr == Magic('|')
720 || prevchr == Magic('&')
721 || prevchr == Magic('n')
722 || (no_Magic(prevchr) == '('
723 && prevprevchr == Magic('%'))))
724 {
725 curchr = Magic('^');
726 at_start = TRUE;
727 prev_at_start = FALSE;
728 }
729 break;
730 case '$':
731 /* '$' is only magic as the very last char and if it's in front of
732 * either "\|", "\)", "\&", or "\n" */
733 if (reg_magic >= MAGIC_OFF)
734 {
735 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200736 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200738 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000739 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200740 || p[1] == 'm' || p[1] == 'M'
741 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
742 {
743 if (p[1] == 'v')
744 is_magic_all = TRUE;
745 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
746 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200748 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000749 if (p[0] == NUL
750 || (p[0] == '\\'
751 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
752 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +0200753 || (is_magic_all
754 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000755 || reg_magic == MAGIC_ALL)
756 curchr = Magic('$');
757 }
758 break;
759 case '\\':
760 {
761 int c = regparse[1];
762
763 if (c == NUL)
764 curchr = '\\'; /* trailing '\' */
765 else if (
766#ifdef EBCDIC
767 vim_strchr(META, c)
768#else
769 c <= '~' && META_flags[c]
770#endif
771 )
772 {
773 /*
774 * META contains everything that may be magic sometimes,
775 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +0200776 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +0000777 * magicness. Therefore, \ is so meta-magic that it is
778 * not in META.
779 */
780 curchr = -1;
781 prev_at_start = at_start;
782 at_start = FALSE; /* be able to say "/\*ptr" */
783 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000784 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000785 peekchr();
786 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000787 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000788 curchr = toggle_Magic(curchr);
789 }
790 else if (vim_strchr(REGEXP_ABBR, c))
791 {
792 /*
793 * Handle abbreviations, like "\t" for TAB -- webb
794 */
795 curchr = backslash_trans(c);
796 }
797 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
798 curchr = toggle_Magic(c);
799 else
800 {
801 /*
802 * Next character can never be (made) magic?
803 * Then backslashing it won't do anything.
804 */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000805 if (has_mbyte)
806 curchr = (*mb_ptr2char)(regparse + 1);
807 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000808 curchr = c;
809 }
810 break;
811 }
812
Bram Moolenaar071d4272004-06-13 20:20:40 +0000813 default:
814 if (has_mbyte)
815 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000816 }
817 }
818
819 return curchr;
820}
821
822/*
823 * Eat one lexed character. Do this in a way that we can undo it.
824 */
825 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100826skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000827{
828 /* peekchr() eats a backslash, do the same here */
829 if (*regparse == '\\')
830 prevchr_len = 1;
831 else
832 prevchr_len = 0;
833 if (regparse[prevchr_len] != NUL)
834 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000835 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +0000836 /* exclude composing chars that mb_ptr2len does include */
837 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000838 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000839 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000840 else
Bram Moolenaar071d4272004-06-13 20:20:40 +0000841 ++prevchr_len;
842 }
843 regparse += prevchr_len;
844 prev_at_start = at_start;
845 at_start = FALSE;
846 prevprevchr = prevchr;
847 prevchr = curchr;
848 curchr = nextchr; /* use previously unget char, or -1 */
849 nextchr = -1;
850}
851
852/*
853 * Skip a character while keeping the value of prev_at_start for at_start.
854 * prevchr and prevprevchr are also kept.
855 */
856 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100857skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000858{
859 int as = prev_at_start;
860 int pr = prevchr;
861 int prpr = prevprevchr;
862
863 skipchr();
864 at_start = as;
865 prevchr = pr;
866 prevprevchr = prpr;
867}
868
/*
 * Lexical analyzer entry point: return the next character of the pattern, as
 * interpreted by peekchr(), and advance past it with skipchr().
 */
    static int
getchr(void)
{
    int     lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
881
882/*
883 * put character back. Works only once!
884 */
885 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100886ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000887{
888 nextchr = curchr;
889 curchr = prevchr;
890 prevchr = prevprevchr;
891 at_start = prev_at_start;
892 prev_at_start = FALSE;
893
894 /* Backup regparse, so that it's at the same position as before the
895 * getchr(). */
896 regparse -= prevchr_len;
897}
898
899/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +0000900 * Get and return the value of the hex string at the current position.
901 * Return -1 if there is no valid hex number.
902 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000903 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000904 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000905 * The parameter controls the maximum number of input characters. This will be
906 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
907 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100908 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100909gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000910{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100911 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000912 int c;
913 int i;
914
915 for (i = 0; i < maxinputlen; ++i)
916 {
917 c = regparse[0];
918 if (!vim_isxdigit(c))
919 break;
920 nr <<= 4;
921 nr |= hex2nr(c);
922 ++regparse;
923 }
924
925 if (i == 0)
926 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100927 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000928}
929
930/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200931 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000932 * current position. Return -1 for invalid. Consumes all digits.
933 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100934 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100935getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000936{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100937 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000938 int c;
939 int i;
940
941 for (i = 0; ; ++i)
942 {
943 c = regparse[0];
944 if (c < '0' || c > '9')
945 break;
946 nr *= 10;
947 nr += c - '0';
948 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200949 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000950 }
951
952 if (i == 0)
953 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100954 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000955}
956
957/*
958 * get and return the value of the octal string immediately after the current
959 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
960 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
961 * treat 8 or 9 as recognised characters. Position is updated:
962 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000963 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000964 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100965 static long
Bram Moolenaar05540972016-01-30 20:31:25 +0100966getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000967{
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100968 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000969 int c;
970 int i;
971
972 for (i = 0; i < 3 && nr < 040; ++i)
973 {
974 c = regparse[0];
975 if (c < '0' || c > '7')
976 break;
977 nr <<= 3;
978 nr |= hex2nr(c);
979 ++regparse;
980 }
981
982 if (i == 0)
983 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100984 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000985}
986
987/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000988 * read_limits - Read two integers to be taken as a minimum and maximum.
989 * If the first character is '-', then the range is reversed.
990 * Should end with 'end'. If minval is missing, zero is default, if maxval is
991 * missing, a very big number is the default.
992 */
993 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100994read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000995{
996 int reverse = FALSE;
997 char_u *first_char;
998 long tmp;
999
1000 if (*regparse == '-')
1001 {
1002 /* Starts with '-', so reverse the range later */
1003 regparse++;
1004 reverse = TRUE;
1005 }
1006 first_char = regparse;
1007 *minval = getdigits(&regparse);
1008 if (*regparse == ',') /* There is a comma */
1009 {
1010 if (vim_isdigit(*++regparse))
1011 *maxval = getdigits(&regparse);
1012 else
1013 *maxval = MAX_LIMIT;
1014 }
1015 else if (VIM_ISDIGIT(*first_char))
1016 *maxval = *minval; /* It was \{n} or \{-n} */
1017 else
1018 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
1019 if (*regparse == '\\')
1020 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001021 if (*regparse != '}')
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001022 EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"),
1023 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001024
1025 /*
1026 * Reverse the range if there was a '-', or make sure it is in the right
1027 * order otherwise.
1028 */
1029 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
1030 {
1031 tmp = *minval;
1032 *minval = *maxval;
1033 *maxval = tmp;
1034 }
1035 skipchr(); /* let's be friends with the lexer again */
1036 return OK;
1037}
1038
1039/*
1040 * vim_regexec and friends
1041 */
1042
1043/*
1044 * Global work variables for vim_regexec().
1045 */
1046
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001047static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001048#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001049static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001050#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001051static void reg_nextline(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001052static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001053
1054/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001055 * Sometimes need to save a copy of a line. Since alloc()/free() is very
1056 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001057 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001058 */
Bram Moolenaard4210772008-01-02 14:35:30 +00001059static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001060static unsigned reg_tofreelen;
1061
1062/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001063 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001064 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00001065 * done:
1066 * single-line multi-line
1067 * reg_match &regmatch_T NULL
1068 * reg_mmatch NULL &regmmatch_T
1069 * reg_startp reg_match->startp <invalid>
1070 * reg_endp reg_match->endp <invalid>
1071 * reg_startpos <invalid> reg_mmatch->startpos
1072 * reg_endpos <invalid> reg_mmatch->endpos
1073 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01001074 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00001075 * reg_firstlnum <invalid> first line in which to search
1076 * reg_maxline 0 last line nr
1077 * reg_line_lbr FALSE or TRUE FALSE
1078 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001079typedef struct {
1080 regmatch_T *reg_match;
1081 regmmatch_T *reg_mmatch;
1082 char_u **reg_startp;
1083 char_u **reg_endp;
1084 lpos_T *reg_startpos;
1085 lpos_T *reg_endpos;
1086 win_T *reg_win;
1087 buf_T *reg_buf;
1088 linenr_T reg_firstlnum;
1089 linenr_T reg_maxline;
1090 int reg_line_lbr; /* "\n" in string is line break */
1091
Bram Moolenaar0270f382018-07-17 05:43:58 +02001092 // The current match-position is stord in these variables:
1093 linenr_T lnum; // line number, relative to first line
1094 char_u *line; // start of current line
1095 char_u *input; // current input, points into "regline"
1096
1097 int need_clear_subexpr; // subexpressions still need to be cleared
1098#ifdef FEAT_SYN_HL
1099 int need_clear_zsubexpr; // extmatch subexpressions still need to be
1100 // cleared
1101#endif
1102
Bram Moolenaar6100d022016-10-02 16:51:57 +02001103 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
1104 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
1105 * contains '\c' or '\C' the value is overruled. */
1106 int reg_ic;
1107
Bram Moolenaar0270f382018-07-17 05:43:58 +02001108 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z
Bram Moolenaar6100d022016-10-02 16:51:57 +02001109 * flag in the regexp. Defaults to false, always. */
1110 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001111
1112 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
1113 * there is no maximum. */
1114 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001115
1116 // State for the NFA engine regexec.
1117 int nfa_has_zend; // NFA regexp \ze operator encountered.
1118 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
1119 int nfa_nsubexpr; // Number of sub expressions actually being used
1120 // during execution. 1 if only the whole match
1121 // (subexpr 0) is used.
1122 // listid is global, so that it increases on recursive calls to
1123 // nfa_regmatch(), which means we don't have to clear the lastlist field of
1124 // all the states.
1125 int nfa_listid;
1126 int nfa_alt_listid;
1127
1128#ifdef FEAT_SYN_HL
1129 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
1130#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02001131} regexec_T;
1132
1133static regexec_T rex;
1134static int rex_in_use = FALSE;
1135
Bram Moolenaar071d4272004-06-13 20:20:40 +00001136/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01001137 * Return TRUE if character 'c' is included in 'iskeyword' option for
1138 * "reg_buf" buffer.
1139 */
1140 static int
1141reg_iswordc(int c)
1142{
1143 return vim_iswordc_buf(c, rex.reg_buf);
1144}
1145
1146/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001147 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1148 */
1149 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001150reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001151{
1152 /* when looking behind for a match/no-match lnum is negative. But we
1153 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001154 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001155 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001156 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001157 /* Must have matched the "\n" in the last line. */
1158 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02001159 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001160}
1161
#ifdef FEAT_SYN_HL
// Workspace to mark the beginning and the end of \z(...\) matches.
static char_u   *reg_startzp[NSUBEXP];      // beginning, as a pointer
static char_u   *reg_endzp[NSUBEXP];        // end, as a pointer
static lpos_T   reg_startzpos[NSUBEXP];     // beginning, as a position
static lpos_T   reg_endzpos[NSUBEXP];       // end, as a position
#endif

// TRUE if using multi-line regexp.
#define REG_MULTI       (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001171
Bram Moolenaar071d4272004-06-13 20:20:40 +00001172#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00001173/*
1174 * Create a new extmatch and mark it as referenced once.
1175 */
1176 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001177make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001178{
1179 reg_extmatch_T *em;
1180
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001181 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182 if (em != NULL)
1183 em->refcnt = 1;
1184 return em;
1185}
1186
1187/*
1188 * Add a reference to an extmatch.
1189 */
1190 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001191ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001192{
1193 if (em != NULL)
1194 em->refcnt++;
1195 return em;
1196}
1197
1198/*
1199 * Remove a reference to an extmatch. If there are no references left, free
1200 * the info.
1201 */
1202 void
Bram Moolenaar05540972016-01-30 20:31:25 +01001203unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001204{
1205 int i;
1206
1207 if (em != NULL && --em->refcnt <= 0)
1208 {
1209 for (i = 0; i < NSUBEXP; ++i)
1210 vim_free(em->matches[i]);
1211 vim_free(em);
1212 }
1213}
1214#endif
1215
1216/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001217 * Get class of previous character.
1218 */
1219 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001220reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001221{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001222 if (rex.input > rex.line)
1223 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01001224 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001225 return -1;
1226}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01001227
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001228/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001229 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001230 */
1231 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001232reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001233{
1234 pos_T top, bot;
1235 linenr_T lnum;
1236 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001237 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001238 int mode;
1239 colnr_T start, end;
1240 colnr_T start2, end2;
1241 colnr_T cols;
1242
1243 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001244 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001245 return FALSE;
1246
1247 if (VIsual_active)
1248 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001249 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001250 {
1251 top = VIsual;
1252 bot = wp->w_cursor;
1253 }
1254 else
1255 {
1256 top = wp->w_cursor;
1257 bot = VIsual;
1258 }
1259 mode = VIsual_mode;
1260 }
1261 else
1262 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001263 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001264 {
1265 top = curbuf->b_visual.vi_start;
1266 bot = curbuf->b_visual.vi_end;
1267 }
1268 else
1269 {
1270 top = curbuf->b_visual.vi_end;
1271 bot = curbuf->b_visual.vi_start;
1272 }
1273 mode = curbuf->b_visual.vi_mode;
1274 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001275 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001276 if (lnum < top.lnum || lnum > bot.lnum)
1277 return FALSE;
1278
1279 if (mode == 'v')
1280 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001281 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001282 if ((lnum == top.lnum && col < top.col)
1283 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
1284 return FALSE;
1285 }
1286 else if (mode == Ctrl_V)
1287 {
1288 getvvcol(wp, &top, &start, NULL, &end);
1289 getvvcol(wp, &bot, &start2, NULL, &end2);
1290 if (start2 < start)
1291 start = start2;
1292 if (end2 > end)
1293 end = end2;
1294 if (top.col == MAXCOL || bot.col == MAXCOL)
1295 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001296 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001297 if (cols < start || cols > end - (*p_sel == 'e'))
1298 return FALSE;
1299 }
1300 return TRUE;
1301}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02001302
Bram Moolenaar071d4272004-06-13 20:20:40 +00001303/*
1304 * Check the regexp program for its magic number.
1305 * Return TRUE if it's wrong.
1306 */
1307 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001308prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001309{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001310 regprog_T *prog;
1311
Bram Moolenaar6100d022016-10-02 16:51:57 +02001312 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001313 if (prog->engine == &nfa_regengine)
1314 /* For NFA matcher we don't check the magic */
1315 return FALSE;
1316
1317 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001318 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001319 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320 return TRUE;
1321 }
1322 return FALSE;
1323}
1324
1325/*
1326 * Cleanup the subexpressions, if this wasn't done yet.
1327 * This construction is used to clear the subexpressions only when they are
1328 * used (to increase speed).
1329 */
1330 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001331cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001332{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001333 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 {
1335 if (REG_MULTI)
1336 {
1337 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001338 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1339 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001340 }
1341 else
1342 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001343 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
1344 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001345 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001346 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001347 }
1348}
1349
1350#ifdef FEAT_SYN_HL
1351 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001352cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001353{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001354 if (rex.need_clear_zsubexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001355 {
1356 if (REG_MULTI)
1357 {
1358 /* Use 0xff to set lnum to -1 */
1359 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1360 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
1361 }
1362 else
1363 {
1364 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
1365 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
1366 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001367 rex.need_clear_zsubexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 }
1369}
1370#endif
1371
1372/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02001373 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374 */
1375 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001376reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377{
Bram Moolenaar0270f382018-07-17 05:43:58 +02001378 rex.line = reg_getline(++rex.lnum);
1379 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001380 fast_breakcheck();
1381}
1382
1383/*
Bram Moolenaar580abea2013-06-14 20:31:28 +02001384 * Check whether a backreference matches.
1385 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001386 * If "bytelen" is not NULL, it is set to the byte length of the match in the
1387 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02001388 */
1389 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001390match_with_backref(
1391 linenr_T start_lnum,
1392 colnr_T start_col,
1393 linenr_T end_lnum,
1394 colnr_T end_col,
1395 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001396{
1397 linenr_T clnum = start_lnum;
1398 colnr_T ccol = start_col;
1399 int len;
1400 char_u *p;
1401
1402 if (bytelen != NULL)
1403 *bytelen = 0;
1404 for (;;)
1405 {
1406 /* Since getting one line may invalidate the other, need to make copy.
1407 * Slow! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02001408 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001409 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02001410 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02001411 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
1412 {
1413 len += 50; /* get some extra */
1414 vim_free(reg_tofree);
1415 reg_tofree = alloc(len);
1416 if (reg_tofree == NULL)
1417 return RA_FAIL; /* out of memory!*/
1418 reg_tofreelen = len;
1419 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02001420 STRCPY(reg_tofree, rex.line);
1421 rex.input = reg_tofree + (rex.input - rex.line);
1422 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001423 }
1424
1425 /* Get the line to compare with. */
1426 p = reg_getline(clnum);
1427 if (clnum == end_lnum)
1428 len = end_col - ccol;
1429 else
1430 len = (int)STRLEN(p + ccol);
1431
Bram Moolenaar0270f382018-07-17 05:43:58 +02001432 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001433 return RA_NOMATCH; /* doesn't match */
1434 if (bytelen != NULL)
1435 *bytelen += len;
1436 if (clnum == end_lnum)
1437 break; /* match and at end! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02001438 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02001439 return RA_NOMATCH; /* text too short */
1440
1441 /* Advance to next line. */
1442 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01001443 if (bytelen != NULL)
1444 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02001445 ++clnum;
1446 ccol = 0;
1447 if (got_int)
1448 return RA_FAIL;
1449 }
1450
Bram Moolenaar0270f382018-07-17 05:43:58 +02001451 /* found a match! Note that rex.line may now point to a copy of the line,
Bram Moolenaar580abea2013-06-14 20:31:28 +02001452 * that should not matter. */
1453 return RA_MATCH;
1454}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001455
Bram Moolenaarfb031402014-09-09 17:18:49 +02001456/*
1457 * Used in a place where no * or \+ can follow.
1458 */
1459 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001460re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02001461{
1462 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001463 {
1464 semsg(_("E888: (NFA regexp) cannot repeat %s"), what);
1465 rc_did_emsg = TRUE;
1466 return FAIL;
1467 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02001468 return OK;
1469}
1470
// One decomposition: up to three characters, unused positions are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decompositions for the characters 0xfb20 - 0xfb4f, indexed by the character
// minus 0xfb20.  Unused code points map to themselves.
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},          // 0xfb20	alt ayin
    {0x5d0, 0, 0},          // 0xfb21	alt alef
    {0x5d3, 0, 0},          // 0xfb22	alt dalet
    {0x5d4, 0, 0},          // 0xfb23	alt he
    {0x5db, 0, 0},          // 0xfb24	alt kaf
    {0x5dc, 0, 0},          // 0xfb25	alt lamed
    {0x5dd, 0, 0},          // 0xfb26	alt mem-sofit
    {0x5e8, 0, 0},          // 0xfb27	alt resh
    {0x5ea, 0, 0},          // 0xfb28	alt tav
    {'+', 0, 0},            // 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},      // 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},      // 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},  // 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},  // 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},      // 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},      // 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},      // 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},      // 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},      // 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},      // 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},      // 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},      // 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},      // 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},         // 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},      // 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},      // 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},      // 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},      // 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},      // 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},         // 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},      // 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},         // 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},      // 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},      // 0xfb41	samech+dagesh
    {0xfb42, 0, 0},         // 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},      // 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc, 0},      // 0xfb44	pe+dagesh
    {0xfb45, 0, 0},         // 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},      // 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},      // 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},      // 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},      // 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},      // 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},      // 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},      // 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},      // 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},      // 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}       // 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into up to three characters, stored in "*c1",
 * "*c2" and "*c3".  Only the range 0xfb20 - 0xfb4f is looked up in
 * "decomp_table".  Any other character is returned unchanged in "*c1", with
 * "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    const decomp_T  *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        // Not in the table: the character stands for itself.
        *c1 = c;
        *c2 = 0;
        *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001548
1549/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02001550 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001551 * Return 0 if strings match, non-zero otherwise.
1552 * Correct the length "*n" when composing characters are ignored.
1553 */
1554 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001555cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001556{
1557 int result;
1558
Bram Moolenaar6100d022016-10-02 16:51:57 +02001559 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560 result = STRNCMP(s1, s2, *n);
1561 else
1562 result = MB_STRNICMP(s1, s2, *n);
1563
Bram Moolenaar071d4272004-06-13 20:20:40 +00001564 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001565 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001566 {
1567 char_u *str1, *str2;
1568 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001569 int junk;
1570
1571 /* we have to handle the strcmp ourselves, since it is necessary to
1572 * deal with the composing characters by ignoring them: */
1573 str1 = s1;
1574 str2 = s2;
1575 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00001576 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001577 {
1578 c1 = mb_ptr2char_adv(&str1);
1579 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001581 // Decompose the character if necessary, into 'base' characters.
1582 // Currently hard-coded for Hebrew, Arabic to be done...
Bram Moolenaar6100d022016-10-02 16:51:57 +02001583 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001584 {
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001585 // decomposition necessary?
Bram Moolenaar071d4272004-06-13 20:20:40 +00001586 mb_decompose(c1, &c11, &junk, &junk);
1587 mb_decompose(c2, &c12, &junk, &junk);
1588 c1 = c11;
1589 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001590 if (c11 != c12
1591 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001592 break;
1593 }
1594 }
1595 result = c2 - c1;
1596 if (result == 0)
1597 *n = (int)(str2 - s2);
1598 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001599
1600 return result;
1601}
1602
1603/*
1604 * cstrchr: This function is used a lot for simple searches, keep it fast!
1605 */
1606 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001607cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001608{
1609 char_u *p;
1610 int cc;
1611
Bram Moolenaara12a1612019-01-24 16:39:02 +01001612 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001613 return vim_strchr(s, c);
1614
1615 /* tolower() and toupper() can be slow, comparing twice should be a lot
1616 * faster (esp. when using MS Visual C++!).
1617 * For UTF-8 need to use folded case. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618 if (enc_utf8 && c > 0x80)
1619 cc = utf_fold(c);
1620 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00001621 if (MB_ISUPPER(c))
1622 cc = MB_TOLOWER(c);
1623 else if (MB_ISLOWER(c))
1624 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001625 else
1626 return vim_strchr(s, c);
1627
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 if (has_mbyte)
1629 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001630 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001631 {
1632 if (enc_utf8 && c > 0x80)
1633 {
1634 if (utf_fold(utf_ptr2char(p)) == cc)
1635 return p;
1636 }
1637 else if (*p == c || *p == cc)
1638 return p;
1639 }
1640 }
1641 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001642 /* Faster version for when there are no multi-byte characters. */
1643 for (p = s; *p != NUL; ++p)
1644 if (*p == c || *p == cc)
1645 return p;
1646
1647 return NULL;
1648}
1649
1650/***************************************************************
1651 * regsub stuff *
1652 ***************************************************************/
1653
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  The returned pointer must be cast
 * to fptr_T before it is used.
 * The inner function type has a "(void)" prototype: an empty parameter list
 * is an obsolescent, unprototyped declaration.
 */
typedef void (*(*fptr_T)(int *, int))(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001661
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001662static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001663
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001664 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001665do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001666{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001667 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001668
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001669 return (fptr_T)NULL;
1670}
1671
1672 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001673do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001674{
1675 *d = MB_TOUPPER(c);
1676
1677 return (fptr_T)do_Upper;
1678}
1679
1680 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001681do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001682{
1683 *d = MB_TOLOWER(c);
1684
1685 return (fptr_T)NULL;
1686}
1687
1688 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01001689do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001690{
1691 *d = MB_TOLOWER(c);
1692
1693 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001694}
1695
1696/*
1697 * regtilde(): Replace tildes in the pattern by the old pattern.
1698 *
1699 * Short explanation of the tilde: It stands for the previous replacement
1700 * pattern. If that previous pattern also contains a ~ we should go back a
1701 * step further... But we insert the previous pattern into the current one
1702 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001703 * This still does not handle the case where "magic" changes. So require the
1704 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00001705 *
1706 * The tildes are parsed once before the first call to vim_regsub().
1707 */
1708 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001709regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001710{
1711 char_u *newsub = source;
1712 char_u *tmpsub;
1713 char_u *p;
1714 int len;
1715 int prevlen;
1716
1717 for (p = newsub; *p; ++p)
1718 {
1719 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
1720 {
1721 if (reg_prev_sub != NULL)
1722 {
1723 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
1724 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02001725 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726 if (tmpsub != NULL)
1727 {
1728 /* copy prefix */
1729 len = (int)(p - newsub); /* not including ~ */
1730 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00001731 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001732 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
1733 /* copy postfix */
1734 if (!magic)
1735 ++p; /* back off \ */
1736 STRCPY(tmpsub + len + prevlen, p + 1);
1737
1738 if (newsub != source) /* already allocated newsub */
1739 vim_free(newsub);
1740 newsub = tmpsub;
1741 p = newsub + len + prevlen;
1742 }
1743 }
1744 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001745 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00001747 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001748 --p;
1749 }
1750 else
1751 {
1752 if (*p == '\\' && p[1]) /* skip escaped characters */
1753 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001754 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001755 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001756 }
1757 }
1758
1759 vim_free(reg_prev_sub);
1760 if (newsub != source) /* newsub was allocated, just keep it */
1761 reg_prev_sub = newsub;
1762 else /* no ~ found, need to save newsub */
1763 reg_prev_sub = vim_strsave(newsub);
1764 return newsub;
1765}
1766
#ifdef FEAT_EVAL
// TRUE when submatch() can be used
static int can_f_submatch = FALSE;

/*
 * Match information used by reg_submatch().  It is kept separately from
 * "rex" because the substitution string can be an expression that itself
 * contains a call to substitute() and submatch().
 */
typedef struct {
    regmatch_T  *sm_match;      // set from rex.reg_match
    regmmatch_T *sm_mmatch;     // set from rex.reg_mmatch
    linenr_T    sm_firstlnum;   // set from rex.reg_firstlnum
    linenr_T    sm_maxline;     // set from rex.reg_maxline
    int         sm_line_lbr;    // set from rex.reg_line_lbr
} regsubmatch_T;

// can only be used when can_f_submatch is TRUE
static regsubmatch_T rsm;
#endif
1783
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001784#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001785
1786/*
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001787 * Put the submatches in "argv[argskip]" which is a list passed into
1788 * call_func() by vim_regsub_both().
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001789 */
1790 static int
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001791fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, int argcount)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001792{
1793 listitem_T *li;
1794 int i;
1795 char_u *s;
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001796 typval_T *listarg = argv + argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001797
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001798 if (argcount == argskip)
1799 // called function doesn't take a submatches argument
1800 return argskip;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001801
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001802 // Relies on sl_list to be the first item in staticList10_T.
1803 init_static_list((staticList10_T *)(listarg->vval.v_list));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001804
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001805 // There are always 10 list items in staticList10_T.
1806 li = listarg->vval.v_list->lv_first;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001807 for (i = 0; i < 10; ++i)
1808 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001809 s = rsm.sm_match->startp[i];
1810 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001811 s = NULL;
1812 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02001813 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001814 li->li_tv.v_type = VAR_STRING;
1815 li->li_tv.vval.v_string = s;
1816 li = li->li_next;
1817 }
Bram Moolenaarb0745b22019-11-09 22:28:11 +01001818 return argskip + 1;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001819}
1820
1821 static void
1822clear_submatch_list(staticList10_T *sl)
1823{
1824 int i;
1825
1826 for (i = 0; i < 10; ++i)
1827 vim_free(sl->sl_items[i].li_tv.vval.v_string);
1828}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02001829#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001830
Bram Moolenaar071d4272004-06-13 20:20:40 +00001831/*
1832 * vim_regsub() - perform substitutions after a vim_regexec() or
1833 * vim_regexec_multi() match.
1834 *
1835 * If "copy" is TRUE really copy into "dest".
1836 * If "copy" is FALSE nothing is copied, this is just to find out the length
1837 * of the result.
1838 *
1839 * If "backslash" is TRUE, a backslash will be removed later, need to double
1840 * them to keep them, and insert a backslash before a CR to avoid it being
1841 * replaced with a line break later.
1842 *
1843 * Note: The matched text must not change between the call of
1844 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1845 * references invalid!
1846 *
1847 * Returns the size of the replacement, including terminating NUL.
1848 */
1849 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001850vim_regsub(
1851 regmatch_T *rmp,
1852 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001853 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001854 char_u *dest,
1855 int copy,
1856 int magic,
1857 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001858{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001859 int result;
1860 regexec_T rex_save;
1861 int rex_in_use_save = rex_in_use;
1862
1863 if (rex_in_use)
1864 /* Being called recursively, save the state. */
1865 rex_save = rex;
1866 rex_in_use = TRUE;
1867
1868 rex.reg_match = rmp;
1869 rex.reg_mmatch = NULL;
1870 rex.reg_maxline = 0;
1871 rex.reg_buf = curbuf;
1872 rex.reg_line_lbr = TRUE;
1873 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
1874
1875 rex_in_use = rex_in_use_save;
1876 if (rex_in_use)
1877 rex = rex_save;
1878
1879 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880}
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881
1882 int
Bram Moolenaar05540972016-01-30 20:31:25 +01001883vim_regsub_multi(
1884 regmmatch_T *rmp,
1885 linenr_T lnum,
1886 char_u *source,
1887 char_u *dest,
1888 int copy,
1889 int magic,
1890 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001891{
Bram Moolenaar6100d022016-10-02 16:51:57 +02001892 int result;
1893 regexec_T rex_save;
1894 int rex_in_use_save = rex_in_use;
1895
1896 if (rex_in_use)
1897 /* Being called recursively, save the state. */
1898 rex_save = rex;
1899 rex_in_use = TRUE;
1900
1901 rex.reg_match = NULL;
1902 rex.reg_mmatch = rmp;
1903 rex.reg_buf = curbuf; /* always works on the current buffer! */
1904 rex.reg_firstlnum = lnum;
1905 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1906 rex.reg_line_lbr = FALSE;
1907 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
1908
1909 rex_in_use = rex_in_use_save;
1910 if (rex_in_use)
1911 rex = rex_save;
1912
1913 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914}
1915
1916 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001917vim_regsub_both(
1918 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001919 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01001920 char_u *dest,
1921 int copy,
1922 int magic,
1923 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001924{
1925 char_u *src;
1926 char_u *dst;
1927 char_u *s;
1928 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00001929 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01001931 fptr_T func_all = (fptr_T)NULL;
1932 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001933 linenr_T clnum = 0; /* init for GCC */
1934 int len = 0; /* init for GCC */
1935#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001936 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001938
1939 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001940 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001941 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001942 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001943 return 0;
1944 }
1945 if (prog_magic_wrong())
1946 return 0;
1947 src = source;
1948 dst = dest;
1949
1950 /*
1951 * When the substitute part starts with "\=" evaluate it as an expression.
1952 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02001953 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001954 {
1955#ifdef FEAT_EVAL
1956 /* To make sure that the length doesn't change between checking the
1957 * length and copying the string, and to speed up things, the
1958 * resulting string is saved from the call with "copy" == FALSE to the
1959 * call with "copy" == TRUE. */
1960 if (copy)
1961 {
1962 if (eval_result != NULL)
1963 {
1964 STRCPY(dest, eval_result);
1965 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01001966 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001967 }
1968 }
1969 else
1970 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02001971 int prev_can_f_submatch = can_f_submatch;
1972 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001973
1974 vim_free(eval_result);
1975
1976 /* The expression may contain substitute(), which calls us
1977 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02001978 * level. */
1979 if (can_f_submatch)
1980 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001981 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001982 rsm.sm_match = rex.reg_match;
1983 rsm.sm_mmatch = rex.reg_mmatch;
1984 rsm.sm_firstlnum = rex.reg_firstlnum;
1985 rsm.sm_maxline = rex.reg_maxline;
1986 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001987
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001988 if (expr != NULL)
1989 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001990 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001991 char_u buf[NUMBUFLEN];
1992 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02001993 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02001994 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02001995
1996 rettv.v_type = VAR_STRING;
1997 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02001998 argv[0].v_type = VAR_LIST;
1999 argv[0].vval.v_list = &matchList.sl_list;
2000 matchList.sl_list.lv_len = 0;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002001 vim_memset(&funcexe, 0, sizeof(funcexe));
2002 funcexe.argv_func = fill_submatch_list;
2003 funcexe.evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002004 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002005 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002006 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002007 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002008 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002009 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002010 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002011 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002012
Bram Moolenaar6100d022016-10-02 16:51:57 +02002013 s = partial_name(partial);
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02002014 funcexe.partial = partial;
2015 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002016 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002017 if (matchList.sl_list.lv_len > 0)
2018 /* fill_submatch_list() was called */
2019 clear_submatch_list(&matchList);
2020
Bram Moolenaard155d7a2018-12-21 16:04:21 +01002021 eval_result = tv_get_string_buf_chk(&rettv, buf);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002022 if (eval_result != NULL)
2023 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02002024 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02002025 }
2026 else
2027 eval_result = eval_to_string(source + 2, NULL, TRUE);
2028
Bram Moolenaar071d4272004-06-13 20:20:40 +00002029 if (eval_result != NULL)
2030 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01002031 int had_backslash = FALSE;
2032
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002033 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002034 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02002035 /* Change NL to CR, so that it becomes a line break,
2036 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00002037 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002038 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002039 *s = CAR;
2040 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01002041 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002042 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02002043 /* Change NL to CR here too, so that this works:
2044 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
2045 * abc\
2046 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02002047 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02002048 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002049 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02002050 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01002051 had_backslash = TRUE;
2052 }
2053 }
2054 if (had_backslash && backslash)
2055 {
2056 /* Backslashes will be consumed, need to double them. */
2057 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2058 if (s != NULL)
2059 {
2060 vim_free(eval_result);
2061 eval_result = s;
2062 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002063 }
2064
2065 dst += STRLEN(eval_result);
2066 }
2067
Bram Moolenaar6100d022016-10-02 16:51:57 +02002068 can_f_submatch = prev_can_f_submatch;
2069 if (can_f_submatch)
2070 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002071 }
2072#endif
2073 }
2074 else
2075 while ((c = *src++) != NUL)
2076 {
2077 if (c == '&' && magic)
2078 no = 0;
2079 else if (c == '\\' && *src != NUL)
2080 {
2081 if (*src == '&' && !magic)
2082 {
2083 ++src;
2084 no = 0;
2085 }
2086 else if ('0' <= *src && *src <= '9')
2087 {
2088 no = *src++ - '0';
2089 }
2090 else if (vim_strchr((char_u *)"uUlLeE", *src))
2091 {
2092 switch (*src++)
2093 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002094 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002095 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002096 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002097 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002098 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002099 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002100 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 continue;
2102 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002103 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002104 continue;
2105 }
2106 }
2107 }
2108 if (no < 0) /* Ordinary character. */
2109 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00002110 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2111 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002112 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00002113 if (copy)
2114 {
2115 *dst++ = c;
2116 *dst++ = *src++;
2117 *dst++ = *src++;
2118 }
2119 else
2120 {
2121 dst += 3;
2122 src += 2;
2123 }
2124 continue;
2125 }
2126
Bram Moolenaar071d4272004-06-13 20:20:40 +00002127 if (c == '\\' && *src != NUL)
2128 {
2129 /* Check for abbreviations -- webb */
2130 switch (*src)
2131 {
2132 case 'r': c = CAR; ++src; break;
2133 case 'n': c = NL; ++src; break;
2134 case 't': c = TAB; ++src; break;
2135 /* Oh no! \e already has meaning in subst pat :-( */
2136 /* case 'e': c = ESC; ++src; break; */
2137 case 'b': c = Ctrl_H; ++src; break;
2138
2139 /* If "backslash" is TRUE the backslash will be removed
2140 * later. Used to insert a literal CR. */
2141 default: if (backslash)
2142 {
2143 if (copy)
2144 *dst = '\\';
2145 ++dst;
2146 }
2147 c = *src++;
2148 }
2149 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00002150 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002151 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002152
Bram Moolenaardb552d602006-03-23 22:59:57 +00002153 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002154 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002155 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002156 func_one = (fptr_T)(func_one(&cc, c));
2157 else if (func_all != (fptr_T)NULL)
2158 /* Turbo C complains without the typecast */
2159 func_all = (fptr_T)(func_all(&cc, c));
2160 else /* just copy */
2161 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002162
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002163 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002164 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002165 int totlen = mb_ptr2len(src - 1);
2166
Bram Moolenaar071d4272004-06-13 20:20:40 +00002167 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002168 mb_char2bytes(cc, dst);
2169 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02002170 if (enc_utf8)
2171 {
2172 int clen = utf_ptr2len(src - 1);
2173
2174 /* If the character length is shorter than "totlen", there
2175 * are composing characters; copy them as-is. */
2176 if (clen < totlen)
2177 {
2178 if (copy)
2179 mch_memmove(dst + 1, src - 1 + clen,
2180 (size_t)(totlen - clen));
2181 dst += totlen - clen;
2182 }
2183 }
2184 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002185 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002186 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002187 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002188 dst++;
2189 }
2190 else
2191 {
2192 if (REG_MULTI)
2193 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002194 clnum = rex.reg_mmatch->startpos[no].lnum;
2195 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002196 s = NULL;
2197 else
2198 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002199 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2200 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2201 len = rex.reg_mmatch->endpos[no].col
2202 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002203 else
2204 len = (int)STRLEN(s);
2205 }
2206 }
2207 else
2208 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002209 s = rex.reg_match->startp[no];
2210 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002211 s = NULL;
2212 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002213 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002214 }
2215 if (s != NULL)
2216 {
2217 for (;;)
2218 {
2219 if (len == 0)
2220 {
2221 if (REG_MULTI)
2222 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002223 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002224 break;
2225 if (copy)
2226 *dst = CAR;
2227 ++dst;
2228 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02002229 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2230 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002231 else
2232 len = (int)STRLEN(s);
2233 }
2234 else
2235 break;
2236 }
2237 else if (*s == NUL) /* we hit NUL. */
2238 {
2239 if (copy)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002240 emsg(_(e_re_damg));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002241 goto exit;
2242 }
2243 else
2244 {
2245 if (backslash && (*s == CAR || *s == '\\'))
2246 {
2247 /*
2248 * Insert a backslash in front of a CR, otherwise
2249 * it will be replaced by a line break.
2250 * Number of backslashes will be halved later,
2251 * double them here.
2252 */
2253 if (copy)
2254 {
2255 dst[0] = '\\';
2256 dst[1] = *s;
2257 }
2258 dst += 2;
2259 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002260 else
2261 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002262 if (has_mbyte)
2263 c = mb_ptr2char(s);
2264 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002265 c = *s;
2266
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002267 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002268 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01002269 func_one = (fptr_T)(func_one(&cc, c));
2270 else if (func_all != (fptr_T)NULL)
2271 /* Turbo C complains without the typecast */
2272 func_all = (fptr_T)(func_all(&cc, c));
2273 else /* just copy */
2274 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002275
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002276 if (has_mbyte)
2277 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00002278 int l;
2279
2280 /* Copy composing characters separately, one
2281 * at a time. */
2282 if (enc_utf8)
2283 l = utf_ptr2len(s) - 1;
2284 else
2285 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002286
2287 s += l;
2288 len -= l;
2289 if (copy)
2290 mb_char2bytes(cc, dst);
2291 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002292 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01002293 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002294 *dst = cc;
2295 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002296 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00002297
Bram Moolenaar071d4272004-06-13 20:20:40 +00002298 ++s;
2299 --len;
2300 }
2301 }
2302 }
2303 no = -1;
2304 }
2305 }
2306 if (copy)
2307 *dst = NUL;
2308
2309exit:
2310 return (int)((dst - dest) + 1);
2311}
2312
2313#ifdef FEAT_EVAL
2314/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002315 * Call reg_getline() with the line numbers from the submatch. If a
2316 * substitute() was used the reg_maxline and other values have been
2317 * overwritten.
2318 */
2319 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002320reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002321{
2322 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002323 linenr_T save_first = rex.reg_firstlnum;
2324 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002325
Bram Moolenaar6100d022016-10-02 16:51:57 +02002326 rex.reg_firstlnum = rsm.sm_firstlnum;
2327 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002328
2329 s = reg_getline(lnum);
2330
Bram Moolenaar6100d022016-10-02 16:51:57 +02002331 rex.reg_firstlnum = save_first;
2332 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002333 return s;
2334}
2335
2336/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00002337 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00002338 * allocated memory.
2339 * Returns NULL when not in a ":s" command and for a non-existing submatch.
2340 */
2341 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002342reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002343{
2344 char_u *retval = NULL;
2345 char_u *s;
2346 int len;
2347 int round;
2348 linenr_T lnum;
2349
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002350 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002351 return NULL;
2352
Bram Moolenaar6100d022016-10-02 16:51:57 +02002353 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002354 {
2355 /*
2356 * First round: compute the length and allocate memory.
2357 * Second round: copy the text.
2358 */
2359 for (round = 1; round <= 2; ++round)
2360 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002361 lnum = rsm.sm_mmatch->startpos[no].lnum;
2362 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002363 return NULL;
2364
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002365 s = reg_getline_submatch(lnum);
2366 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00002367 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01002368 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002369 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002370 {
2371 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02002372 len = rsm.sm_mmatch->endpos[no].col
2373 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002374 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00002375 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002376 ++len;
2377 }
2378 else
2379 {
2380 /* Multiple lines: take start line from start col, middle
2381 * lines completely and end line up to end col. */
2382 len = (int)STRLEN(s);
2383 if (round == 2)
2384 {
2385 STRCPY(retval, s);
2386 retval[len] = '\n';
2387 }
2388 ++len;
2389 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02002390 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002391 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002392 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002393 if (round == 2)
2394 STRCPY(retval + len, s);
2395 len += (int)STRLEN(s);
2396 if (round == 2)
2397 retval[len] = '\n';
2398 ++len;
2399 }
2400 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00002401 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02002402 rsm.sm_mmatch->endpos[no].col);
2403 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002404 if (round == 2)
2405 retval[len] = NUL;
2406 ++len;
2407 }
2408
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002409 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002410 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02002411 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00002412 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002413 return NULL;
2414 }
2415 }
2416 }
2417 else
2418 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002419 s = rsm.sm_match->startp[no];
2420 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002421 retval = NULL;
2422 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02002423 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002424 }
2425
2426 return retval;
2427}
Bram Moolenaar41571762014-04-02 19:00:58 +02002428
2429/*
2430 * Used for the submatch() function with the optional non-zero argument: get
2431 * the list of strings from the n'th submatch in allocated memory with NULs
2432 * represented in NLs.
2433 * Returns a list of allocated strings. Returns NULL when not in a ":s"
2434 * command, for a non-existing submatch and for any error.
2435 */
2436 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002437reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02002438{
2439 char_u *s;
2440 linenr_T slnum;
2441 linenr_T elnum;
2442 colnr_T scol;
2443 colnr_T ecol;
2444 int i;
2445 list_T *list;
2446 int error = FALSE;
2447
2448 if (!can_f_submatch || no < 0)
2449 return NULL;
2450
Bram Moolenaar6100d022016-10-02 16:51:57 +02002451 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002452 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002453 slnum = rsm.sm_mmatch->startpos[no].lnum;
2454 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02002455 if (slnum < 0 || elnum < 0)
2456 return NULL;
2457
Bram Moolenaar6100d022016-10-02 16:51:57 +02002458 scol = rsm.sm_mmatch->startpos[no].col;
2459 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02002460
2461 list = list_alloc();
2462 if (list == NULL)
2463 return NULL;
2464
2465 s = reg_getline_submatch(slnum) + scol;
2466 if (slnum == elnum)
2467 {
2468 if (list_append_string(list, s, ecol - scol) == FAIL)
2469 error = TRUE;
2470 }
2471 else
2472 {
2473 if (list_append_string(list, s, -1) == FAIL)
2474 error = TRUE;
2475 for (i = 1; i < elnum - slnum; i++)
2476 {
2477 s = reg_getline_submatch(slnum + i);
2478 if (list_append_string(list, s, -1) == FAIL)
2479 error = TRUE;
2480 }
2481 s = reg_getline_submatch(elnum);
2482 if (list_append_string(list, s, ecol) == FAIL)
2483 error = TRUE;
2484 }
2485 }
2486 else
2487 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02002488 s = rsm.sm_match->startp[no];
2489 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002490 return NULL;
2491 list = list_alloc();
2492 if (list == NULL)
2493 return NULL;
2494 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02002495 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02002496 error = TRUE;
2497 }
2498
2499 if (error)
2500 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02002501 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02002502 return NULL;
2503 }
2504 return list;
2505}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002506#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002507
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002508#include "regexp_bt.c"
2509
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002510static regengine_T bt_regengine =
2511{
2512 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002513 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002514 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002515 bt_regexec_multi,
2516 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002517};
2518
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002519#include "regexp_nfa.c"
2520
2521static regengine_T nfa_regengine =
2522{
2523 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02002524 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002525 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01002526 nfa_regexec_multi,
2527 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002528};
2529
// The regexp engine currently selected; vim_regcomp() sets and uses it.
// The values must match with the 'regexpengine' option.
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002533
#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number given after "\%#=".
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
2541
2542/*
2543 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02002544 * Returns the program in allocated memory.
2545 * Use vim_regfree() to free the memory.
2546 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002547 */
2548 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01002549vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002550{
2551 regprog_T *prog = NULL;
2552 char_u *expr = expr_arg;
Bram Moolenaarcd625122019-02-22 17:29:43 +01002553 int save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002554
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002555 regexp_engine = p_re;
2556
2557 /* Check for prefix "\%#=", that sets the regexp engine */
2558 if (STRNCMP(expr, "\\%#=", 4) == 0)
2559 {
2560 int newengine = expr[4] - '0';
2561
2562 if (newengine == AUTOMATIC_ENGINE
2563 || newengine == BACKTRACKING_ENGINE
2564 || newengine == NFA_ENGINE)
2565 {
2566 regexp_engine = expr[4] - '0';
2567 expr += 5;
2568#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002569 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02002570 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002571#endif
2572 }
2573 else
2574 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002575 emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002576 regexp_engine = AUTOMATIC_ENGINE;
2577 }
2578 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02002579#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002580 bt_regengine.expr = expr;
2581 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002582#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01002583 // reg_iswordc() uses rex.reg_buf
2584 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002585
2586 /*
2587 * First try the NFA engine, unless backtracking was requested.
2588 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01002589 save_called_emsg = called_emsg;
2590 called_emsg = FALSE;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002591 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01002592 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002593 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002594 else
2595 prog = bt_regengine.regcomp(expr, re_flags);
2596
Bram Moolenaarfda37292014-11-05 14:27:36 +01002597 /* Check for error compiling regexp with initial engine. */
2598 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002599 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002600#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002601 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
2602 {
2603 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02002604 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002605 if (f)
2606 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002607 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002608 fclose(f);
2609 }
2610 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002611 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01002612 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002613 }
2614#endif
2615 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01002616 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002617 * The NFA engine also fails for patterns that it can't handle well
2618 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01002619 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002620 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01002621 if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002622 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01002623 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002624 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01002625 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02002626 }
Bram Moolenaarcd625122019-02-22 17:29:43 +01002627 called_emsg |= save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002628
Bram Moolenaarfda37292014-11-05 14:27:36 +01002629 if (prog != NULL)
2630 {
2631 /* Store the info needed to call regcomp() again when the engine turns
2632 * out to be very slow when executing it. */
2633 prog->re_engine = regexp_engine;
2634 prog->re_flags = re_flags;
2635 }
2636
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002637 return prog;
2638}
2639
2640/*
Bram Moolenaar473de612013-06-08 18:19:48 +02002641 * Free a compiled regexp program, returned by vim_regcomp().
2642 */
2643 void
Bram Moolenaar05540972016-01-30 20:31:25 +01002644vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02002645{
2646 if (prog != NULL)
2647 prog->engine->regfree(prog);
2648}
2649
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory held in the file-level regexp variables: "reg_tofree",
 * "reg_prev_sub" and the growarrays "regstack" and "backpos".
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&regstack);
    ga_clear(&backpos);
}
#endif
2660
#ifdef FEAT_EVAL
/*
 * When 'verbose' is above zero, give a message that the backtracking engine
 * is now going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
        return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
2674
#if (defined(FEAT_X11) && (defined(FEAT_TITLE) || defined(FEAT_XCLIPBOARD))) \
        || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog": non-zero while "prog" is being
 * executed by vim_regexec_string() or vim_regexec_multi().
 * "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int in_use = prog->re_in_use;

    return in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01002686
2687/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002688 * Match a regexp against a string.
2689 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002690 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002691 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01002692 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002693 *
2694 * Return TRUE if there is a match, FALSE if not.
2695 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01002696 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002697vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01002698 regmatch_T *rmp,
2699 char_u *line, /* string to match against */
2700 colnr_T col, /* column to start looking for match */
2701 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01002702{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002703 int result;
2704 regexec_T rex_save;
2705 int rex_in_use_save = rex_in_use;
2706
Bram Moolenaar0270f382018-07-17 05:43:58 +02002707 // Cannot use the same prog recursively, it contains state.
2708 if (rmp->regprog->re_in_use)
2709 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002710 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002711 return FALSE;
2712 }
2713 rmp->regprog->re_in_use = TRUE;
2714
Bram Moolenaar6100d022016-10-02 16:51:57 +02002715 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02002716 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02002717 rex_save = rex;
2718 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02002719
Bram Moolenaar6100d022016-10-02 16:51:57 +02002720 rex.reg_startp = NULL;
2721 rex.reg_endp = NULL;
2722 rex.reg_startpos = NULL;
2723 rex.reg_endpos = NULL;
2724
2725 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002726 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002727
2728 /* NFA engine aborted because it's very slow. */
2729 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2730 && result == NFA_TOO_EXPENSIVE)
2731 {
2732 int save_p_re = p_re;
2733 int re_flags = rmp->regprog->re_flags;
2734 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2735
2736 p_re = BACKTRACKING_ENGINE;
2737 vim_regfree(rmp->regprog);
2738 if (pat != NULL)
2739 {
2740#ifdef FEAT_EVAL
2741 report_re_switch(pat);
2742#endif
2743 rmp->regprog = vim_regcomp(pat, re_flags);
2744 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002745 {
2746 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002747 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02002748 rmp->regprog->re_in_use = FALSE;
2749 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002750 vim_free(pat);
2751 }
2752
2753 p_re = save_p_re;
2754 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02002755
2756 rex_in_use = rex_in_use_save;
2757 if (rex_in_use)
2758 rex = rex_save;
2759
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002760 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002761}
2762
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002763/*
2764 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002765 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002766 */
2767 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002768vim_regexec_prog(
2769 regprog_T **prog,
2770 int ignore_case,
2771 char_u *line,
2772 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002773{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002774 int r;
2775 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002776
2777 regmatch.regprog = *prog;
2778 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002779 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002780 *prog = regmatch.regprog;
2781 return r;
2782}
2783
2784/*
2785 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002786 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002787 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002788 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002789vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002790{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002791 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002792}
2793
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002794/*
2795 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01002796 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002797 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002798 */
2799 int
Bram Moolenaar05540972016-01-30 20:31:25 +01002800vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002801{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02002802 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002803}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002804
2805/*
2806 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002807 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2808 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002809 * Uses curbuf for line count and 'iskeyword'.
2810 *
2811 * Return zero if there is no match. Return number of lines contained in the
2812 * match otherwise.
2813 */
2814 long
Bram Moolenaar05540972016-01-30 20:31:25 +01002815vim_regexec_multi(
2816 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01002817 win_T *win, /* window in which to search or NULL */
2818 buf_T *buf, /* buffer in which to search */
2819 linenr_T lnum, /* nr of line to start looking for match */
2820 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002821 proftime_T *tm, /* timeout limit or NULL */
2822 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002823{
Bram Moolenaar6100d022016-10-02 16:51:57 +02002824 int result;
2825 regexec_T rex_save;
2826 int rex_in_use_save = rex_in_use;
2827
Bram Moolenaar0270f382018-07-17 05:43:58 +02002828 // Cannot use the same prog recursively, it contains state.
2829 if (rmp->regprog->re_in_use)
2830 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002831 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02002832 return FALSE;
2833 }
2834 rmp->regprog->re_in_use = TRUE;
2835
Bram Moolenaar6100d022016-10-02 16:51:57 +02002836 if (rex_in_use)
2837 /* Being called recursively, save the state. */
2838 rex_save = rex;
2839 rex_in_use = TRUE;
2840
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002841 result = rmp->regprog->engine->regexec_multi(
2842 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002843 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002844
2845 /* NFA engine aborted because it's very slow. */
2846 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
2847 && result == NFA_TOO_EXPENSIVE)
2848 {
2849 int save_p_re = p_re;
2850 int re_flags = rmp->regprog->re_flags;
2851 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
2852
2853 p_re = BACKTRACKING_ENGINE;
2854 vim_regfree(rmp->regprog);
2855 if (pat != NULL)
2856 {
2857#ifdef FEAT_EVAL
2858 report_re_switch(pat);
2859#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002860#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002861 // checking for \z misuse was already done when compiling for NFA,
2862 // allow all here
2863 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002864#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01002865 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002866#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002867 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02002868#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002869
Bram Moolenaarfda37292014-11-05 14:27:36 +01002870 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02002871 {
2872 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01002873 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02002874 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02002875 rmp->regprog->re_in_use = FALSE;
2876 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01002877 vim_free(pat);
2878 }
2879 p_re = save_p_re;
2880 }
2881
Bram Moolenaar6100d022016-10-02 16:51:57 +02002882 rex_in_use = rex_in_use_save;
2883 if (rex_in_use)
2884 rex = rex_save;
2885
Bram Moolenaar66a3e792014-11-20 23:07:05 +01002886 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002887}