/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 *
 * NOTICE:
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer.  This code has been modified specifically for use with the VIM
 * editor, and should not be used separately from Vim.  If you want a good
 * regular expression library, get the original code.  The copyright notice
 * that follows is from the original.
 *
 * END NOTICE
 *
 *      Copyright (c) 1986 by University of Toronto.
 *      Written by Henry Spencer.  Not derived from licensed software.
 *
 *      Permission is granted to anyone to use this software for any
 *      purpose on any computer system, and to redistribute it freely,
 *      subject to the following restrictions:
 *
 *      1. The author is not responsible for the consequences of use of
 *              this software, no matter how awful, even if they arise
 *              from defects in it.
 *
 *      2. The origin of this software must not be misrepresented, either
 *              by explicit claim or by omission.
 *
 *      3. Altered versions must be plainly marked as such, and must not
 *              be misrepresented as being the original software.
 *
 * Beware that some of this code is subtly aware of the way operator
 * precedence is structured in regular expressions.  Serious changes in
 * regular-expression syntax might require a total rethink.
 *
 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
 * Webb, Ciaran McCreesh and Bram Moolenaar.
 * Named character class support added by Walter Briscoe (1998 Jul 01)
 */

Bram Moolenaarc2d09c92019-04-25 20:07:51 +020041// By default: do not create debugging logs or files related to regular
42// expressions, even when compiling with -DDEBUG.
43// Uncomment the second line to get the regexp debugging.
44#undef DEBUG
45// #define DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020046
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
/*
 * The "internal use only" fields in regexp.h are present to pass info from
 * compile to execute that permits the execute phase to run lots faster on
 * simple cases.  They are:
 *
 * regstart     char that must begin a match; NUL if none obvious; Can be a
 *              multi-byte character.
 * reganch      is the match anchored (at beginning-of-line only)?
 * regmust      string (pointer into program) that match must include, or NULL
 * regmlen      length of regmust string
 * regflags     RF_ values or'ed together
 *
 * Regstart and reganch permit very fast decisions on suitable starting points
 * for a match, cutting down the work a lot.  Regmust permits fast rejection
 * of lines that cannot possibly match.  The regmust tests are costly enough
 * that vim_regcomp() supplies a regmust only if the r.e. contains something
 * potentially expensive (at present, the only such thing detected is * or +
 * at the start of the r.e., which can involve a lot of backup).  Regmlen is
 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
 * computing it anyway.
 */

/*
 * Structure for regexp "program".  This is essentially a linear encoding
 * of a nondeterministic finite-state machine (aka syntax charts or
 * "railroad normal form" in parsing technology).  Each node is an opcode
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
 * The operand of some types of node is a literal string; for others, it is a
 * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
 * is the first node of the branch.
 * (NB this is *not* a tree structure: the tail of the branch connects to the
 * thing following the set of BRANCHes.)
 *
 * pattern      is coded like:
 *
 *                        +-----------------+
 *                        |                 V
 * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
 *                   |      ^    |          ^
 *                   +------+    +----------+
 *
 *
 *                     +------------------+
 *                     V                  |
 * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
 *                   |      |               ^                      ^
 *                   |      +---------------+                      |
 *                   +---------------------------------------------+
 *
 *
 *                     +----------------------+
 *                     V                      |
 * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 *                   |               |           ^                      ^
 *                   |               +-----------+                      |
 *                   +--------------------------------------------------+
 *
 *
 *                                      +-------------------------+
 *                                      V                         |
 * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 *                   |                              |                ^
 *                   |                              +----------------+
 *                   +-----------------------------------------------+
 *
 *
 * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 *                   |       |               ^        ^
 *                   |       +---------------+        |
 *                   +--------------------------------+
 *
 *                                                   +----------+
 *                                                   |          V
 * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 *                   |      |          |          |     ^                   ^
 *                   |      |          |          +-----+                   |
 *                   |      |          +----------------+                   |
 *                   |      +---------------------------+                   |
 *                   +------------------------------------------------------+
 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
 * one alternative.
 */

/*
 * The opcodes are:
 */

/* definition           number     opnd?    meaning */
#define END             0       /*          End of program or NOMATCH operand. */
#define BOL             1       /*          Match "" at beginning of line. */
#define EOL             2       /*          Match "" at end of line. */
#define BRANCH          3       /* node     Match this alternative, or the
                                 *          next... */
#define BACK            4       /*          Match "", "next" ptr points backward. */
#define EXACTLY         5       /* str      Match this string. */
#define NOTHING         6       /*          Match empty string. */
#define STAR            7       /* node     Match this (simple) thing 0 or more
                                 *          times. */
#define PLUS            8       /* node     Match this (simple) thing 1 or more
                                 *          times. */
#define MATCH           9       /* node     match the operand zero-width */
#define NOMATCH         10      /* node     check for no match with operand */
#define BEHIND          11      /* node     look behind for a match with operand */
#define NOBEHIND        12      /* node     look behind for no match with operand */
#define SUBPAT          13      /* node     match the operand here */
#define BRACE_SIMPLE    14      /* node     Match this (simple) thing between m and
                                 *          n times (\{m,n\}). */
#define BOW             15      /*          Match "" after [^a-zA-Z0-9_] */
#define EOW             16      /*          Match "" at    [^a-zA-Z0-9_] */
#define BRACE_LIMITS    17      /* nr nr    define the min & max for BRACE_SIMPLE
                                 *          and BRACE_COMPLEX. */
#define NEWL            18      /*          Match line-break */
#define BHPOS           19      /*          End position for BEHIND or NOBEHIND */


/* character classes: 20-48 normal, 50-78 include a line-break */
#define ADD_NL          30
/* FIRST_NL and LAST_NL are parenthesized so that they stay a single operand
 * when used inside a larger expression (e.g. "FIRST_NL * 2"). */
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20      /*          Match any one character. */
#define ANYOF           21      /* str      Match any character in this string. */
#define ANYBUT          22      /* str      Match any character not in this
                                 *          string. */
#define IDENT           23      /*          Match identifier char */
#define SIDENT          24      /*          Match identifier char but no digit */
#define KWORD           25      /*          Match keyword char */
#define SKWORD          26      /*          Match word char but no digit */
#define FNAME           27      /*          Match file name char */
#define SFNAME          28      /*          Match file name char but no digit */
#define PRINT           29      /*          Match printable char */
#define SPRINT          30      /*          Match printable char but no digit */
#define WHITE           31      /*          Match whitespace char */
#define NWHITE          32      /*          Match non-whitespace char */
#define DIGIT           33      /*          Match digit char */
#define NDIGIT          34      /*          Match non-digit char */
#define HEX             35      /*          Match hex char */
#define NHEX            36      /*          Match non-hex char */
#define OCTAL           37      /*          Match octal char */
#define NOCTAL          38      /*          Match non-octal char */
#define WORD            39      /*          Match word char */
#define NWORD           40      /*          Match non-word char */
#define HEAD            41      /*          Match head char */
#define NHEAD           42      /*          Match non-head char */
#define ALPHA           43      /*          Match alpha char */
#define NALPHA          44      /*          Match non-alpha char */
#define LOWER           45      /*          Match lowercase char */
#define NLOWER          46      /*          Match non-lowercase char */
#define UPPER           47      /*          Match uppercase char */
#define NUPPER          48      /*          Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* TRUE when opcode "op" is one of the classes that also match a line-break. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)

#define MOPEN           80  /* -89       Mark this point in input as start of
                             *           \( subexpr.  MOPEN + 0 marks start of
                             *           match. */
#define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
                             *           end of match. */
#define BACKREF         100 /* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN          110 /* -119      Mark this point in input as start of
                             *           \z( subexpr. */
# define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
# define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */

#define NOPEN           150 /*           Mark this point in input as start of
                                         \%( subexpr. */
#define NCLOSE          151 /*           Analogous to NOPEN. */

#define MULTIBYTECODE   200 /* mbc       Match one multi-byte character */
#define RE_BOF          201 /*           Match "" at beginning of file. */
#define RE_EOF          202 /*           Match "" at end of file. */
#define CURSOR          203 /*           Match location of cursor. */

#define RE_LNUM         204 /* nr cmp    Match line number */
#define RE_COL          205 /* nr cmp    Match column number */
#define RE_VCOL         206 /* nr cmp    Match virtual column number */

#define RE_MARK         207 /* mark cmp  Match mark position */
#define RE_VISUAL       208 /*           Match Visual area */
#define RE_COMPOSING    209 /*           any composing characters */

/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)        ((int)(x) - 256)
#define un_Magic(x)     ((x) + 256)
#define is_Magic(x)     ((x) < 0)

/*
 * Return the literal form of "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    if (is_Magic(x))
        return un_Magic(x);
    return x;
}

/*
 * Flip the magicness of "x": a Magic value becomes its literal character and
 * a literal character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    if (is_Magic(x))
        return un_Magic(x);
    return Magic(x);
}

/*
 * The first byte of the regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 */

#define REGMAGIC        0234    /* octal; 156 decimal */

/*
 * Opcode notes:
 *
 * BRANCH       The set of branches constituting a single choice are hooked
 *              together with their "next" pointers, since precedence prevents
 *              anything being concatenated to any individual branch.  The
 *              "next" pointer of the last BRANCH in a choice points to the
 *              thing following the whole choice.  This is also where the
 *              final "next" pointer of each individual branch points; each
 *              branch starts with the operand node of a BRANCH node.
 *
 * BACK         Normal "next" pointers all implicitly point forward; BACK
 *              exists to make loop structures possible.
 *
 * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 *              BRANCH structures using BACK.  Simple cases (one character
 *              per match) are implemented with STAR and PLUS for speed
 *              and to minimize recursive plunges.
 *
 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 *              node, and defines the min and max limits to be used for that
 *              node.
 *
 * MOPEN,MCLOSE ...are numbered at compile time.
 * ZOPEN,ZCLOSE ...ditto
 */

/*
 * A node is one char of opcode followed by two chars of "next" pointer.
 * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 * value is a positive offset from the opcode of the node containing it.
 * An operand, if any, simply follows the node.  (Note that much of the
 * code generation knows about this implicit relationship.)
 *
 * Using two bytes for the "next" pointer is vast overkill for most things,
 * but allows patterns to get big without disasters.
 *
 * All macros below take a pointer "p" to the opcode byte of a node and
 * evaluate "p" more than once in some cases, so "p" must not have side
 * effects.
 */
#define OP(p)           ((int)*(p))
#define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
#define OPERAND(p)      ((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
                        + ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)  ((p)[7])

/*
 * Utility definitions.
 */
/* Read the byte at "p" as an unsigned value. */
#define UCHARAT(p)      ((int)*(char_u *)(p))

/* Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL.
 * Each of these macros expands to a complete "return" statement, so it can
 * only be used as a statement inside a function with a matching return type.
 * The "c" argument selects whether a backslash is inserted for the "%s" in
 * the message: empty string when "c" is non-zero, "\" otherwise. */
#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)


/* 32767 * 65536 */
#define MAX_LIMIT       (32767L << 16L)

Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100348static int cstrncmp(char_u *s1, char_u *s2, int *n);
349static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000350
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200351#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100352static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100355static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#endif
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200361static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
362static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200363static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
364static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
365static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200366#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200367static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
Bram Moolenaarbcf94422018-06-23 14:21:42 +0200368static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200369#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200370static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200371static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar0270f382018-07-17 05:43:58 +0200372static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
373
#define NOT_MULTI       0
#define MULTI_ONE       1
#define MULTI_MULT      2
/*
 * Return NOT_MULTI if c is not a "multi" operator.
 * Return MULTI_ONE if c is a single "multi" operator.
 * Return MULTI_MULT if c is a multi "multi" operator.
 *
 * "c" is compared against Magic() values only, so a literal (non-Magic)
 * character always yields NOT_MULTI.
 */
    static int
re_multi_type(int c)
{
    if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
        return MULTI_ONE;
    if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
        return MULTI_MULT;
    return NOT_MULTI;
}

392/*
393 * Flags to be passed up and down.
394 */
395#define HASWIDTH 0x1 /* Known never to match null string. */
396#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
397#define SPSTART 0x4 /* Starts with * or +. */
398#define HASNL 0x8 /* Contains some \n. */
399#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
400#define WORST 0 /* Worst case. */
401
402/*
403 * When regcode is set to this value, code is not emitted and size is computed
404 * instead.
405 */
406#define JUST_CALC_SIZE ((char_u *) -1)
407
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000408static char_u *reg_prev_sub = NULL;
409
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410/*
411 * REGEXP_INRANGE contains all characters which are always special in a []
412 * range after '\'.
413 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
414 * These are:
415 * \n - New line (NL).
416 * \r - Carriage Return (CR).
417 * \t - Tab (TAB).
418 * \e - Escape (ESC).
419 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000420 * \d - Character code in decimal, eg \d123
421 * \o - Character code in octal, eg \o80
422 * \x - Character code in hex, eg \x4a
423 * \u - Multibyte character code, eg \u20ac
424 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425 */
426static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000427static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428
Bram Moolenaar071d4272004-06-13 20:20:40 +0000429/*
430 * Translate '\x' to its control character, except "\n", which is Magic.
431 */
432 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100433backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434{
435 switch (c)
436 {
437 case 'r': return CAR;
438 case 't': return TAB;
439 case 'e': return ESC;
440 case 'b': return BS;
441 }
442 return c;
443}
444
445/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000446 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000447 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
448 * recognized. Otherwise "pp" is advanced to after the item.
449 */
450 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100451get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000452{
453 static const char *(class_names[]) =
454 {
455 "alnum:]",
456#define CLASS_ALNUM 0
457 "alpha:]",
458#define CLASS_ALPHA 1
459 "blank:]",
460#define CLASS_BLANK 2
461 "cntrl:]",
462#define CLASS_CNTRL 3
463 "digit:]",
464#define CLASS_DIGIT 4
465 "graph:]",
466#define CLASS_GRAPH 5
467 "lower:]",
468#define CLASS_LOWER 6
469 "print:]",
470#define CLASS_PRINT 7
471 "punct:]",
472#define CLASS_PUNCT 8
473 "space:]",
474#define CLASS_SPACE 9
475 "upper:]",
476#define CLASS_UPPER 10
477 "xdigit:]",
478#define CLASS_XDIGIT 11
479 "tab:]",
480#define CLASS_TAB 12
481 "return:]",
482#define CLASS_RETURN 13
483 "backspace:]",
484#define CLASS_BACKSPACE 14
485 "escape:]",
486#define CLASS_ESCAPE 15
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100487 "ident:]",
488#define CLASS_IDENT 16
489 "keyword:]",
490#define CLASS_KEYWORD 17
491 "fname:]",
492#define CLASS_FNAME 18
Bram Moolenaar071d4272004-06-13 20:20:40 +0000493 };
494#define CLASS_NONE 99
495 int i;
496
497 if ((*pp)[1] == ':')
498 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000499 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
501 {
502 *pp += STRLEN(class_names[i]) + 2;
503 return i;
504 }
505 }
506 return CLASS_NONE;
507}
508
/*
 * Specific version of character class functions.
 * Using a table to keep this fast.
 * class_tab[] holds, for each byte value, the RI_ flags or'ed together.
 */
static short    class_tab[256];

#define     RI_DIGIT    0x01
#define     RI_HEX      0x02
#define     RI_OCTAL    0x04
#define     RI_WORD     0x08
#define     RI_HEAD     0x10
#define     RI_ALPHA    0x20
#define     RI_LOWER    0x40
#define     RI_UPPER    0x80
#define     RI_WHITE    0x100

525 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100526init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000527{
528 int i;
529 static int done = FALSE;
530
531 if (done)
532 return;
533
534 for (i = 0; i < 256; ++i)
535 {
536 if (i >= '0' && i <= '7')
537 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
538 else if (i >= '8' && i <= '9')
539 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
540 else if (i >= 'a' && i <= 'f')
541 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
542#ifdef EBCDIC
543 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
544 || (i >= 's' && i <= 'z'))
545#else
546 else if (i >= 'g' && i <= 'z')
547#endif
548 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
549 else if (i >= 'A' && i <= 'F')
550 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
551#ifdef EBCDIC
552 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
553 || (i >= 'S' && i <= 'Z'))
554#else
555 else if (i >= 'G' && i <= 'Z')
556#endif
557 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
558 else if (i == '_')
559 class_tab[i] = RI_WORD + RI_HEAD;
560 else
561 class_tab[i] = 0;
562 }
563 class_tab[' '] |= RI_WHITE;
564 class_tab['\t'] |= RI_WHITE;
565 done = TRUE;
566}
567
/*
 * Character class tests using class_tab[].  Values of 0x100 and above never
 * match.  "c" is evaluated twice, so it must not have side effects, and it is
 * used as an index into class_tab[], so it must not be negative.
 */
#define ri_digit(c)     ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)       ((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)     ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)      ((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)      ((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)     ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)     ((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)     ((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)     ((c) < 0x100 && (class_tab[c] & RI_WHITE))

/* flags for regflags */
#define RF_ICASE    1   /* ignore case */
#define RF_NOICASE  2   /* don't ignore case */
#define RF_HASNL    4   /* can match a NL */
#define RF_ICOMBINE 8   /* ignore combining characters */
#define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */

585/*
586 * Global work variables for vim_regcomp().
587 */
588
589static char_u *regparse; /* Input-scan pointer. */
590static int prevchr_len; /* byte length of previous char */
591static int num_complex_braces; /* Complex \{...} count */
592static int regnpar; /* () count. */
593#ifdef FEAT_SYN_HL
594static int regnzpar; /* \z() count. */
595static int re_has_z; /* \z item detected */
596#endif
597static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
598static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000599static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000600static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
601static unsigned regflags; /* RF_ flags for prog */
602static long brace_min[10]; /* Minimums for complex brace repeats */
603static long brace_max[10]; /* Maximums for complex brace repeats */
604static int brace_count[10]; /* Current counts for complex brace repeats */
605#if defined(FEAT_SYN_HL) || defined(PROTO)
606static int had_eol; /* TRUE when EOL found by vim_regcomp() */
607#endif
608static int one_exactly = FALSE; /* only do one char for EXACTLY */
609
610static int reg_magic; /* magicness of the pattern: */
611#define MAGIC_NONE 1 /* "\V" very unmagic */
612#define MAGIC_OFF 2 /* "\M" or 'magic' off */
613#define MAGIC_ON 3 /* "\m" or 'magic' */
614#define MAGIC_ALL 4 /* "\v" very magic */
615
616static int reg_string; /* matching with a string instead of a buffer
617 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000618static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000619
620/*
621 * META contains all characters that may be magic, except '^' and '$'.
622 */
623
624#ifdef EBCDIC
625static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
626#else
627/* META[] is used often enough to justify turning it into a table. */
628static char_u META_flags[] = {
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631/* % & ( ) * + . */
632 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
633/* 1 2 3 4 5 6 7 8 9 < = > ? */
634 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
635/* @ A C D F H I K L M O */
636 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
637/* P S U V W X Z [ _ */
638 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
639/* a c d f h i k l m n o */
640 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
641/* p s u v w x z { | ~ */
642 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
643};
644#endif
645
static int      curchr;         /* currently parsed character */
/* Previous character.  Note: prevchr is sometimes -1 when we are not at the
 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
 * because ^ was taken to be magic -- webb */
static int      prevchr;
static int      prevprevchr;    /* previous-previous character */
static int      nextchr;        /* used for ungetchr() */

/* arguments for reg() */
#define REG_NOPAREN     0       /* toplevel reg() */
#define REG_PAREN       1       /* \(\) */
#define REG_ZPAREN      2       /* \z(\) */
#define REG_NPAREN      3       /* \%(\) */

Bram Moolenaar3737fc12013-06-01 14:42:56 +0200660typedef struct
661{
662 char_u *regparse;
663 int prevchr_len;
664 int curchr;
665 int prevchr;
666 int prevprevchr;
667 int nextchr;
668 int at_start;
669 int prev_at_start;
670 int regnpar;
671} parse_state_T;
672
Bram Moolenaar071d4272004-06-13 20:20:40 +0000673/*
674 * Forward declarations for vim_regcomp()'s friends.
675 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100676static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100677static int getchr(void);
678static void skipchr_keepstart(void);
679static int peekchr(void);
680static void skipchr(void);
681static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100682static long gethexchrs(int maxinputlen);
683static long getoctchrs(void);
684static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100685static int coll_get_char(void);
686static void regcomp_start(char_u *expr, int flags);
687static char_u *reg(int, int *);
688static char_u *regbranch(int *flagp);
689static char_u *regconcat(int *flagp);
690static char_u *regpiece(int *);
691static char_u *regatom(int *);
692static char_u *regnode(int);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100693static int use_multibytecode(int c);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100694static int prog_magic_wrong(void);
695static char_u *regnext(char_u *);
696static void regc(int b);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100697static void regmbc(int c);
Bram Moolenaara12a1612019-01-24 16:39:02 +0100698#define REGMBC(x) regmbc(x);
699#define CASEMBC(x) case x:
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100700static void reginsert(int, char_u *);
701static void reginsert_nr(int op, long val, char_u *opnd);
702static void reginsert_limits(int, long, long, char_u *);
703static char_u *re_put_long(char_u *pr, long_u val);
704static int read_limits(long *, long *);
705static void regtail(char_u *, char_u *);
706static void regoptail(char_u *, char_u *);
Bram Moolenaar221cd9f2019-01-31 15:34:40 +0100707static int reg_iswordc(int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000708
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200709static regengine_T bt_regengine;
710static regengine_T nfa_regengine;
711
Bram Moolenaar071d4272004-06-13 20:20:40 +0000712/*
713 * Return TRUE if compiled regular expression "prog" can match a line break.
714 */
715 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100716re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717{
718 return (prog->regflags & RF_HASNL);
719}
720
721/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000722 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
723 * Returns a character representing the class. Zero means that no item was
724 * recognized. Otherwise "pp" is advanced to after the item.
725 */
726 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100727get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000728{
729 int c;
730 int l = 1;
731 char_u *p = *pp;
732
Bram Moolenaar985079c2019-02-16 17:07:47 +0100733 if (p[1] == '=' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000734 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000735 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000736 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000737 if (p[l + 2] == '=' && p[l + 3] == ']')
738 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000739 if (has_mbyte)
740 c = mb_ptr2char(p + 2);
741 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000742 c = p[2];
743 *pp += l + 4;
744 return c;
745 }
746 }
747 return 0;
748}
749
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 * Each entry is one equivalence class: the base letter followed by the codes
 * of the other characters in the same class.
 */
static char *EQUIVAL_CLASS_C[16] = {
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF\x80",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF\x70",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif

Bram Moolenaardf177f62005-02-22 08:39:57 +0000774/*
775 * Produce the bytes for equivalence class "c".
776 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200777 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000778 */
779 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100780reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000781{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000782 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000783 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000784 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200785#ifdef EBCDIC
786 int i;
787
788 /* This might be slower than switch/case below. */
789 for (i = 0; i < 16; i++)
790 {
791 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
792 {
793 char *p = EQUIVAL_CLASS_C[i];
794
795 while (*p != 0)
796 regmbc(*p++);
797 return;
798 }
799 }
800#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000801 switch (c)
802 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200803 /* Do not use '\300' style, it results in a negative number. */
804 case 'A': case 0xc0: case 0xc1: case 0xc2:
805 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200806 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
807 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200808 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
809 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
810 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200811 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
812 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
813 REGMBC(0x1ea2)
814 return;
815 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
816 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000817 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200818 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200819 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200820 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200821 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
822 REGMBC(0x10c)
823 return;
824 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
825 CASEMBC(0x1e0e) CASEMBC(0x1e10)
826 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
827 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000828 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200829 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200830 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
831 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200832 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
833 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200834 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
835 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
836 REGMBC(0x1ebc)
837 return;
838 case 'F': CASEMBC(0x1e1e)
839 regmbc('F'); REGMBC(0x1e1e)
840 return;
841 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
842 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
843 CASEMBC(0x1e20)
844 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
845 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
846 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
847 return;
848 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
849 CASEMBC(0x1e26) CASEMBC(0x1e28)
850 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
851 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000852 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200853 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200854 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
855 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200856 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
857 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200858 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
859 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
860 REGMBC(0x1ec8)
861 return;
862 case 'J': CASEMBC(0x134)
863 regmbc('J'); REGMBC(0x134)
864 return;
865 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
866 CASEMBC(0x1e34)
867 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
868 REGMBC(0x1e30) REGMBC(0x1e34)
869 return;
870 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
871 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
872 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
873 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
874 REGMBC(0x1e3a)
875 return;
876 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
877 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000878 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200879 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200880 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
881 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200882 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200883 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
884 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000885 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200886 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
887 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200888 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
889 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200890 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
891 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
892 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200893 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
894 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
895 REGMBC(0x1ec) REGMBC(0x1ece)
896 return;
897 case 'P': case 0x1e54: case 0x1e56:
898 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
899 return;
900 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
901 CASEMBC(0x1e58) CASEMBC(0x1e5e)
902 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
903 REGMBC(0x1e58) REGMBC(0x1e5e)
904 return;
905 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
906 CASEMBC(0x160) CASEMBC(0x1e60)
907 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
908 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
909 return;
910 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
911 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
912 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
913 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000914 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200915 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200916 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
917 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
918 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200919 regmbc('U'); regmbc(0xd9); regmbc(0xda);
920 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200921 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
922 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
923 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
924 return;
925 case 'V': CASEMBC(0x1e7c)
926 regmbc('V'); REGMBC(0x1e7c)
927 return;
928 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
929 CASEMBC(0x1e84) CASEMBC(0x1e86)
930 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
931 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
932 return;
933 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
934 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000935 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200936 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200937 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
938 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200939 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200940 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
941 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
942 return;
943 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
944 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
945 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
946 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
947 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000948 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200949 case 'a': case 0xe0: case 0xe1: case 0xe2:
950 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200951 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
952 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200953 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
954 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
955 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200956 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
957 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
958 REGMBC(0x1ea3)
959 return;
960 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
961 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000962 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200963 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200964 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200965 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200966 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
967 REGMBC(0x10d)
968 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +0200969 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
970 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200971 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +0200972 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000973 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200974 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200975 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
976 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200977 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
978 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200979 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
980 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
981 REGMBC(0x1ebd)
982 return;
983 case 'f': CASEMBC(0x1e1f)
984 regmbc('f'); REGMBC(0x1e1f)
985 return;
986 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
987 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
988 CASEMBC(0x1e21)
989 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
990 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
991 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
992 return;
993 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
994 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
995 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
996 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
997 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000998 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200999 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001000 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1001 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001002 regmbc('i'); regmbc(0xec); regmbc(0xed);
1003 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001004 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1005 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1006 return;
1007 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1008 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1009 return;
1010 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1011 CASEMBC(0x1e35)
1012 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1013 REGMBC(0x1e31) REGMBC(0x1e35)
1014 return;
1015 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1016 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1017 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1018 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1019 REGMBC(0x1e3b)
1020 return;
1021 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1022 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001023 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001024 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001025 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1026 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001027 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001028 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1029 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001030 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001031 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1032 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001033 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1034 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001035 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1036 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1037 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001038 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1039 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1040 REGMBC(0x1ed) REGMBC(0x1ecf)
1041 return;
1042 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1043 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1044 return;
1045 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1046 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1047 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1048 REGMBC(0x1e59) REGMBC(0x1e5f)
1049 return;
1050 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1051 CASEMBC(0x161) CASEMBC(0x1e61)
1052 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1053 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1054 return;
1055 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1056 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1057 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1058 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001059 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001060 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001061 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1062 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1063 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001064 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1065 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001066 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1067 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1068 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1069 return;
1070 case 'v': CASEMBC(0x1e7d)
1071 regmbc('v'); REGMBC(0x1e7d)
1072 return;
1073 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1074 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1075 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1076 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1077 REGMBC(0x1e98)
1078 return;
1079 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1080 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001081 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001082 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001083 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1084 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001085 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001086 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1087 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1088 return;
1089 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1090 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1091 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1092 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1093 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001094 return;
1095 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001096#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001097 }
1098 regmbc(c);
1099}
1100
1101/*
1102 * Check for a collating element "[.a.]". "pp" points to the '['.
1103 * Returns a character. Zero means that no item was recognized. Otherwise
1104 * "pp" is advanced to after the item.
1105 * Currently only single characters are recognized!
1106 */
1107 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001108get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001109{
1110 int c;
1111 int l = 1;
1112 char_u *p = *pp;
1113
Bram Moolenaarf1b57ab2019-02-17 13:53:34 +01001114 if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001115 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00001116 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001117 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001118 if (p[l + 2] == '.' && p[l + 3] == ']')
1119 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00001120 if (has_mbyte)
1121 c = mb_ptr2char(p + 2);
1122 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00001123 c = p[2];
1124 *pp += l + 4;
1125 return c;
1126 }
1127 }
1128 return 0;
1129}
1130
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001131static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1132static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1133
1134 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001135get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001136{
1137 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1138 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1139}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001140
1141/*
1142 * Skip over a "[]" range.
1143 * "p" must point to the character after the '['.
1144 * The returned pointer is on the matching ']', or the terminating NUL.
1145 */
1146 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001147skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001148{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001149 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +00001150
Bram Moolenaardf177f62005-02-22 08:39:57 +00001151 if (*p == '^') /* Complement of range. */
1152 ++p;
1153 if (*p == ']' || *p == '-')
1154 ++p;
1155 while (*p != NUL && *p != ']')
1156 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001157 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001158 p += l;
1159 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00001160 if (*p == '-')
1161 {
1162 ++p;
1163 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001164 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001165 }
1166 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001167 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001168 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001169 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001170 p += 2;
1171 else if (*p == '[')
1172 {
1173 if (get_char_class(&p) == CLASS_NONE
1174 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001175 && get_coll_element(&p) == 0
1176 && *p != NUL)
1177 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001178 }
1179 else
1180 ++p;
1181 }
1182
1183 return p;
1184}
1185
1186/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001187 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001188 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001189 * Take care of characters with a backslash in front of it.
1190 * Skip strings inside [ and ].
1191 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1192 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1193 * is changed in-place.
1194 */
1195 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001196skip_regexp(
1197 char_u *startp,
1198 int dirc,
1199 int magic,
1200 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001201{
1202 int mymagic;
1203 char_u *p = startp;
1204
1205 if (magic)
1206 mymagic = MAGIC_ON;
1207 else
1208 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001209 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001211 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001212 {
1213 if (p[0] == dirc) /* found end of regexp */
1214 break;
1215 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1216 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1217 {
1218 p = skip_anyof(p + 1);
1219 if (p[0] == NUL)
1220 break;
1221 }
1222 else if (p[0] == '\\' && p[1] != NUL)
1223 {
1224 if (dirc == '?' && newp != NULL && p[1] == '?')
1225 {
1226 /* change "\?" to "?", make a copy first. */
1227 if (*newp == NULL)
1228 {
1229 *newp = vim_strsave(startp);
1230 if (*newp != NULL)
1231 p = *newp + (p - startp);
1232 }
1233 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001234 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001235 else
1236 ++p;
1237 }
1238 else
1239 ++p; /* skip next character */
1240 if (*p == 'v')
1241 mymagic = MAGIC_ALL;
1242 else if (*p == 'V')
1243 mymagic = MAGIC_NONE;
1244 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001245 }
1246 return p;
1247}
1248
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001249/*
1250 * Return TRUE if the back reference is legal. We must have seen the close
1251 * brace.
1252 * TODO: Should also check that we don't refer to something that is repeated
1253 * (+*=): what instance of the repetition should we match?
1254 */
1255 static int
1256seen_endbrace(int refnum)
1257{
1258 if (!had_endbrace[refnum])
1259 {
1260 char_u *p;
1261
1262 /* Trick: check if "@<=" or "@<!" follows, in which case
1263 * the \1 can appear before the referenced match. */
1264 for (p = regparse; *p != NUL; ++p)
1265 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1266 break;
1267 if (*p == NUL)
1268 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001269 emsg(_("E65: Illegal back reference"));
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001270 rc_did_emsg = TRUE;
1271 return FALSE;
1272 }
1273 }
1274 return TRUE;
1275}
1276
Bram Moolenaar071d4272004-06-13 20:20:40 +00001277/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001278 * bt_regcomp() - compile a regular expression into internal code for the
1279 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001280 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001281 *
1282 * We can't allocate space until we know how big the compiled form will be,
1283 * but we can't compile it (and thus know how big it is) until we've got a
1284 * place to put the code. So we cheat: we compile it twice, once with code
1285 * generation turned off and size counting turned on, and once "for real".
1286 * This also means that we don't allocate space until we are sure that the
1287 * thing really will compile successfully, and we never have to move the
1288 * code and thus invalidate pointers into it. (Note that it has to be in
1289 * one piece because vim_free() must be able to free it all.)
1290 *
1291 * Whether upper/lower case is to be ignored is decided when executing the
1292 * program, it does not matter here.
1293 *
1294 * Beware that the optimization-preparation code in here knows about some
1295 * of the structure of the compiled regexp.
1296 * "re_flags": RE_MAGIC and/or RE_STRING.
1297 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001298 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001299bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001301 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001302 char_u *scan;
1303 char_u *longest;
1304 int len;
1305 int flags;
1306
1307 if (expr == NULL)
1308 EMSG_RET_NULL(_(e_null));
1309
1310 init_class_tab();
1311
1312 /*
1313 * First pass: determine size, legality.
1314 */
1315 regcomp_start(expr, re_flags);
1316 regcode = JUST_CALC_SIZE;
1317 regc(REGMAGIC);
1318 if (reg(REG_NOPAREN, &flags) == NULL)
1319 return NULL;
1320
Bram Moolenaar071d4272004-06-13 20:20:40 +00001321 /* Allocate space. */
Bram Moolenaar47ed5532019-08-08 20:49:14 +02001322 r = alloc(offsetof(bt_regprog_T, program) + regsize);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001323 if (r == NULL)
1324 return NULL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001325 r->re_in_use = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001326
1327 /*
1328 * Second pass: emit code.
1329 */
1330 regcomp_start(expr, re_flags);
1331 regcode = r->program;
1332 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001333 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 {
1335 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001336 if (reg_toolong)
1337 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001338 return NULL;
1339 }
1340
1341 /* Dig out information for optimizations. */
1342 r->regstart = NUL; /* Worst-case defaults. */
1343 r->reganch = 0;
1344 r->regmust = NULL;
1345 r->regmlen = 0;
1346 r->regflags = regflags;
1347 if (flags & HASNL)
1348 r->regflags |= RF_HASNL;
1349 if (flags & HASLOOKBH)
1350 r->regflags |= RF_LOOKBH;
1351#ifdef FEAT_SYN_HL
1352 /* Remember whether this pattern has any \z specials in it. */
1353 r->reghasz = re_has_z;
1354#endif
1355 scan = r->program + 1; /* First BRANCH. */
1356 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1357 {
1358 scan = OPERAND(scan);
1359
1360 /* Starting-point info. */
1361 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1362 {
1363 r->reganch++;
1364 scan = regnext(scan);
1365 }
1366
1367 if (OP(scan) == EXACTLY)
1368 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001369 if (has_mbyte)
1370 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1371 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 r->regstart = *OPERAND(scan);
1373 }
1374 else if ((OP(scan) == BOW
1375 || OP(scan) == EOW
1376 || OP(scan) == NOTHING
1377 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1378 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1379 && OP(regnext(scan)) == EXACTLY)
1380 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001381 if (has_mbyte)
1382 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1383 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001384 r->regstart = *OPERAND(regnext(scan));
1385 }
1386
1387 /*
1388 * If there's something expensive in the r.e., find the longest
1389 * literal string that must appear and make it the regmust. Resolve
1390 * ties in favor of later strings, since the regstart check works
1391 * with the beginning of the r.e. and avoiding duplication
1392 * strengthens checking. Not a strong reason, but sufficient in the
1393 * absence of others.
1394 */
1395 /*
1396 * When the r.e. starts with BOW, it is faster to look for a regmust
1397 * first. Used a lot for "#" and "*" commands. (Added by mool).
1398 */
1399 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1400 && !(flags & HASNL))
1401 {
1402 longest = NULL;
1403 len = 0;
1404 for (; scan != NULL; scan = regnext(scan))
1405 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1406 {
1407 longest = OPERAND(scan);
1408 len = (int)STRLEN(OPERAND(scan));
1409 }
1410 r->regmust = longest;
1411 r->regmlen = len;
1412 }
1413 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001414#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001415 regdump(expr, r);
1416#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001417 r->engine = &bt_regengine;
1418 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001419}
1420
1421/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001422 * Free a compiled regexp program, returned by bt_regcomp().
1423 */
1424 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001425bt_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02001426{
1427 vim_free(prog);
1428}
1429
1430/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001431 * Setup to parse the regexp. Used once to get the length and once to do it.
1432 */
1433 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001434regcomp_start(
1435 char_u *expr,
1436 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437{
1438 initchr(expr);
1439 if (re_flags & RE_MAGIC)
1440 reg_magic = MAGIC_ON;
1441 else
1442 reg_magic = MAGIC_OFF;
1443 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001444 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001445 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001446
1447 num_complex_braces = 0;
1448 regnpar = 1;
1449 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1450#ifdef FEAT_SYN_HL
1451 regnzpar = 1;
1452 re_has_z = 0;
1453#endif
1454 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001455 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001456 regflags = 0;
1457#if defined(FEAT_SYN_HL) || defined(PROTO)
1458 had_eol = FALSE;
1459#endif
1460}
1461
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the value of "had_eol": whether the EOL item "$" was found during
 * the previous call to vim_regcomp().  This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol(void)
{
    return had_eol;
}
#endif
1473
// State used while parsing a pattern.
static int      at_start;       // TRUE when on the first character
static int      prev_at_start;  // TRUE when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001477
Bram Moolenaar071d4272004-06-13 20:20:40 +00001478/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001479 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001480 *
1481 * Caller must absorb opening parenthesis.
1482 *
1483 * Combining parenthesis handling with the base level of regular expression
1484 * is a trifle forced, but the need to tie the tails of the branches to what
1485 * follows makes it hard to avoid.
1486 */
1487 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001488reg(
1489 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1490 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001491{
1492 char_u *ret;
1493 char_u *br;
1494 char_u *ender;
1495 int parno = 0;
1496 int flags;
1497
1498 *flagp = HASWIDTH; /* Tentatively. */
1499
1500#ifdef FEAT_SYN_HL
1501 if (paren == REG_ZPAREN)
1502 {
1503 /* Make a ZOPEN node. */
1504 if (regnzpar >= NSUBEXP)
1505 EMSG_RET_NULL(_("E50: Too many \\z("));
1506 parno = regnzpar;
1507 regnzpar++;
1508 ret = regnode(ZOPEN + parno);
1509 }
1510 else
1511#endif
1512 if (paren == REG_PAREN)
1513 {
1514 /* Make a MOPEN node. */
1515 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001516 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001517 parno = regnpar;
1518 ++regnpar;
1519 ret = regnode(MOPEN + parno);
1520 }
1521 else if (paren == REG_NPAREN)
1522 {
1523 /* Make a NOPEN node. */
1524 ret = regnode(NOPEN);
1525 }
1526 else
1527 ret = NULL;
1528
1529 /* Pick up the branches, linking them together. */
1530 br = regbranch(&flags);
1531 if (br == NULL)
1532 return NULL;
1533 if (ret != NULL)
1534 regtail(ret, br); /* [MZ]OPEN -> first. */
1535 else
1536 ret = br;
1537 /* If one of the branches can be zero-width, the whole thing can.
1538 * If one of the branches has * at start or matches a line-break, the
1539 * whole thing can. */
1540 if (!(flags & HASWIDTH))
1541 *flagp &= ~HASWIDTH;
1542 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1543 while (peekchr() == Magic('|'))
1544 {
1545 skipchr();
1546 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001547 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001548 return NULL;
1549 regtail(ret, br); /* BRANCH -> BRANCH. */
1550 if (!(flags & HASWIDTH))
1551 *flagp &= ~HASWIDTH;
1552 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1553 }
1554
1555 /* Make a closing node, and hook it on the end. */
1556 ender = regnode(
1557#ifdef FEAT_SYN_HL
1558 paren == REG_ZPAREN ? ZCLOSE + parno :
1559#endif
1560 paren == REG_PAREN ? MCLOSE + parno :
1561 paren == REG_NPAREN ? NCLOSE : END);
1562 regtail(ret, ender);
1563
1564 /* Hook the tails of the branches to the closing node. */
1565 for (br = ret; br != NULL; br = regnext(br))
1566 regoptail(br, ender);
1567
1568 /* Check for proper termination. */
1569 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1570 {
1571#ifdef FEAT_SYN_HL
1572 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001573 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001574 else
1575#endif
1576 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001577 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001579 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580 }
1581 else if (paren == REG_NOPAREN && peekchr() != NUL)
1582 {
1583 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001584 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001585 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001586 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001587 /* NOTREACHED */
1588 }
1589 /*
1590 * Here we set the flag allowing back references to this set of
1591 * parentheses.
1592 */
1593 if (paren == REG_PAREN)
1594 had_endbrace[parno] = TRUE; /* have seen the close paren */
1595 return ret;
1596}
1597
1598/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001599 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001600 * Implements the & operator.
1601 */
1602 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001603regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001604{
1605 char_u *ret;
1606 char_u *chain = NULL;
1607 char_u *latest;
1608 int flags;
1609
1610 *flagp = WORST | HASNL; /* Tentatively. */
1611
1612 ret = regnode(BRANCH);
1613 for (;;)
1614 {
1615 latest = regconcat(&flags);
1616 if (latest == NULL)
1617 return NULL;
1618 /* If one of the branches has width, the whole thing has. If one of
1619 * the branches anchors at start-of-line, the whole thing does.
1620 * If one of the branches uses look-behind, the whole thing does. */
1621 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1622 /* If one of the branches doesn't match a line-break, the whole thing
1623 * doesn't. */
1624 *flagp &= ~HASNL | (flags & HASNL);
1625 if (chain != NULL)
1626 regtail(chain, latest);
1627 if (peekchr() != Magic('&'))
1628 break;
1629 skipchr();
1630 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001631 if (reg_toolong)
1632 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001633 reginsert(MATCH, latest);
1634 chain = latest;
1635 }
1636
1637 return ret;
1638}
1639
1640/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001641 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001642 * Implements the concatenation operator.
1643 */
1644 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001645regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001646{
1647 char_u *first = NULL;
1648 char_u *chain = NULL;
1649 char_u *latest;
1650 int flags;
1651 int cont = TRUE;
1652
1653 *flagp = WORST; /* Tentatively. */
1654
1655 while (cont)
1656 {
1657 switch (peekchr())
1658 {
1659 case NUL:
1660 case Magic('|'):
1661 case Magic('&'):
1662 case Magic(')'):
1663 cont = FALSE;
1664 break;
1665 case Magic('Z'):
Bram Moolenaar071d4272004-06-13 20:20:40 +00001666 regflags |= RF_ICOMBINE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001667 skipchr_keepstart();
1668 break;
1669 case Magic('c'):
1670 regflags |= RF_ICASE;
1671 skipchr_keepstart();
1672 break;
1673 case Magic('C'):
1674 regflags |= RF_NOICASE;
1675 skipchr_keepstart();
1676 break;
1677 case Magic('v'):
1678 reg_magic = MAGIC_ALL;
1679 skipchr_keepstart();
1680 curchr = -1;
1681 break;
1682 case Magic('m'):
1683 reg_magic = MAGIC_ON;
1684 skipchr_keepstart();
1685 curchr = -1;
1686 break;
1687 case Magic('M'):
1688 reg_magic = MAGIC_OFF;
1689 skipchr_keepstart();
1690 curchr = -1;
1691 break;
1692 case Magic('V'):
1693 reg_magic = MAGIC_NONE;
1694 skipchr_keepstart();
1695 curchr = -1;
1696 break;
1697 default:
1698 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001699 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001700 return NULL;
1701 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1702 if (chain == NULL) /* First piece. */
1703 *flagp |= flags & SPSTART;
1704 else
1705 regtail(chain, latest);
1706 chain = latest;
1707 if (first == NULL)
1708 first = latest;
1709 break;
1710 }
1711 }
1712 if (first == NULL) /* Loop ran zero times. */
1713 first = regnode(NOTHING);
1714 return first;
1715}
1716
1717/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001718 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719 *
1720 * Note that the branching code sequences used for = and the general cases
1721 * of * and + are somewhat optimized: they use the same NOTHING node as
1722 * both the endmarker for their branch list and the body of the last branch.
1723 * It might seem that this node could be dispensed with entirely, but the
1724 * endmarker role is not redundant.
1725 */
1726 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001727regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001728{
1729 char_u *ret;
1730 int op;
1731 char_u *next;
1732 int flags;
1733 long minval;
1734 long maxval;
1735
1736 ret = regatom(&flags);
1737 if (ret == NULL)
1738 return NULL;
1739
1740 op = peekchr();
1741 if (re_multi_type(op) == NOT_MULTI)
1742 {
1743 *flagp = flags;
1744 return ret;
1745 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 /* default flags */
1747 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1748
1749 skipchr();
1750 switch (op)
1751 {
1752 case Magic('*'):
1753 if (flags & SIMPLE)
1754 reginsert(STAR, ret);
1755 else
1756 {
1757 /* Emit x* as (x&|), where & means "self". */
1758 reginsert(BRANCH, ret); /* Either x */
1759 regoptail(ret, regnode(BACK)); /* and loop */
1760 regoptail(ret, ret); /* back */
1761 regtail(ret, regnode(BRANCH)); /* or */
1762 regtail(ret, regnode(NOTHING)); /* null. */
1763 }
1764 break;
1765
1766 case Magic('+'):
1767 if (flags & SIMPLE)
1768 reginsert(PLUS, ret);
1769 else
1770 {
1771 /* Emit x+ as x(&|), where & means "self". */
1772 next = regnode(BRANCH); /* Either */
1773 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001774 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001775 regtail(next, regnode(BRANCH)); /* or */
1776 regtail(ret, regnode(NOTHING)); /* null. */
1777 }
1778 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1779 break;
1780
1781 case Magic('@'):
1782 {
1783 int lop = END;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001784 long nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001785
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001786 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001787 switch (no_Magic(getchr()))
1788 {
1789 case '=': lop = MATCH; break; /* \@= */
1790 case '!': lop = NOMATCH; break; /* \@! */
1791 case '>': lop = SUBPAT; break; /* \@> */
1792 case '<': switch (no_Magic(getchr()))
1793 {
1794 case '=': lop = BEHIND; break; /* \@<= */
1795 case '!': lop = NOBEHIND; break; /* \@<! */
1796 }
1797 }
1798 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001799 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001800 reg_magic == MAGIC_ALL);
1801 /* Look behind must match with behind_pos. */
1802 if (lop == BEHIND || lop == NOBEHIND)
1803 {
1804 regtail(ret, regnode(BHPOS));
1805 *flagp |= HASLOOKBH;
1806 }
1807 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001808 if (lop == BEHIND || lop == NOBEHIND)
1809 {
1810 if (nr < 0)
1811 nr = 0; /* no limit is same as zero limit */
1812 reginsert_nr(lop, nr, ret);
1813 }
1814 else
1815 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001816 break;
1817 }
1818
1819 case Magic('?'):
1820 case Magic('='):
1821 /* Emit x= as (x|) */
1822 reginsert(BRANCH, ret); /* Either x */
1823 regtail(ret, regnode(BRANCH)); /* or */
1824 next = regnode(NOTHING); /* null. */
1825 regtail(ret, next);
1826 regoptail(ret, next);
1827 break;
1828
1829 case Magic('{'):
1830 if (!read_limits(&minval, &maxval))
1831 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001832 if (flags & SIMPLE)
1833 {
1834 reginsert(BRACE_SIMPLE, ret);
1835 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1836 }
1837 else
1838 {
1839 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001840 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001841 reg_magic == MAGIC_ALL);
1842 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1843 regoptail(ret, regnode(BACK));
1844 regoptail(ret, ret);
1845 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1846 ++num_complex_braces;
1847 }
1848 if (minval > 0 && maxval > 0)
1849 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1850 break;
1851 }
1852 if (re_multi_type(peekchr()) != NOT_MULTI)
1853 {
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001854 // Can't have a multi follow a multi.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001855 if (peekchr() == Magic('*'))
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001856 EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
1857 EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
1858 no_Magic(peekchr()));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001859 }
1860
1861 return ret;
1862}
1863
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001864/* When making changes to classchars also change nfa_classcodes. */
1865static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1866static int classcodes[] = {
1867 ANY, IDENT, SIDENT, KWORD, SKWORD,
1868 FNAME, SFNAME, PRINT, SPRINT,
1869 WHITE, NWHITE, DIGIT, NDIGIT,
1870 HEX, NHEX, OCTAL, NOCTAL,
1871 WORD, NWORD, HEAD, NHEAD,
1872 ALPHA, NALPHA, LOWER, NLOWER,
1873 UPPER, NUPPER
1874};
1875
Bram Moolenaar071d4272004-06-13 20:20:40 +00001876/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001877 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001878 *
1879 * Optimization: gobbles an entire sequence of ordinary characters so that
1880 * it can turn them into a single node, which is smaller to store and
1881 * faster to run. Don't do this when one_exactly is set.
1882 */
1883 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001884regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001885{
1886 char_u *ret;
1887 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001888 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001889 char_u *p;
1890 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001891 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001892
1893 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001894
1895 c = getchr();
1896 switch (c)
1897 {
1898 case Magic('^'):
1899 ret = regnode(BOL);
1900 break;
1901
1902 case Magic('$'):
1903 ret = regnode(EOL);
1904#if defined(FEAT_SYN_HL) || defined(PROTO)
1905 had_eol = TRUE;
1906#endif
1907 break;
1908
1909 case Magic('<'):
1910 ret = regnode(BOW);
1911 break;
1912
1913 case Magic('>'):
1914 ret = regnode(EOW);
1915 break;
1916
1917 case Magic('_'):
1918 c = no_Magic(getchr());
1919 if (c == '^') /* "\_^" is start-of-line */
1920 {
1921 ret = regnode(BOL);
1922 break;
1923 }
1924 if (c == '$') /* "\_$" is end-of-line */
1925 {
1926 ret = regnode(EOL);
1927#if defined(FEAT_SYN_HL) || defined(PROTO)
1928 had_eol = TRUE;
1929#endif
1930 break;
1931 }
1932
1933 extra = ADD_NL;
1934 *flagp |= HASNL;
1935
1936 /* "\_[" is character range plus newline */
1937 if (c == '[')
1938 goto collection;
1939
1940 /* "\_x" is character class plus newline */
Bram Moolenaar2f40d122017-10-24 21:49:36 +02001941 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001942
1943 /*
1944 * Character classes.
1945 */
1946 case Magic('.'):
1947 case Magic('i'):
1948 case Magic('I'):
1949 case Magic('k'):
1950 case Magic('K'):
1951 case Magic('f'):
1952 case Magic('F'):
1953 case Magic('p'):
1954 case Magic('P'):
1955 case Magic('s'):
1956 case Magic('S'):
1957 case Magic('d'):
1958 case Magic('D'):
1959 case Magic('x'):
1960 case Magic('X'):
1961 case Magic('o'):
1962 case Magic('O'):
1963 case Magic('w'):
1964 case Magic('W'):
1965 case Magic('h'):
1966 case Magic('H'):
1967 case Magic('a'):
1968 case Magic('A'):
1969 case Magic('l'):
1970 case Magic('L'):
1971 case Magic('u'):
1972 case Magic('U'):
1973 p = vim_strchr(classchars, no_Magic(c));
1974 if (p == NULL)
1975 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaara12a1612019-01-24 16:39:02 +01001976
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001977 /* When '.' is followed by a composing char ignore the dot, so that
1978 * the composing char is matched here. */
1979 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1980 {
1981 c = getchr();
1982 goto do_multibyte;
1983 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001984 ret = regnode(classcodes[p - classchars] + extra);
1985 *flagp |= HASWIDTH | SIMPLE;
1986 break;
1987
1988 case Magic('n'):
1989 if (reg_string)
1990 {
1991 /* In a string "\n" matches a newline character. */
1992 ret = regnode(EXACTLY);
1993 regc(NL);
1994 regc(NUL);
1995 *flagp |= HASWIDTH | SIMPLE;
1996 }
1997 else
1998 {
1999 /* In buffer text "\n" matches the end of a line. */
2000 ret = regnode(NEWL);
2001 *flagp |= HASWIDTH | HASNL;
2002 }
2003 break;
2004
2005 case Magic('('):
2006 if (one_exactly)
2007 EMSG_ONE_RET_NULL;
2008 ret = reg(REG_PAREN, &flags);
2009 if (ret == NULL)
2010 return NULL;
2011 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2012 break;
2013
2014 case NUL:
2015 case Magic('|'):
2016 case Magic('&'):
2017 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002018 if (one_exactly)
2019 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002020 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002021 /* NOTREACHED */
2022
2023 case Magic('='):
2024 case Magic('?'):
2025 case Magic('+'):
2026 case Magic('@'):
2027 case Magic('{'):
2028 case Magic('*'):
2029 c = no_Magic(c);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01002030 EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
2031 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002032 /* NOTREACHED */
2033
2034 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002035 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002036 {
2037 char_u *lp;
2038
2039 ret = regnode(EXACTLY);
2040 lp = reg_prev_sub;
2041 while (*lp != NUL)
2042 regc(*lp++);
2043 regc(NUL);
2044 if (*reg_prev_sub != NUL)
2045 {
2046 *flagp |= HASWIDTH;
2047 if ((lp - reg_prev_sub) == 1)
2048 *flagp |= SIMPLE;
2049 }
2050 }
2051 else
2052 EMSG_RET_NULL(_(e_nopresub));
2053 break;
2054
2055 case Magic('1'):
2056 case Magic('2'):
2057 case Magic('3'):
2058 case Magic('4'):
2059 case Magic('5'):
2060 case Magic('6'):
2061 case Magic('7'):
2062 case Magic('8'):
2063 case Magic('9'):
2064 {
2065 int refnum;
2066
2067 refnum = c - Magic('0');
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02002068 if (!seen_endbrace(refnum))
2069 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002070 ret = regnode(BACKREF + refnum);
2071 }
2072 break;
2073
Bram Moolenaar071d4272004-06-13 20:20:40 +00002074 case Magic('z'):
2075 {
2076 c = no_Magic(getchr());
2077 switch (c)
2078 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002079#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002080 case '(': if ((reg_do_extmatch & REX_SET) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002081 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002082 if (one_exactly)
2083 EMSG_ONE_RET_NULL;
2084 ret = reg(REG_ZPAREN, &flags);
2085 if (ret == NULL)
2086 return NULL;
2087 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2088 re_has_z = REX_SET;
2089 break;
2090
2091 case '1':
2092 case '2':
2093 case '3':
2094 case '4':
2095 case '5':
2096 case '6':
2097 case '7':
2098 case '8':
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002099 case '9': if ((reg_do_extmatch & REX_USE) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002100 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 ret = regnode(ZREF + c - '0');
2102 re_has_z = REX_USE;
2103 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002104#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002105
2106 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002107 if (re_mult_next("\\zs") == FAIL)
2108 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002109 break;
2110
2111 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002112 if (re_mult_next("\\ze") == FAIL)
2113 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002114 break;
2115
2116 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2117 }
2118 }
2119 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120
2121 case Magic('%'):
2122 {
2123 c = no_Magic(getchr());
2124 switch (c)
2125 {
2126 /* () without a back reference */
2127 case '(':
2128 if (one_exactly)
2129 EMSG_ONE_RET_NULL;
2130 ret = reg(REG_NPAREN, &flags);
2131 if (ret == NULL)
2132 return NULL;
2133 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2134 break;
2135
2136 /* Catch \%^ and \%$ regardless of where they appear in the
2137 * pattern -- regardless of whether or not it makes sense. */
2138 case '^':
2139 ret = regnode(RE_BOF);
2140 break;
2141
2142 case '$':
2143 ret = regnode(RE_EOF);
2144 break;
2145
2146 case '#':
2147 ret = regnode(CURSOR);
2148 break;
2149
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002150 case 'V':
2151 ret = regnode(RE_VISUAL);
2152 break;
2153
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002154 case 'C':
2155 ret = regnode(RE_COMPOSING);
2156 break;
2157
Bram Moolenaar071d4272004-06-13 20:20:40 +00002158 /* \%[abc]: Emit as a list of branches, all ending at the last
2159 * branch which matches nothing. */
2160 case '[':
2161 if (one_exactly) /* doesn't nest */
2162 EMSG_ONE_RET_NULL;
2163 {
2164 char_u *lastbranch;
2165 char_u *lastnode = NULL;
2166 char_u *br;
2167
2168 ret = NULL;
2169 while ((c = getchr()) != ']')
2170 {
2171 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002172 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002173 reg_magic == MAGIC_ALL);
2174 br = regnode(BRANCH);
2175 if (ret == NULL)
2176 ret = br;
2177 else
Bram Moolenaar2a5b5272019-07-20 18:56:06 +02002178 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002179 regtail(lastnode, br);
Bram Moolenaar2a5b5272019-07-20 18:56:06 +02002180 if (reg_toolong)
2181 return NULL;
2182 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002183
2184 ungetchr();
2185 one_exactly = TRUE;
2186 lastnode = regatom(flagp);
2187 one_exactly = FALSE;
2188 if (lastnode == NULL)
2189 return NULL;
2190 }
2191 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002192 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002193 reg_magic == MAGIC_ALL);
2194 lastbranch = regnode(BRANCH);
2195 br = regnode(NOTHING);
2196 if (ret != JUST_CALC_SIZE)
2197 {
2198 regtail(lastnode, br);
2199 regtail(lastbranch, br);
2200 /* connect all branches to the NOTHING
2201 * branch at the end */
2202 for (br = ret; br != lastnode; )
2203 {
2204 if (OP(br) == BRANCH)
2205 {
2206 regtail(br, lastbranch);
Bram Moolenaar2a5b5272019-07-20 18:56:06 +02002207 if (reg_toolong)
2208 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002209 br = OPERAND(br);
2210 }
2211 else
2212 br = regnext(br);
2213 }
2214 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002215 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002216 break;
2217 }
2218
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002219 case 'd': /* %d123 decimal */
2220 case 'o': /* %o123 octal */
2221 case 'x': /* %xab hex 2 */
2222 case 'u': /* %uabcd hex 4 */
2223 case 'U': /* %U1234abcd hex 8 */
2224 {
Bram Moolenaar4c22a912017-11-02 22:29:38 +01002225 long i;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002226
2227 switch (c)
2228 {
2229 case 'd': i = getdecchrs(); break;
2230 case 'o': i = getoctchrs(); break;
2231 case 'x': i = gethexchrs(2); break;
2232 case 'u': i = gethexchrs(4); break;
2233 case 'U': i = gethexchrs(8); break;
2234 default: i = -1; break;
2235 }
2236
Bram Moolenaarab350f82019-02-28 06:25:00 +01002237 if (i < 0 || i > INT_MAX)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002238 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002239 _("E678: Invalid character after %s%%[dxouU]"),
2240 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002241 if (use_multibytecode(i))
2242 ret = regnode(MULTIBYTECODE);
2243 else
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002244 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002245 if (i == 0)
2246 regc(0x0a);
2247 else
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002248 regmbc(i);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002249 regc(NUL);
2250 *flagp |= HASWIDTH;
2251 break;
2252 }
2253
Bram Moolenaar071d4272004-06-13 20:20:40 +00002254 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002255 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2256 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002257 {
2258 long_u n = 0;
2259 int cmp;
2260
2261 cmp = c;
2262 if (cmp == '<' || cmp == '>')
2263 c = getchr();
2264 while (VIM_ISDIGIT(c))
2265 {
2266 n = n * 10 + (c - '0');
2267 c = getchr();
2268 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002269 if (c == '\'' && n == 0)
2270 {
2271 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2272 c = getchr();
2273 ret = regnode(RE_MARK);
2274 if (ret == JUST_CALC_SIZE)
2275 regsize += 2;
2276 else
2277 {
2278 *regcode++ = c;
2279 *regcode++ = cmp;
2280 }
2281 break;
2282 }
2283 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002284 {
2285 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002286 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002287 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002288 if (save_prev_at_start)
2289 at_start = TRUE;
2290 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002291 else if (c == 'c')
2292 ret = regnode(RE_COL);
2293 else
2294 ret = regnode(RE_VCOL);
2295 if (ret == JUST_CALC_SIZE)
2296 regsize += 5;
2297 else
2298 {
2299 /* put the number and the optional
2300 * comparator after the opcode */
2301 regcode = re_put_long(regcode, n);
2302 *regcode++ = cmp;
2303 }
2304 break;
2305 }
2306 }
2307
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002308 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002309 reg_magic == MAGIC_ALL);
2310 }
2311 }
2312 break;
2313
2314 case Magic('['):
2315collection:
2316 {
2317 char_u *lp;
2318
2319 /*
2320 * If there is no matching ']', we assume the '[' is a normal
2321 * character. This makes 'incsearch' and ":help [" work.
2322 */
2323 lp = skip_anyof(regparse);
2324 if (*lp == ']') /* there is a matching ']' */
2325 {
2326 int startc = -1; /* > 0 when next '-' is a range */
2327 int endc;
2328
2329 /*
2330 * In a character class, different parsing rules apply.
2331 * Not even \ is special anymore, nothing is.
2332 */
2333 if (*regparse == '^') /* Complement of range. */
2334 {
2335 ret = regnode(ANYBUT + extra);
2336 regparse++;
2337 }
2338 else
2339 ret = regnode(ANYOF + extra);
2340
2341 /* At the start ']' and '-' mean the literal character. */
2342 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002343 {
2344 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002345 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002346 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002347
2348 while (*regparse != NUL && *regparse != ']')
2349 {
2350 if (*regparse == '-')
2351 {
2352 ++regparse;
2353 /* The '-' is not used for a range at the end and
2354 * after or before a '\n'. */
2355 if (*regparse == ']' || *regparse == NUL
2356 || startc == -1
2357 || (regparse[0] == '\\' && regparse[1] == 'n'))
2358 {
2359 regc('-');
2360 startc = '-'; /* [--x] is a range */
2361 }
2362 else
2363 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002364 /* Also accept "a-[.z.]" */
2365 endc = 0;
2366 if (*regparse == '[')
2367 endc = get_coll_element(&regparse);
2368 if (endc == 0)
2369 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002370 if (has_mbyte)
2371 endc = mb_ptr2char_adv(&regparse);
2372 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00002373 endc = *regparse++;
2374 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002375
2376 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002377 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002378 endc = coll_get_char();
2379
Bram Moolenaar071d4272004-06-13 20:20:40 +00002380 if (startc > endc)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002381 EMSG_RET_NULL(_(e_reverse_range));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002382 if (has_mbyte && ((*mb_char2len)(startc) > 1
2383 || (*mb_char2len)(endc) > 1))
2384 {
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002385 /* Limit to a range of 256 chars. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002386 if (endc > startc + 256)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002387 EMSG_RET_NULL(_(e_large_class));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002388 while (++startc <= endc)
2389 regmbc(startc);
2390 }
2391 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002392 {
2393#ifdef EBCDIC
2394 int alpha_only = FALSE;
2395
2396 /* for alphabetical range skip the gaps
2397 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2398 if (isalpha(startc) && isalpha(endc))
2399 alpha_only = TRUE;
2400#endif
2401 while (++startc <= endc)
2402#ifdef EBCDIC
2403 if (!alpha_only || isalpha(startc))
2404#endif
2405 regc(startc);
2406 }
2407 startc = -1;
2408 }
2409 }
2410 /*
2411 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2412 * accepts "\t", "\e", etc., but only when the 'l' flag in
2413 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002414 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002415 */
2416 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002417 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002418 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002419 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420 && vim_strchr(REGEXP_ABBR,
2421 regparse[1]) != NULL)))
2422 {
2423 regparse++;
2424 if (*regparse == 'n')
2425 {
2426 /* '\n' in range: also match NL */
2427 if (ret != JUST_CALC_SIZE)
2428 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002429 /* Using \n inside [^] does not change what
2430 * matches. "[^\n]" is the same as ".". */
2431 if (*ret == ANYOF)
2432 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002433 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002434 *flagp |= HASNL;
2435 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002436 /* else: must have had a \n already */
2437 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002438 regparse++;
2439 startc = -1;
2440 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002441 else if (*regparse == 'd'
2442 || *regparse == 'o'
2443 || *regparse == 'x'
2444 || *regparse == 'u'
2445 || *regparse == 'U')
2446 {
2447 startc = coll_get_char();
2448 if (startc == 0)
2449 regc(0x0a);
2450 else
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002451 regmbc(startc);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002452 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002453 else
2454 {
2455 startc = backslash_trans(*regparse++);
2456 regc(startc);
2457 }
2458 }
2459 else if (*regparse == '[')
2460 {
2461 int c_class;
2462 int cu;
2463
Bram Moolenaardf177f62005-02-22 08:39:57 +00002464 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002465 startc = -1;
2466 /* Characters assumed to be 8 bits! */
2467 switch (c_class)
2468 {
2469 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002470 c_class = get_equi_class(&regparse);
2471 if (c_class != 0)
2472 {
2473 /* produce equivalence class */
2474 reg_equi_class(c_class);
2475 }
2476 else if ((c_class =
2477 get_coll_element(&regparse)) != 0)
2478 {
2479 /* produce a collating element */
2480 regmbc(c_class);
2481 }
2482 else
2483 {
2484 /* literal '[', allow [[-x] as a range */
2485 startc = *regparse++;
2486 regc(startc);
2487 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 break;
2489 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002490 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002491 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002492 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002493 break;
2494 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002495 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002496 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002497 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002498 break;
2499 case CLASS_BLANK:
2500 regc(' ');
2501 regc('\t');
2502 break;
2503 case CLASS_CNTRL:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002504 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002505 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002506 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 break;
2508 case CLASS_DIGIT:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002509 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002510 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002511 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002512 break;
2513 case CLASS_GRAPH:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002514 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002516 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002517 break;
2518 case CLASS_LOWER:
2519 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002520 if (MB_ISLOWER(cu) && cu != 170
2521 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002522 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002523 break;
2524 case CLASS_PRINT:
2525 for (cu = 1; cu <= 255; cu++)
2526 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002527 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002528 break;
2529 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002530 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002531 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002532 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002533 break;
2534 case CLASS_SPACE:
2535 for (cu = 9; cu <= 13; cu++)
2536 regc(cu);
2537 regc(' ');
2538 break;
2539 case CLASS_UPPER:
2540 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002541 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002542 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002543 break;
2544 case CLASS_XDIGIT:
2545 for (cu = 1; cu <= 255; cu++)
2546 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002547 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002548 break;
2549 case CLASS_TAB:
2550 regc('\t');
2551 break;
2552 case CLASS_RETURN:
2553 regc('\r');
2554 break;
2555 case CLASS_BACKSPACE:
2556 regc('\b');
2557 break;
2558 case CLASS_ESCAPE:
2559 regc('\033');
2560 break;
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01002561 case CLASS_IDENT:
2562 for (cu = 1; cu <= 255; cu++)
2563 if (vim_isIDc(cu))
2564 regmbc(cu);
2565 break;
2566 case CLASS_KEYWORD:
2567 for (cu = 1; cu <= 255; cu++)
2568 if (reg_iswordc(cu))
2569 regmbc(cu);
2570 break;
2571 case CLASS_FNAME:
2572 for (cu = 1; cu <= 255; cu++)
2573 if (vim_isfilec(cu))
2574 regmbc(cu);
2575 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002576 }
2577 }
2578 else
2579 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002580 if (has_mbyte)
2581 {
2582 int len;
2583
2584 /* produce a multibyte character, including any
2585 * following composing characters */
2586 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002587 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002588 if (enc_utf8 && utf_char2len(startc) != len)
2589 startc = -1; /* composing chars */
2590 while (--len >= 0)
2591 regc(*regparse++);
2592 }
2593 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002594 {
2595 startc = *regparse++;
2596 regc(startc);
2597 }
2598 }
2599 }
2600 regc(NUL);
2601 prevchr_len = 1; /* last char was the ']' */
2602 if (*regparse != ']')
2603 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2604 skipchr(); /* let's be friends with the lexer again */
2605 *flagp |= HASWIDTH | SIMPLE;
2606 break;
2607 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002608 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002609 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002610 }
2611 /* FALLTHROUGH */
2612
2613 default:
2614 {
2615 int len;
2616
Bram Moolenaar071d4272004-06-13 20:20:40 +00002617 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002618 * before a multi and when it's a composing char. */
2619 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002620 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002621do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002622 ret = regnode(MULTIBYTECODE);
2623 regmbc(c);
2624 *flagp |= HASWIDTH | SIMPLE;
2625 break;
2626 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002627
2628 ret = regnode(EXACTLY);
2629
2630 /*
2631 * Append characters as long as:
2632 * - there is no following multi, we then need the character in
2633 * front of it as a single character operand
2634 * - not running into a Magic character
2635 * - "one_exactly" is not set
2636 * But always emit at least one character. Might be a Multi,
2637 * e.g., a "[" without matching "]".
2638 */
2639 for (len = 0; c != NUL && (len == 0
2640 || (re_multi_type(peekchr()) == NOT_MULTI
2641 && !one_exactly
2642 && !is_Magic(c))); ++len)
2643 {
2644 c = no_Magic(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002645 if (has_mbyte)
2646 {
2647 regmbc(c);
2648 if (enc_utf8)
2649 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002650 int l;
2651
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002652 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002653 for (;;)
2654 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002655 l = utf_ptr2len(regparse);
2656 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002657 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002658 regmbc(utf_ptr2char(regparse));
2659 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002660 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002661 }
2662 }
2663 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664 regc(c);
2665 c = getchr();
2666 }
2667 ungetchr();
2668
2669 regc(NUL);
2670 *flagp |= HASWIDTH;
2671 if (len == 1)
2672 *flagp |= SIMPLE;
2673 }
2674 break;
2675 }
2676
2677 return ret;
2678}
2679
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002680/*
2681 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2682 * character "c".
2683 */
2684 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002685use_multibytecode(int c)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002686{
2687 return has_mbyte && (*mb_char2len)(c) > 1
2688 && (re_multi_type(peekchr()) != NOT_MULTI
2689 || (enc_utf8 && utf_iscomposing(c)));
2690}
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002691
Bram Moolenaar071d4272004-06-13 20:20:40 +00002692/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002693 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002694 * Return pointer to generated code.
2695 */
2696 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002697regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002698{
2699 char_u *ret;
2700
2701 ret = regcode;
2702 if (ret == JUST_CALC_SIZE)
2703 regsize += 3;
2704 else
2705 {
2706 *regcode++ = op;
2707 *regcode++ = NUL; /* Null "next" pointer. */
2708 *regcode++ = NUL;
2709 }
2710 return ret;
2711}
2712
2713/*
2714 * Emit (if appropriate) a byte of code
2715 */
2716 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002717regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002718{
2719 if (regcode == JUST_CALC_SIZE)
2720 regsize++;
2721 else
2722 *regcode++ = b;
2723}
2724
Bram Moolenaar071d4272004-06-13 20:20:40 +00002725/*
2726 * Emit (if appropriate) a multi-byte character of code
2727 */
2728 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002729regmbc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002730{
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002731 if (!has_mbyte && c > 0xff)
2732 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002733 if (regcode == JUST_CALC_SIZE)
2734 regsize += (*mb_char2len)(c);
2735 else
2736 regcode += (*mb_char2bytes)(c, regcode);
2737}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002738
2739/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002740 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002741 *
2742 * Means relocating the operand.
2743 */
2744 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002745reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002746{
2747 char_u *src;
2748 char_u *dst;
2749 char_u *place;
2750
2751 if (regcode == JUST_CALC_SIZE)
2752 {
2753 regsize += 3;
2754 return;
2755 }
2756 src = regcode;
2757 regcode += 3;
2758 dst = regcode;
2759 while (src > opnd)
2760 *--dst = *--src;
2761
2762 place = opnd; /* Op node, where operand used to be. */
2763 *place++ = op;
2764 *place++ = NUL;
2765 *place = NUL;
2766}
2767
2768/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002769 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002770 * Add a number to the operator.
2771 */
2772 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002773reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002774{
2775 char_u *src;
2776 char_u *dst;
2777 char_u *place;
2778
2779 if (regcode == JUST_CALC_SIZE)
2780 {
2781 regsize += 7;
2782 return;
2783 }
2784 src = regcode;
2785 regcode += 7;
2786 dst = regcode;
2787 while (src > opnd)
2788 *--dst = *--src;
2789
2790 place = opnd; /* Op node, where operand used to be. */
2791 *place++ = op;
2792 *place++ = NUL;
2793 *place++ = NUL;
Bram Moolenaar2c519cf2019-03-21 21:45:34 +01002794 re_put_long(place, (long_u)val);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002795}
2796
2797/*
2798 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002799 * The operator has the given limit values as operands. Also set next pointer.
2800 *
2801 * Means relocating the operand.
2802 */
2803 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002804reginsert_limits(
2805 int op,
2806 long minval,
2807 long maxval,
2808 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002809{
2810 char_u *src;
2811 char_u *dst;
2812 char_u *place;
2813
2814 if (regcode == JUST_CALC_SIZE)
2815 {
2816 regsize += 11;
2817 return;
2818 }
2819 src = regcode;
2820 regcode += 11;
2821 dst = regcode;
2822 while (src > opnd)
2823 *--dst = *--src;
2824
2825 place = opnd; /* Op node, where operand used to be. */
2826 *place++ = op;
2827 *place++ = NUL;
2828 *place++ = NUL;
2829 place = re_put_long(place, (long_u)minval);
2830 place = re_put_long(place, (long_u)maxval);
2831 regtail(opnd, place);
2832}
2833
2834/*
2835 * Write a long as four bytes at "p" and return pointer to the next char.
2836 */
2837 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002838re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002839{
2840 *p++ = (char_u) ((val >> 24) & 0377);
2841 *p++ = (char_u) ((val >> 16) & 0377);
2842 *p++ = (char_u) ((val >> 8) & 0377);
2843 *p++ = (char_u) (val & 0377);
2844 return p;
2845}
2846
2847/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002848 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002849 */
2850 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002851regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002852{
2853 char_u *scan;
2854 char_u *temp;
2855 int offset;
2856
2857 if (p == JUST_CALC_SIZE)
2858 return;
2859
2860 /* Find last node. */
2861 scan = p;
2862 for (;;)
2863 {
2864 temp = regnext(scan);
2865 if (temp == NULL)
2866 break;
2867 scan = temp;
2868 }
2869
Bram Moolenaar582fd852005-03-28 20:58:01 +00002870 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002871 offset = (int)(scan - val);
2872 else
2873 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002874 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002875 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002876 * values in too many places. */
2877 if (offset > 0xffff)
2878 reg_toolong = TRUE;
2879 else
2880 {
2881 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2882 *(scan + 2) = (char_u) (offset & 0377);
2883 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002884}
2885
2886/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002887 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002888 */
2889 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002890regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002891{
2892 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2893 if (p == NULL || p == JUST_CALC_SIZE
2894 || (OP(p) != BRANCH
2895 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2896 return;
2897 regtail(OPERAND(p), val);
2898}
2899
2900/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002901 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002902 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002903/*
2904 * Start parsing at "str".
2905 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002906 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002907initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002908{
2909 regparse = str;
2910 prevchr_len = 0;
2911 curchr = prevprevchr = prevchr = nextchr = -1;
2912 at_start = TRUE;
2913 prev_at_start = FALSE;
2914}
2915
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002916/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002917 * Save the current parse state, so that it can be restored and parsing
2918 * starts in the same state again.
2919 */
2920 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002921save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002922{
2923 ps->regparse = regparse;
2924 ps->prevchr_len = prevchr_len;
2925 ps->curchr = curchr;
2926 ps->prevchr = prevchr;
2927 ps->prevprevchr = prevprevchr;
2928 ps->nextchr = nextchr;
2929 ps->at_start = at_start;
2930 ps->prev_at_start = prev_at_start;
2931 ps->regnpar = regnpar;
2932}
2933
2934/*
2935 * Restore a previously saved parse state.
2936 */
2937 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002938restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002939{
2940 regparse = ps->regparse;
2941 prevchr_len = ps->prevchr_len;
2942 curchr = ps->curchr;
2943 prevchr = ps->prevchr;
2944 prevprevchr = ps->prevprevchr;
2945 nextchr = ps->nextchr;
2946 at_start = ps->at_start;
2947 prev_at_start = ps->prev_at_start;
2948 regnpar = ps->regnpar;
2949}
2950
2951
2952/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002953 * Get the next character without advancing.
2954 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002955 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002956peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002958 static int after_slash = FALSE;
2959
Bram Moolenaar071d4272004-06-13 20:20:40 +00002960 if (curchr == -1)
2961 {
2962 switch (curchr = regparse[0])
2963 {
2964 case '.':
2965 case '[':
2966 case '~':
2967 /* magic when 'magic' is on */
2968 if (reg_magic >= MAGIC_ON)
2969 curchr = Magic(curchr);
2970 break;
2971 case '(':
2972 case ')':
2973 case '{':
2974 case '%':
2975 case '+':
2976 case '=':
2977 case '?':
2978 case '@':
2979 case '!':
2980 case '&':
2981 case '|':
2982 case '<':
2983 case '>':
2984 case '#': /* future ext. */
2985 case '"': /* future ext. */
2986 case '\'': /* future ext. */
2987 case ',': /* future ext. */
2988 case '-': /* future ext. */
2989 case ':': /* future ext. */
2990 case ';': /* future ext. */
2991 case '`': /* future ext. */
2992 case '/': /* Can't be used in / command */
2993 /* magic only after "\v" */
2994 if (reg_magic == MAGIC_ALL)
2995 curchr = Magic(curchr);
2996 break;
2997 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002998 /* * is not magic as the very first character, eg "?*ptr", when
2999 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3000 * "\(\*" is not magic, thus must be magic if "after_slash" */
3001 if (reg_magic >= MAGIC_ON
3002 && !at_start
3003 && !(prev_at_start && prevchr == Magic('^'))
3004 && (after_slash
3005 || (prevchr != Magic('(')
3006 && prevchr != Magic('&')
3007 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003008 curchr = Magic('*');
3009 break;
3010 case '^':
3011 /* '^' is only magic as the very first character and if it's after
3012 * "\(", "\|", "\&' or "\n" */
3013 if (reg_magic >= MAGIC_OFF
3014 && (at_start
3015 || reg_magic == MAGIC_ALL
3016 || prevchr == Magic('(')
3017 || prevchr == Magic('|')
3018 || prevchr == Magic('&')
3019 || prevchr == Magic('n')
3020 || (no_Magic(prevchr) == '('
3021 && prevprevchr == Magic('%'))))
3022 {
3023 curchr = Magic('^');
3024 at_start = TRUE;
3025 prev_at_start = FALSE;
3026 }
3027 break;
3028 case '$':
3029 /* '$' is only magic as the very last char and if it's in front of
3030 * either "\|", "\)", "\&", or "\n" */
3031 if (reg_magic >= MAGIC_OFF)
3032 {
3033 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003034 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003035
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003036 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003037 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003038 || p[1] == 'm' || p[1] == 'M'
3039 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3040 {
3041 if (p[1] == 'v')
3042 is_magic_all = TRUE;
3043 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3044 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003045 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003046 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003047 if (p[0] == NUL
3048 || (p[0] == '\\'
3049 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3050 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003051 || (is_magic_all
3052 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003053 || reg_magic == MAGIC_ALL)
3054 curchr = Magic('$');
3055 }
3056 break;
3057 case '\\':
3058 {
3059 int c = regparse[1];
3060
3061 if (c == NUL)
3062 curchr = '\\'; /* trailing '\' */
3063 else if (
3064#ifdef EBCDIC
3065 vim_strchr(META, c)
3066#else
3067 c <= '~' && META_flags[c]
3068#endif
3069 )
3070 {
3071 /*
3072 * META contains everything that may be magic sometimes,
3073 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003074 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003075 * magicness. Therefore, \ is so meta-magic that it is
3076 * not in META.
3077 */
3078 curchr = -1;
3079 prev_at_start = at_start;
3080 at_start = FALSE; /* be able to say "/\*ptr" */
3081 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003082 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003083 peekchr();
3084 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003085 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003086 curchr = toggle_Magic(curchr);
3087 }
3088 else if (vim_strchr(REGEXP_ABBR, c))
3089 {
3090 /*
3091 * Handle abbreviations, like "\t" for TAB -- webb
3092 */
3093 curchr = backslash_trans(c);
3094 }
3095 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3096 curchr = toggle_Magic(c);
3097 else
3098 {
3099 /*
3100 * Next character can never be (made) magic?
3101 * Then backslashing it won't do anything.
3102 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003103 if (has_mbyte)
3104 curchr = (*mb_ptr2char)(regparse + 1);
3105 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003106 curchr = c;
3107 }
3108 break;
3109 }
3110
Bram Moolenaar071d4272004-06-13 20:20:40 +00003111 default:
3112 if (has_mbyte)
3113 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003114 }
3115 }
3116
3117 return curchr;
3118}
3119
3120/*
3121 * Eat one lexed character. Do this in a way that we can undo it.
3122 */
3123 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003124skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003125{
3126 /* peekchr() eats a backslash, do the same here */
3127 if (*regparse == '\\')
3128 prevchr_len = 1;
3129 else
3130 prevchr_len = 0;
3131 if (regparse[prevchr_len] != NUL)
3132 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003133 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003134 /* exclude composing chars that mb_ptr2len does include */
3135 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003136 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003137 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003138 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003139 ++prevchr_len;
3140 }
3141 regparse += prevchr_len;
3142 prev_at_start = at_start;
3143 at_start = FALSE;
3144 prevprevchr = prevchr;
3145 prevchr = curchr;
3146 curchr = nextchr; /* use previously unget char, or -1 */
3147 nextchr = -1;
3148}
3149
3150/*
3151 * Skip a character while keeping the value of prev_at_start for at_start.
3152 * prevchr and prevprevchr are also kept.
3153 */
3154 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003155skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003156{
3157 int as = prev_at_start;
3158 int pr = prevchr;
3159 int prpr = prevprevchr;
3160
3161 skipchr();
3162 at_start = as;
3163 prevchr = pr;
3164 prevprevchr = prpr;
3165}
3166
/*
 * Get the next character from the pattern and advance past it.
 * This goes through the lexer, peekchr(), because whether a character is
 * magic depends on its context.
 */
    static int
getchr(void)
{
    int	    lexed = peekchr();

    skipchr();
    return lexed;
}
3179
3180/*
3181 * put character back. Works only once!
3182 */
3183 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003184ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003185{
3186 nextchr = curchr;
3187 curchr = prevchr;
3188 prevchr = prevprevchr;
3189 at_start = prev_at_start;
3190 prev_at_start = FALSE;
3191
3192 /* Backup regparse, so that it's at the same position as before the
3193 * getchr(). */
3194 regparse -= prevchr_len;
3195}
3196
3197/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003198 * Get and return the value of the hex string at the current position.
3199 * Return -1 if there is no valid hex number.
3200 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003201 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003202 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003203 * The parameter controls the maximum number of input characters. This will be
3204 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3205 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003206 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003207gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003208{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003209 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003210 int c;
3211 int i;
3212
3213 for (i = 0; i < maxinputlen; ++i)
3214 {
3215 c = regparse[0];
3216 if (!vim_isxdigit(c))
3217 break;
3218 nr <<= 4;
3219 nr |= hex2nr(c);
3220 ++regparse;
3221 }
3222
3223 if (i == 0)
3224 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003225 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003226}
3227
3228/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003229 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003230 * current position. Return -1 for invalid. Consumes all digits.
3231 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003232 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003233getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003234{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003235 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003236 int c;
3237 int i;
3238
3239 for (i = 0; ; ++i)
3240 {
3241 c = regparse[0];
3242 if (c < '0' || c > '9')
3243 break;
3244 nr *= 10;
3245 nr += c - '0';
3246 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003247 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003248 }
3249
3250 if (i == 0)
3251 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003252 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003253}
3254
3255/*
3256 * get and return the value of the octal string immediately after the current
3257 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3258 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3259 * treat 8 or 9 as recognised characters. Position is updated:
3260 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003261 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003262 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003263 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003264getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003265{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003266 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003267 int c;
3268 int i;
3269
3270 for (i = 0; i < 3 && nr < 040; ++i)
3271 {
3272 c = regparse[0];
3273 if (c < '0' || c > '7')
3274 break;
3275 nr <<= 3;
3276 nr |= hex2nr(c);
3277 ++regparse;
3278 }
3279
3280 if (i == 0)
3281 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003282 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003283}
3284
3285/*
3286 * Get a number after a backslash that is inside [].
3287 * When nothing is recognized return a backslash.
3288 */
3289 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003290coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003291{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003292 long nr = -1;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003293
3294 switch (*regparse++)
3295 {
3296 case 'd': nr = getdecchrs(); break;
3297 case 'o': nr = getoctchrs(); break;
3298 case 'x': nr = gethexchrs(2); break;
3299 case 'u': nr = gethexchrs(4); break;
3300 case 'U': nr = gethexchrs(8); break;
3301 }
Bram Moolenaarab350f82019-02-28 06:25:00 +01003302 if (nr < 0 || nr > INT_MAX)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003303 {
3304 /* If getting the number fails be backwards compatible: the character
3305 * is a backslash. */
3306 --regparse;
3307 nr = '\\';
3308 }
3309 return nr;
3310}
3311
3312/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003313 * read_limits - Read two integers to be taken as a minimum and maximum.
3314 * If the first character is '-', then the range is reversed.
3315 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3316 * missing, a very big number is the default.
3317 */
3318 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003319read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003320{
3321 int reverse = FALSE;
3322 char_u *first_char;
3323 long tmp;
3324
3325 if (*regparse == '-')
3326 {
3327 /* Starts with '-', so reverse the range later */
3328 regparse++;
3329 reverse = TRUE;
3330 }
3331 first_char = regparse;
3332 *minval = getdigits(&regparse);
3333 if (*regparse == ',') /* There is a comma */
3334 {
3335 if (vim_isdigit(*++regparse))
3336 *maxval = getdigits(&regparse);
3337 else
3338 *maxval = MAX_LIMIT;
3339 }
3340 else if (VIM_ISDIGIT(*first_char))
3341 *maxval = *minval; /* It was \{n} or \{-n} */
3342 else
3343 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3344 if (*regparse == '\\')
3345 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003346 if (*regparse != '}')
Bram Moolenaar1be45b22019-01-14 22:46:15 +01003347 EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"),
3348 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003349
3350 /*
3351 * Reverse the range if there was a '-', or make sure it is in the right
3352 * order otherwise.
3353 */
3354 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3355 {
3356 tmp = *minval;
3357 *minval = *maxval;
3358 *maxval = tmp;
3359 }
3360 skipchr(); /* let's be friends with the lexer again */
3361 return OK;
3362}
3363
3364/*
3365 * vim_regexec and friends
3366 */
3367
3368/*
3369 * Global work variables for vim_regexec().
3370 */
3371
Bram Moolenaar071d4272004-06-13 20:20:40 +00003372/*
3373 * Structure used to save the current input state, when it needs to be
3374 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003375 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003376 */
3377typedef struct
3378{
3379 union
3380 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003381 char_u *ptr; /* rex.input pointer, for single-line regexp */
3382 lpos_T pos; /* rex.input pos, for multi-line regexp */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003383 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003384 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003385} regsave_T;
3386
3387/* struct to save start/end pointer/position in for \(\) */
3388typedef struct
3389{
3390 union
3391 {
3392 char_u *ptr;
3393 lpos_T pos;
3394 } se_u;
3395} save_se_T;
3396
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003397/* used for BEHIND and NOBEHIND matching */
3398typedef struct regbehind_S
3399{
3400 regsave_T save_after;
3401 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003402 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003403 save_se_T save_start[NSUBEXP];
3404 save_se_T save_end[NSUBEXP];
3405} regbehind_T;
3406
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003407static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaar09463262017-06-17 20:55:06 +02003408static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003409static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003410#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003411static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003412#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003413static void save_subexpr(regbehind_T *bp);
3414static void restore_subexpr(regbehind_T *bp);
3415static void reg_nextline(void);
3416static void reg_save(regsave_T *save, garray_T *gap);
3417static void reg_restore(regsave_T *save, garray_T *gap);
3418static int reg_save_equal(regsave_T *save);
3419static void save_se_multi(save_se_T *savep, lpos_T *posp);
3420static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003421
3422/* Save the sub-expressions before attempting a match. */
3423#define save_se(savep, posp, pp) \
3424 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3425
3426/* After a failed match restore the sub-expressions. */
3427#define restore_se(savep, posp, pp) { \
3428 if (REG_MULTI) \
3429 *(posp) = (savep)->se_u.pos; \
3430 else \
3431 *(pp) = (savep)->se_u.ptr; }
3432
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003433static int re_num_cmp(long_u val, char_u *scan);
3434static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar09463262017-06-17 20:55:06 +02003435static int regmatch(char_u *prog, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003436static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003437
3438#ifdef DEBUG
Bram Moolenaar5843f5f2019-08-20 20:13:45 +02003439static int regnarrate = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003440#endif
3441
3442/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003443 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3444 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003445 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003446 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003447static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003448static unsigned reg_tofreelen;
3449
3450/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02003451 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003452 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003453 * done:
3454 * single-line multi-line
3455 * reg_match &regmatch_T NULL
3456 * reg_mmatch NULL &regmmatch_T
3457 * reg_startp reg_match->startp <invalid>
3458 * reg_endp reg_match->endp <invalid>
3459 * reg_startpos <invalid> reg_mmatch->startpos
3460 * reg_endpos <invalid> reg_mmatch->endpos
3461 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003462 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003463 * reg_firstlnum <invalid> first line in which to search
3464 * reg_maxline 0 last line nr
3465 * reg_line_lbr FALSE or TRUE FALSE
3466 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003467typedef struct {
3468 regmatch_T *reg_match;
3469 regmmatch_T *reg_mmatch;
3470 char_u **reg_startp;
3471 char_u **reg_endp;
3472 lpos_T *reg_startpos;
3473 lpos_T *reg_endpos;
3474 win_T *reg_win;
3475 buf_T *reg_buf;
3476 linenr_T reg_firstlnum;
3477 linenr_T reg_maxline;
3478 int reg_line_lbr; /* "\n" in string is line break */
3479
Bram Moolenaar0270f382018-07-17 05:43:58 +02003480 // The current match-position is stord in these variables:
3481 linenr_T lnum; // line number, relative to first line
3482 char_u *line; // start of current line
3483 char_u *input; // current input, points into "regline"
3484
3485 int need_clear_subexpr; // subexpressions still need to be cleared
3486#ifdef FEAT_SYN_HL
3487 int need_clear_zsubexpr; // extmatch subexpressions still need to be
3488 // cleared
3489#endif
3490
Bram Moolenaar6100d022016-10-02 16:51:57 +02003491 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3492 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3493 * contains '\c' or '\C' the value is overruled. */
3494 int reg_ic;
3495
Bram Moolenaar0270f382018-07-17 05:43:58 +02003496 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z
Bram Moolenaar6100d022016-10-02 16:51:57 +02003497 * flag in the regexp. Defaults to false, always. */
3498 int reg_icombine;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003499
3500 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3501 * there is no maximum. */
3502 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003503
3504 // State for the NFA engine regexec.
3505 int nfa_has_zend; // NFA regexp \ze operator encountered.
3506 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
3507 int nfa_nsubexpr; // Number of sub expressions actually being used
3508 // during execution. 1 if only the whole match
3509 // (subexpr 0) is used.
3510 // listid is global, so that it increases on recursive calls to
3511 // nfa_regmatch(), which means we don't have to clear the lastlist field of
3512 // all the states.
3513 int nfa_listid;
3514 int nfa_alt_listid;
3515
3516#ifdef FEAT_SYN_HL
3517 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
3518#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003519} regexec_T;
3520
3521static regexec_T rex;
3522static int rex_in_use = FALSE;
3523
Bram Moolenaar071d4272004-06-13 20:20:40 +00003524
// Values for "rs_state" in regitem_T.
typedef enum regstate_E
{
    RS_NOPEN = 0,	// NOPEN and NCLOSE
    RS_MOPEN,		// MOPEN + [0-9]
    RS_MCLOSE,		// MCLOSE + [0-9]
#ifdef FEAT_SYN_HL
    RS_ZOPEN,		// ZOPEN + [0-9]
    RS_ZCLOSE,		// ZCLOSE + [0-9]
#endif
    RS_BRANCH,		// BRANCH
    RS_BRCPLX_MORE,	// BRACE_COMPLEX and trying one more match
    RS_BRCPLX_LONG,	// BRACE_COMPLEX and trying longest match
    RS_BRCPLX_SHORT,	// BRACE_COMPLEX and trying shortest match
    RS_NOMATCH,		// NOMATCH
    RS_BEHIND1,		// BEHIND / NOBEHIND matching rest
    RS_BEHIND2,		// BEHIND / NOBEHIND matching behind part
    RS_STAR_LONG,	// STAR/PLUS/BRACE_SIMPLE longest match
    RS_STAR_SHORT	// STAR/PLUS/BRACE_SIMPLE shortest match
} regstate_T;
3545
3546/*
3547 * When there are alternatives a regstate_T is put on the regstack to remember
3548 * what we are doing.
3549 * Before it may be another type of item, depending on rs_state, to remember
3550 * more things.
3551 */
3552typedef struct regitem_S
3553{
Bram Moolenaarbeb75742019-02-12 22:33:00 +01003554 regstate_T rs_state; // what we are doing, one of RS_ above
3555 short rs_no; // submatch nr or BEHIND/NOBEHIND
3556 char_u *rs_scan; // current node in program
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003557 union
3558 {
3559 save_se_T sesave;
3560 regsave_T regsave;
Bram Moolenaarbeb75742019-02-12 22:33:00 +01003561 } rs_un; // room for saving rex.input
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003562} regitem_T;
3563
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003564static regitem_T *regstack_push(regstate_T state, char_u *scan);
3565static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003566
// Used for STAR, PLUS and BRACE_SIMPLE matching.
typedef struct regstar_S
{
    int		nextb;		// next byte
    int		nextb_ic;	// next byte reverse case
    long	count;
    long	minval;
    long	maxval;
} regstar_T;
3576
3577/* used to store input position when a BACK was encountered, so that we now if
3578 * we made any progress since the last time. */
3579typedef struct backpos_S
3580{
3581 char_u *bp_scan; /* "scan" where BACK was encountered */
3582 regsave_T bp_pos; /* last input position */
3583} backpos_T;
3584
3585/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003586 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3587 * to avoid invoking malloc() and free() often.
3588 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3589 * or regbehind_T.
 * "backpos" is a table with backpos_T items for BACK.
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003591 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003592static garray_T regstack = {0, 0, 0, 0, NULL};
3593static garray_T backpos = {0, 0, 0, 0, NULL};
3594
3595/*
3596 * Both for regstack and backpos tables we use the following strategy of
3597 * allocation (to reduce malloc/free calls):
3598 * - Initial size is fairly small.
3599 * - When needed, the tables are grown bigger (8 times at first, double after
3600 * that).
3601 * - After executing the match we free the memory only if the array has grown.
3602 * Thus the memory is kept allocated when it's at the initial size.
3603 * This makes it fast while not keeping a lot of memory allocated.
3604 * A three times speed increase was observed when using many simple patterns.
3605 */
3606#define REGSTACK_INITIAL 2048
3607#define BACKPOS_INITIAL 64
3608
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the regexp code keeps allocated between calls:
 * the "regstack" and "backpos" arrays, the "reg_tofree" line copy and
 * "reg_prev_sub".
 */
    void
free_regexp_stuff(void)
{
    ga_clear(&regstack);
    ga_clear(&backpos);
    // Also reset the pointer, like bt_regexec_both() does when it frees this
    // buffer, so that no dangling pointer is left behind.
    VIM_CLEAR(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003619
Bram Moolenaar071d4272004-06-13 20:20:40 +00003620/*
Bram Moolenaar221cd9f2019-01-31 15:34:40 +01003621 * Return TRUE if character 'c' is included in 'iskeyword' option for
3622 * "reg_buf" buffer.
3623 */
3624 static int
3625reg_iswordc(int c)
3626{
3627 return vim_iswordc_buf(c, rex.reg_buf);
3628}
3629
3630/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003631 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3632 */
3633 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003634reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003635{
3636 /* when looking behind for a match/no-match lnum is negative. But we
3637 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003638 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003639 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003640 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003641 /* Must have matched the "\n" in the last line. */
3642 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003643 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003644}
3645
3646static regsave_T behind_pos;
3647
3648#ifdef FEAT_SYN_HL
3649static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3650static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3651static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3652static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3653#endif
3654
3655/* TRUE if using multi-line regexp. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003656#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003657
Bram Moolenaar071d4272004-06-13 20:20:40 +00003658/*
3659 * Match a regexp against a string.
3660 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3661 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003662 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003663 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003664 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003665 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003666 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003667bt_regexec_nl(
3668 regmatch_T *rmp,
3669 char_u *line, /* string to match against */
3670 colnr_T col, /* column to start looking for match */
3671 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003672{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003673 rex.reg_match = rmp;
3674 rex.reg_mmatch = NULL;
3675 rex.reg_maxline = 0;
3676 rex.reg_line_lbr = line_lbr;
3677 rex.reg_buf = curbuf;
3678 rex.reg_win = NULL;
3679 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003680 rex.reg_icombine = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003681 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003682
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003683 return bt_regexec_both(line, col, NULL, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003684}
3685
Bram Moolenaar071d4272004-06-13 20:20:40 +00003686/*
3687 * Match a regexp against multiple lines.
3688 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3689 * Uses curbuf for line count and 'iskeyword'.
3690 *
3691 * Return zero if there is no match. Return number of lines contained in the
3692 * match otherwise.
3693 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003694 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003695bt_regexec_multi(
3696 regmmatch_T *rmp,
3697 win_T *win, /* window in which to search or NULL */
3698 buf_T *buf, /* buffer in which to search */
3699 linenr_T lnum, /* nr of line to start looking for match */
3700 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003701 proftime_T *tm, /* timeout limit or NULL */
3702 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003703{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003704 rex.reg_match = NULL;
3705 rex.reg_mmatch = rmp;
3706 rex.reg_buf = buf;
3707 rex.reg_win = win;
3708 rex.reg_firstlnum = lnum;
3709 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3710 rex.reg_line_lbr = FALSE;
3711 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003712 rex.reg_icombine = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003713 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003714
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003715 return bt_regexec_both(NULL, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003716}
3717
3718/*
3719 * Match a regexp against a string ("line" points to the string) or multiple
3720 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003721 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003722 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003723 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003724bt_regexec_both(
3725 char_u *line,
3726 colnr_T col, /* column to start looking for match */
Bram Moolenaar09463262017-06-17 20:55:06 +02003727 proftime_T *tm, /* timeout limit or NULL */
3728 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003729{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003730 bt_regprog_T *prog;
3731 char_u *s;
3732 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003733
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003734 /* Create "regstack" and "backpos" if they are not allocated yet.
3735 * We allocate *_INITIAL amount of bytes first and then set the grow size
3736 * to much bigger value to avoid many malloc calls in case of deep regular
3737 * expressions. */
3738 if (regstack.ga_data == NULL)
3739 {
3740 /* Use an item size of 1 byte, since we push different things
3741 * onto the regstack. */
3742 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003743 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003744 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3745 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003746
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003747 if (backpos.ga_data == NULL)
3748 {
3749 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003750 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003751 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3752 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003753
Bram Moolenaar071d4272004-06-13 20:20:40 +00003754 if (REG_MULTI)
3755 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003756 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003757 line = reg_getline((linenr_T)0);
Bram Moolenaar6100d022016-10-02 16:51:57 +02003758 rex.reg_startpos = rex.reg_mmatch->startpos;
3759 rex.reg_endpos = rex.reg_mmatch->endpos;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003760 }
3761 else
3762 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003763 prog = (bt_regprog_T *)rex.reg_match->regprog;
3764 rex.reg_startp = rex.reg_match->startp;
3765 rex.reg_endp = rex.reg_match->endp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003766 }
3767
3768 /* Be paranoid... */
3769 if (prog == NULL || line == NULL)
3770 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003771 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003772 goto theend;
3773 }
3774
3775 /* Check validity of program. */
3776 if (prog_magic_wrong())
3777 goto theend;
3778
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003779 /* If the start column is past the maximum column: no need to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003780 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003781 goto theend;
3782
Bram Moolenaar6100d022016-10-02 16:51:57 +02003783 /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003784 if (prog->regflags & RF_ICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003785 rex.reg_ic = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003786 else if (prog->regflags & RF_NOICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003787 rex.reg_ic = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003788
Bram Moolenaar6100d022016-10-02 16:51:57 +02003789 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003790 if (prog->regflags & RF_ICOMBINE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003791 rex.reg_icombine = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003792
3793 /* If there is a "must appear" string, look for it. */
3794 if (prog->regmust != NULL)
3795 {
3796 int c;
3797
Bram Moolenaar071d4272004-06-13 20:20:40 +00003798 if (has_mbyte)
3799 c = (*mb_ptr2char)(prog->regmust);
3800 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003801 c = *prog->regmust;
3802 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003803
3804 /*
3805 * This is used very often, esp. for ":global". Use three versions of
3806 * the loop to avoid overhead of conditions.
3807 */
Bram Moolenaara12a1612019-01-24 16:39:02 +01003808 if (!rex.reg_ic && !has_mbyte)
Bram Moolenaar05159a02005-02-26 23:04:13 +00003809 while ((s = vim_strbyte(s, c)) != NULL)
3810 {
3811 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3812 break; /* Found it. */
3813 ++s;
3814 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02003815 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar05159a02005-02-26 23:04:13 +00003816 while ((s = vim_strchr(s, c)) != NULL)
3817 {
3818 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3819 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003820 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003821 }
Bram Moolenaar05159a02005-02-26 23:04:13 +00003822 else
3823 while ((s = cstrchr(s, c)) != NULL)
3824 {
3825 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3826 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003827 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003828 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003829 if (s == NULL) /* Not present. */
3830 goto theend;
3831 }
3832
Bram Moolenaar0270f382018-07-17 05:43:58 +02003833 rex.line = line;
3834 rex.lnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003835 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003836
3837 /* Simplest case: Anchored match need be tried only once. */
3838 if (prog->reganch)
3839 {
3840 int c;
3841
Bram Moolenaar071d4272004-06-13 20:20:40 +00003842 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003843 c = (*mb_ptr2char)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003844 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02003845 c = rex.line[col];
Bram Moolenaar071d4272004-06-13 20:20:40 +00003846 if (prog->regstart == NUL
3847 || prog->regstart == c
Bram Moolenaara12a1612019-01-24 16:39:02 +01003848 || (rex.reg_ic
3849 && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003850 || (c < 255 && prog->regstart < 255 &&
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003851 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar09463262017-06-17 20:55:06 +02003852 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003853 else
3854 retval = 0;
3855 }
3856 else
3857 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003858#ifdef FEAT_RELTIME
3859 int tm_count = 0;
3860#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003861 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003862 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003863 {
3864 if (prog->regstart != NUL)
3865 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003866 /* Skip until the char we know it must start with.
3867 * Used often, do some work to avoid call overhead. */
Bram Moolenaara12a1612019-01-24 16:39:02 +01003868 if (!rex.reg_ic && !has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003869 s = vim_strbyte(rex.line + col, prog->regstart);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003870 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02003871 s = cstrchr(rex.line + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003872 if (s == NULL)
3873 {
3874 retval = 0;
3875 break;
3876 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003877 col = (int)(s - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003878 }
3879
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003880 /* Check for maximum column to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003881 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003882 {
3883 retval = 0;
3884 break;
3885 }
3886
Bram Moolenaar09463262017-06-17 20:55:06 +02003887 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003888 if (retval > 0)
3889 break;
3890
3891 /* if not currently on the first line, get it again */
Bram Moolenaar0270f382018-07-17 05:43:58 +02003892 if (rex.lnum != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003893 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003894 rex.lnum = 0;
3895 rex.line = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003896 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003897 if (rex.line[col] == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003898 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003899 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003900 col += (*mb_ptr2len)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003901 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003902 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003903#ifdef FEAT_RELTIME
3904 /* Check for timeout once in a twenty times to avoid overhead. */
3905 if (tm != NULL && ++tm_count == 20)
3906 {
3907 tm_count = 0;
3908 if (profile_passed_limit(tm))
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003909 {
3910 if (timed_out != NULL)
3911 *timed_out = TRUE;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003912 break;
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003913 }
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003914 }
3915#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003916 }
3917 }
3918
Bram Moolenaar071d4272004-06-13 20:20:40 +00003919theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003920 /* Free "reg_tofree" when it's a bit big.
3921 * Free regstack and backpos if they are bigger than their initial size. */
3922 if (reg_tofreelen > 400)
Bram Moolenaard23a8232018-02-10 18:45:26 +01003923 VIM_CLEAR(reg_tofree);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003924 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3925 ga_clear(&regstack);
3926 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3927 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003928
Bram Moolenaar071d4272004-06-13 20:20:40 +00003929 return retval;
3930}
3931
3932#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00003933/*
3934 * Create a new extmatch and mark it as referenced once.
3935 */
3936 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003937make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003938{
3939 reg_extmatch_T *em;
3940
Bram Moolenaarc799fe22019-05-28 23:08:19 +02003941 em = ALLOC_CLEAR_ONE(reg_extmatch_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003942 if (em != NULL)
3943 em->refcnt = 1;
3944 return em;
3945}
3946
3947/*
3948 * Add a reference to an extmatch.
3949 */
3950 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003951ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003952{
3953 if (em != NULL)
3954 em->refcnt++;
3955 return em;
3956}
3957
3958/*
3959 * Remove a reference to an extmatch. If there are no references left, free
3960 * the info.
3961 */
3962 void
Bram Moolenaar05540972016-01-30 20:31:25 +01003963unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003964{
3965 int i;
3966
3967 if (em != NULL && --em->refcnt <= 0)
3968 {
3969 for (i = 0; i < NSUBEXP; ++i)
3970 vim_free(em->matches[i]);
3971 vim_free(em);
3972 }
3973}
3974#endif
3975
3976/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02003977 * regtry - try match of "prog" with at rex.line["col"].
Bram Moolenaar071d4272004-06-13 20:20:40 +00003978 * Returns 0 for failure, number of lines contained in the match otherwise.
3979 */
3980 static long
Bram Moolenaar09463262017-06-17 20:55:06 +02003981regtry(
3982 bt_regprog_T *prog,
3983 colnr_T col,
3984 proftime_T *tm, /* timeout limit or NULL */
3985 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003986{
Bram Moolenaar0270f382018-07-17 05:43:58 +02003987 rex.input = rex.line + col;
3988 rex.need_clear_subexpr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003989#ifdef FEAT_SYN_HL
Bram Moolenaar0270f382018-07-17 05:43:58 +02003990 // Clear the external match subpointers if necessary.
3991 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003992#endif
3993
Bram Moolenaar09463262017-06-17 20:55:06 +02003994 if (regmatch(prog->program + 1, tm, timed_out) == 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003995 return 0;
3996
3997 cleanup_subexpr();
3998 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003999 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004000 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004001 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004002 rex.reg_startpos[0].lnum = 0;
4003 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004004 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004005 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004006 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004007 rex.reg_endpos[0].lnum = rex.lnum;
4008 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004009 }
4010 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004011 /* Use line number of "\ze". */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004012 rex.lnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004013 }
4014 else
4015 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004016 if (rex.reg_startp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004017 rex.reg_startp[0] = rex.line + col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004018 if (rex.reg_endp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004019 rex.reg_endp[0] = rex.input;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004020 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004021#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004022 /* Package any found \z(...\) matches for export. Default is none. */
4023 unref_extmatch(re_extmatch_out);
4024 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004025
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004026 if (prog->reghasz == REX_SET)
4027 {
4028 int i;
4029
4030 cleanup_zsubexpr();
4031 re_extmatch_out = make_extmatch();
4032 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004033 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004034 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004035 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004036 /* Only accept single line matches. */
4037 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004038 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4039 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004040 re_extmatch_out->matches[i] =
4041 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004042 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004043 reg_endzpos[i].col - reg_startzpos[i].col);
4044 }
4045 else
4046 {
4047 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4048 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004049 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004050 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004051 }
4052 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004053 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004054#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004055 return 1 + rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004056}
4057
Bram Moolenaar071d4272004-06-13 20:20:40 +00004058/*
4059 * Get class of previous character.
4060 */
4061 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004062reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004063{
Bram Moolenaar0270f382018-07-17 05:43:58 +02004064 if (rex.input > rex.line)
4065 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01004066 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004067 return -1;
4068}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004069
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004070/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004071 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004072 */
4073 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004074reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004075{
4076 pos_T top, bot;
4077 linenr_T lnum;
4078 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004079 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004080 int mode;
4081 colnr_T start, end;
4082 colnr_T start2, end2;
4083 colnr_T cols;
4084
4085 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004086 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004087 return FALSE;
4088
4089 if (VIsual_active)
4090 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004091 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004092 {
4093 top = VIsual;
4094 bot = wp->w_cursor;
4095 }
4096 else
4097 {
4098 top = wp->w_cursor;
4099 bot = VIsual;
4100 }
4101 mode = VIsual_mode;
4102 }
4103 else
4104 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004105 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004106 {
4107 top = curbuf->b_visual.vi_start;
4108 bot = curbuf->b_visual.vi_end;
4109 }
4110 else
4111 {
4112 top = curbuf->b_visual.vi_end;
4113 bot = curbuf->b_visual.vi_start;
4114 }
4115 mode = curbuf->b_visual.vi_mode;
4116 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004117 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004118 if (lnum < top.lnum || lnum > bot.lnum)
4119 return FALSE;
4120
4121 if (mode == 'v')
4122 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004123 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004124 if ((lnum == top.lnum && col < top.col)
4125 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4126 return FALSE;
4127 }
4128 else if (mode == Ctrl_V)
4129 {
4130 getvvcol(wp, &top, &start, NULL, &end);
4131 getvvcol(wp, &bot, &start2, NULL, &end2);
4132 if (start2 < start)
4133 start = start2;
4134 if (end2 > end)
4135 end = end2;
4136 if (top.col == MAXCOL || bot.col == MAXCOL)
4137 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004138 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004139 if (cols < start || cols > end - (*p_sel == 'e'))
4140 return FALSE;
4141 }
4142 return TRUE;
4143}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004144
/*
 * Move rex.input forward with MB_PTR_ADV().  regmatch() uses this after an
 * item matched the character at rex.input.
 * NOTE(review): presumably MB_PTR_ADV() skips one complete (possibly
 * multi-byte) character - confirm against its definition.
 */
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004146
/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are kept at file level to reduce the amount of
 * stack space used (it can be called recursively many times).
 * NOTE(review): presumably these are the lower and upper repeat count of a
 * brace item - confirm where BRACE_LIMITS is handled.
 */
static long	bl_minval;
static long	bl_maxval;
4154
4155/*
4156 * regmatch - main matching routine
4157 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004158 * Conceptually the strategy is simple: Check to see whether the current node
4159 * matches, push an item onto the regstack and loop to see whether the rest
4160 * matches, and then act accordingly. In practice we make some effort to
4161 * avoid using the regstack, in particular by going through "ordinary" nodes
4162 * (that don't need to know whether the rest of the match failed) by a nested
4163 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004164 *
Bram Moolenaar0270f382018-07-17 05:43:58 +02004165 * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after
Bram Moolenaar071d4272004-06-13 20:20:40 +00004166 * the last matched character.
Bram Moolenaar0270f382018-07-17 05:43:58 +02004167 * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an
Bram Moolenaar071d4272004-06-13 20:20:40 +00004168 * undefined state!
4169 */
4170 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004171regmatch(
Bram Moolenaar09463262017-06-17 20:55:06 +02004172 char_u *scan, /* Current node. */
4173 proftime_T *tm UNUSED, /* timeout limit or NULL */
4174 int *timed_out UNUSED) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004175{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004176 char_u *next; /* Next node. */
4177 int op;
4178 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004179 regitem_T *rp;
4180 int no;
4181 int status; /* one of the RA_ values: */
4182#define RA_FAIL 1 /* something failed, abort */
4183#define RA_CONT 2 /* continue in inner loop */
4184#define RA_BREAK 3 /* break inner loop */
4185#define RA_MATCH 4 /* successful match */
4186#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar09463262017-06-17 20:55:06 +02004187#ifdef FEAT_RELTIME
4188 int tm_count = 0;
4189#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004190
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004191 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004192 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004193 regstack.ga_len = 0;
4194 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004195
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004196 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004197 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004198 */
4199 for (;;)
4200 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004201 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4202 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004203 fast_breakcheck();
4204
4205#ifdef DEBUG
4206 if (scan != NULL && regnarrate)
4207 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004208 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004209 mch_errmsg("(\n");
4210 }
4211#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004212
4213 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004214 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004215 * regstack.
4216 */
4217 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004218 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004219 if (got_int || scan == NULL)
4220 {
4221 status = RA_FAIL;
4222 break;
4223 }
Bram Moolenaar09463262017-06-17 20:55:06 +02004224#ifdef FEAT_RELTIME
4225 /* Check for timeout once in a 100 times to avoid overhead. */
4226 if (tm != NULL && ++tm_count == 100)
4227 {
4228 tm_count = 0;
4229 if (profile_passed_limit(tm))
4230 {
4231 if (timed_out != NULL)
4232 *timed_out = TRUE;
4233 status = RA_FAIL;
4234 break;
4235 }
4236 }
4237#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004238 status = RA_CONT;
4239
Bram Moolenaar071d4272004-06-13 20:20:40 +00004240#ifdef DEBUG
4241 if (regnarrate)
4242 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004243 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004244 mch_errmsg("...\n");
4245# ifdef FEAT_SYN_HL
4246 if (re_extmatch_in != NULL)
4247 {
4248 int i;
4249
4250 mch_errmsg(_("External submatches:\n"));
4251 for (i = 0; i < NSUBEXP; i++)
4252 {
4253 mch_errmsg(" \"");
4254 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004255 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004256 mch_errmsg("\"\n");
4257 }
4258 }
4259# endif
4260 }
4261#endif
4262 next = regnext(scan);
4263
4264 op = OP(scan);
4265 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004266 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
Bram Moolenaar0270f382018-07-17 05:43:58 +02004267 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004268 {
4269 reg_nextline();
4270 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004271 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004272 {
4273 ADVANCE_REGINPUT();
4274 }
4275 else
4276 {
4277 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004278 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004279 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004280 c = (*mb_ptr2char)(rex.input);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004281 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02004282 c = *rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004283 switch (op)
4284 {
4285 case BOL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004286 if (rex.input != rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004287 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004288 break;
4289
4290 case EOL:
4291 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004292 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004293 break;
4294
4295 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004296 /* We're not at the beginning of the file when below the first
4297 * line where we started, not at the start of the line or we
4298 * didn't start at the first line of the buffer. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004299 if (rex.lnum != 0 || rex.input != rex.line
Bram Moolenaar6100d022016-10-02 16:51:57 +02004300 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004301 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004302 break;
4303
4304 case RE_EOF:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004305 if (rex.lnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004306 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004307 break;
4308
4309 case CURSOR:
4310 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004311 * rex.reg_win->w_cursor position to the match position. */
4312 if (rex.reg_win == NULL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004313 || (rex.lnum + rex.reg_firstlnum
Bram Moolenaar6100d022016-10-02 16:51:57 +02004314 != rex.reg_win->w_cursor.lnum)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004315 || ((colnr_T)(rex.input - rex.line)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004316 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004317 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004318 break;
4319
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004320 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004321 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004322 {
4323 int mark = OPERAND(scan)[0];
4324 int cmp = OPERAND(scan)[1];
4325 pos_T *pos;
4326
Bram Moolenaar6100d022016-10-02 16:51:57 +02004327 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004328 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004329 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004330 || (pos->lnum == rex.lnum + rex.reg_firstlnum
4331 ? (pos->col == (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004332 ? (cmp == '<' || cmp == '>')
Bram Moolenaar0270f382018-07-17 05:43:58 +02004333 : (pos->col < (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004334 ? cmp != '>'
4335 : cmp != '<'))
Bram Moolenaar0270f382018-07-17 05:43:58 +02004336 : (pos->lnum < rex.lnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004337 ? cmp != '>'
4338 : cmp != '<')))
4339 status = RA_NOMATCH;
4340 }
4341 break;
4342
4343 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004344 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004345 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004346 break;
4347
Bram Moolenaar071d4272004-06-13 20:20:40 +00004348 case RE_LNUM:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004349 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004350 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004351 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004352 break;
4353
4354 case RE_COL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004355 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004356 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004357 break;
4358
4359 case RE_VCOL:
4360 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004361 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar0270f382018-07-17 05:43:58 +02004362 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004363 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004364 break;
4365
Bram Moolenaar0270f382018-07-17 05:43:58 +02004366 case BOW: /* \<word; rex.input points to w */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004367 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004368 status = RA_NOMATCH;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004369 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004370 {
4371 int this_class;
4372
4373 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004374 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004375 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH; /* not on a word at all */
4377 else if (reg_prev_class() == this_class)
4378 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004379 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004380 else
4381 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004382 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
4383 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004384 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004385 }
4386 break;
4387
Bram Moolenaar0270f382018-07-17 05:43:58 +02004388 case EOW: /* word\>; rex.input points after d */
4389 if (rex.input == rex.line) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004390 status = RA_NOMATCH;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004391 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004392 {
4393 int this_class, prev_class;
4394
4395 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004396 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004397 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004398 if (this_class == prev_class
4399 || prev_class == 0 || prev_class == 1)
4400 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004401 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004402 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004403 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004404 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
4405 || (rex.input[0] != NUL
Bram Moolenaar6100d022016-10-02 16:51:57 +02004406 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004407 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004408 }
4409 break; /* Matched with EOW */
4410
4411 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004412 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004413 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004414 status = RA_NOMATCH;
4415 else
4416 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004417 break;
4418
4419 case IDENT:
4420 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004421 status = RA_NOMATCH;
4422 else
4423 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004424 break;
4425
4426 case SIDENT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004427 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004428 status = RA_NOMATCH;
4429 else
4430 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004431 break;
4432
4433 case KWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004434 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004435 status = RA_NOMATCH;
4436 else
4437 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004438 break;
4439
4440 case SKWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004441 if (VIM_ISDIGIT(*rex.input)
4442 || !vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004443 status = RA_NOMATCH;
4444 else
4445 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004446 break;
4447
4448 case FNAME:
4449 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004450 status = RA_NOMATCH;
4451 else
4452 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004453 break;
4454
4455 case SFNAME:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004456 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004457 status = RA_NOMATCH;
4458 else
4459 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004460 break;
4461
4462 case PRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004463 if (!vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004464 status = RA_NOMATCH;
4465 else
4466 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004467 break;
4468
4469 case SPRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004470 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004471 status = RA_NOMATCH;
4472 else
4473 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004474 break;
4475
4476 case WHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004477 if (!VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004478 status = RA_NOMATCH;
4479 else
4480 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004481 break;
4482
4483 case NWHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004484 if (c == NUL || VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004485 status = RA_NOMATCH;
4486 else
4487 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004488 break;
4489
4490 case DIGIT:
4491 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004492 status = RA_NOMATCH;
4493 else
4494 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004495 break;
4496
4497 case NDIGIT:
4498 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004499 status = RA_NOMATCH;
4500 else
4501 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004502 break;
4503
4504 case HEX:
4505 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004506 status = RA_NOMATCH;
4507 else
4508 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004509 break;
4510
4511 case NHEX:
4512 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004513 status = RA_NOMATCH;
4514 else
4515 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004516 break;
4517
4518 case OCTAL:
4519 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004520 status = RA_NOMATCH;
4521 else
4522 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004523 break;
4524
4525 case NOCTAL:
4526 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004527 status = RA_NOMATCH;
4528 else
4529 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004530 break;
4531
4532 case WORD:
4533 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004534 status = RA_NOMATCH;
4535 else
4536 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004537 break;
4538
4539 case NWORD:
4540 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004541 status = RA_NOMATCH;
4542 else
4543 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004544 break;
4545
4546 case HEAD:
4547 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004548 status = RA_NOMATCH;
4549 else
4550 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004551 break;
4552
4553 case NHEAD:
4554 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004555 status = RA_NOMATCH;
4556 else
4557 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004558 break;
4559
4560 case ALPHA:
4561 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004562 status = RA_NOMATCH;
4563 else
4564 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 break;
4566
4567 case NALPHA:
4568 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004569 status = RA_NOMATCH;
4570 else
4571 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004572 break;
4573
4574 case LOWER:
4575 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004576 status = RA_NOMATCH;
4577 else
4578 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004579 break;
4580
4581 case NLOWER:
4582 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004583 status = RA_NOMATCH;
4584 else
4585 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004586 break;
4587
4588 case UPPER:
4589 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004590 status = RA_NOMATCH;
4591 else
4592 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004593 break;
4594
4595 case NUPPER:
4596 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004597 status = RA_NOMATCH;
4598 else
4599 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004600 break;
4601
4602 case EXACTLY:
4603 {
4604 int len;
4605 char_u *opnd;
4606
4607 opnd = OPERAND(scan);
4608 /* Inline the first byte, for speed. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004609 if (*opnd != *rex.input
Bram Moolenaara12a1612019-01-24 16:39:02 +01004610 && (!rex.reg_ic
4611 || (!enc_utf8
4612 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 status = RA_NOMATCH;
4614 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004615 {
4616 /* match empty string always works; happens when "~" is
4617 * empty. */
4618 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004619 else
4620 {
Bram Moolenaara12a1612019-01-24 16:39:02 +01004621 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004622 {
4623 len = 1; /* matched a single byte above */
4624 }
4625 else
4626 {
4627 /* Need to match first byte again for multi-byte. */
4628 len = (int)STRLEN(opnd);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004629 if (cstrncmp(opnd, rex.input, &len) != 0)
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004630 status = RA_NOMATCH;
4631 }
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004632 /* Check for following composing character, unless %C
4633 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004634 if (status != RA_NOMATCH
4635 && enc_utf8
Bram Moolenaar0270f382018-07-17 05:43:58 +02004636 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004637 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004638 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004639 {
4640 /* raaron: This code makes a composing character get
4641 * ignored, which is the correct behavior (sometimes)
4642 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004643 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004644 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004645 if (status != RA_NOMATCH)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004646 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004647 }
4648 }
4649 break;
4650
4651 case ANYOF:
4652 case ANYBUT:
4653 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004654 status = RA_NOMATCH;
4655 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4656 status = RA_NOMATCH;
4657 else
4658 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004659 break;
4660
Bram Moolenaar071d4272004-06-13 20:20:40 +00004661 case MULTIBYTECODE:
4662 if (has_mbyte)
4663 {
4664 int i, len;
4665 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004666 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004667
4668 opnd = OPERAND(scan);
4669 /* Safety check (just in case 'encoding' was changed since
4670 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004671 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004672 {
4673 status = RA_NOMATCH;
4674 break;
4675 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004676 if (enc_utf8)
Bram Moolenaarace95982017-03-29 17:30:27 +02004677 opndc = utf_ptr2char(opnd);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004678 if (enc_utf8 && utf_iscomposing(opndc))
4679 {
4680 /* When only a composing char is given match at any
4681 * position where that composing char appears. */
4682 status = RA_NOMATCH;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004683 for (i = 0; rex.input[i] != NUL;
4684 i += utf_ptr2len(rex.input + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004685 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004686 inpc = utf_ptr2char(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004687 if (!utf_iscomposing(inpc))
4688 {
4689 if (i > 0)
4690 break;
4691 }
4692 else if (opndc == inpc)
4693 {
4694 /* Include all following composing chars. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004695 len = i + utfc_ptr2len(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004696 status = RA_MATCH;
4697 break;
4698 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004699 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004700 }
4701 else
4702 for (i = 0; i < len; ++i)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004703 if (opnd[i] != rex.input[i])
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004704 {
4705 status = RA_NOMATCH;
4706 break;
4707 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004708 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004709 }
4710 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004711 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004712 break;
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004713 case RE_COMPOSING:
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004714 if (enc_utf8)
4715 {
4716 /* Skip composing characters. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004717 while (utf_iscomposing(utf_ptr2char(rex.input)))
4718 MB_CPTR_ADV(rex.input);
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004719 }
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004720 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004721
4722 case NOTHING:
4723 break;
4724
4725 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004726 {
4727 int i;
4728 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004729
Bram Moolenaar582fd852005-03-28 20:58:01 +00004730 /*
4731 * When we run into BACK we need to check if we don't keep
4732 * looping without matching any input. The second and later
4733 * times a BACK is encountered it fails if the input is still
4734 * at the same position as the previous time.
4735 * The positions are stored in "backpos" and found by the
4736 * current value of "scan", the position in the RE program.
4737 */
4738 bp = (backpos_T *)backpos.ga_data;
4739 for (i = 0; i < backpos.ga_len; ++i)
4740 if (bp[i].bp_scan == scan)
4741 break;
4742 if (i == backpos.ga_len)
4743 {
4744 /* First time at this BACK, make room to store the pos. */
4745 if (ga_grow(&backpos, 1) == FAIL)
4746 status = RA_FAIL;
4747 else
4748 {
4749 /* get "ga_data" again, it may have changed */
4750 bp = (backpos_T *)backpos.ga_data;
4751 bp[i].bp_scan = scan;
4752 ++backpos.ga_len;
4753 }
4754 }
4755 else if (reg_save_equal(&bp[i].bp_pos))
4756 /* Still at same position as last time, fail. */
4757 status = RA_NOMATCH;
4758
4759 if (status != RA_FAIL && status != RA_NOMATCH)
4760 reg_save(&bp[i].bp_pos, &backpos);
4761 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004762 break;
4763
Bram Moolenaar071d4272004-06-13 20:20:40 +00004764 case MOPEN + 0: /* Match start: \zs */
4765 case MOPEN + 1: /* \( */
4766 case MOPEN + 2:
4767 case MOPEN + 3:
4768 case MOPEN + 4:
4769 case MOPEN + 5:
4770 case MOPEN + 6:
4771 case MOPEN + 7:
4772 case MOPEN + 8:
4773 case MOPEN + 9:
4774 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004775 no = op - MOPEN;
4776 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004777 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004778 if (rp == NULL)
4779 status = RA_FAIL;
4780 else
4781 {
4782 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004783 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4784 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004785 /* We simply continue and handle the result when done. */
4786 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004787 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004788 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004789
4790 case NOPEN: /* \%( */
4791 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004792 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004793 status = RA_FAIL;
4794 /* We simply continue and handle the result when done. */
4795 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004796
4797#ifdef FEAT_SYN_HL
4798 case ZOPEN + 1:
4799 case ZOPEN + 2:
4800 case ZOPEN + 3:
4801 case ZOPEN + 4:
4802 case ZOPEN + 5:
4803 case ZOPEN + 6:
4804 case ZOPEN + 7:
4805 case ZOPEN + 8:
4806 case ZOPEN + 9:
4807 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004808 no = op - ZOPEN;
4809 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004810 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004811 if (rp == NULL)
4812 status = RA_FAIL;
4813 else
4814 {
4815 rp->rs_no = no;
4816 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4817 &reg_startzp[no]);
4818 /* We simply continue and handle the result when done. */
4819 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004820 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004821 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004822#endif
4823
4824 case MCLOSE + 0: /* Match end: \ze */
4825 case MCLOSE + 1: /* \) */
4826 case MCLOSE + 2:
4827 case MCLOSE + 3:
4828 case MCLOSE + 4:
4829 case MCLOSE + 5:
4830 case MCLOSE + 6:
4831 case MCLOSE + 7:
4832 case MCLOSE + 8:
4833 case MCLOSE + 9:
4834 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004835 no = op - MCLOSE;
4836 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004837 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004838 if (rp == NULL)
4839 status = RA_FAIL;
4840 else
4841 {
4842 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004843 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4844 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004845 /* We simply continue and handle the result when done. */
4846 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004847 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004848 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004849
4850#ifdef FEAT_SYN_HL
4851 case ZCLOSE + 1: /* \) after \z( */
4852 case ZCLOSE + 2:
4853 case ZCLOSE + 3:
4854 case ZCLOSE + 4:
4855 case ZCLOSE + 5:
4856 case ZCLOSE + 6:
4857 case ZCLOSE + 7:
4858 case ZCLOSE + 8:
4859 case ZCLOSE + 9:
4860 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004861 no = op - ZCLOSE;
4862 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004863 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004864 if (rp == NULL)
4865 status = RA_FAIL;
4866 else
4867 {
4868 rp->rs_no = no;
4869 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4870 &reg_endzp[no]);
4871 /* We simply continue and handle the result when done. */
4872 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004873 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004874 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004875#endif
4876
4877 case BACKREF + 1:
4878 case BACKREF + 2:
4879 case BACKREF + 3:
4880 case BACKREF + 4:
4881 case BACKREF + 5:
4882 case BACKREF + 6:
4883 case BACKREF + 7:
4884 case BACKREF + 8:
4885 case BACKREF + 9:
4886 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004887 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004888
4889 no = op - BACKREF;
4890 cleanup_subexpr();
4891 if (!REG_MULTI) /* Single-line regexp */
4892 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004893 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004894 {
4895 /* Backref was not set: Match an empty string. */
4896 len = 0;
4897 }
4898 else
4899 {
4900 /* Compare current input with back-ref in the same
4901 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004902 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004903 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004904 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004905 }
4906 }
4907 else /* Multi-line regexp */
4908 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004909 if (rex.reg_startpos[no].lnum < 0
4910 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004911 {
4912 /* Backref was not set: Match an empty string. */
4913 len = 0;
4914 }
4915 else
4916 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004917 if (rex.reg_startpos[no].lnum == rex.lnum
4918 && rex.reg_endpos[no].lnum == rex.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004919 {
4920 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004921 len = rex.reg_endpos[no].col
4922 - rex.reg_startpos[no].col;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004923 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
4924 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004925 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004926 }
4927 else
4928 {
4929 /* Messy situation: Need to compare between two
4930 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004931 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004932 rex.reg_startpos[no].lnum,
4933 rex.reg_startpos[no].col,
4934 rex.reg_endpos[no].lnum,
4935 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02004936 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004937
4938 if (r != RA_MATCH)
4939 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004940 }
4941 }
4942 }
4943
4944 /* Matched the backref, skip over it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004945 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004946 }
4947 break;
4948
4949#ifdef FEAT_SYN_HL
4950 case ZREF + 1:
4951 case ZREF + 2:
4952 case ZREF + 3:
4953 case ZREF + 4:
4954 case ZREF + 5:
4955 case ZREF + 6:
4956 case ZREF + 7:
4957 case ZREF + 8:
4958 case ZREF + 9:
4959 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004960 int len;
4961
4962 cleanup_zsubexpr();
4963 no = op - ZREF;
4964 if (re_extmatch_in != NULL
4965 && re_extmatch_in->matches[no] != NULL)
4966 {
4967 len = (int)STRLEN(re_extmatch_in->matches[no]);
4968 if (cstrncmp(re_extmatch_in->matches[no],
Bram Moolenaar0270f382018-07-17 05:43:58 +02004969 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004970 status = RA_NOMATCH;
4971 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02004972 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004973 }
4974 else
4975 {
4976 /* Backref was not set: Match an empty string. */
4977 }
4978 }
4979 break;
4980#endif
4981
4982 case BRANCH:
4983 {
4984 if (OP(next) != BRANCH) /* No choice. */
4985 next = OPERAND(scan); /* Avoid recursion. */
4986 else
4987 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004988 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004989 if (rp == NULL)
4990 status = RA_FAIL;
4991 else
4992 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004993 }
4994 }
4995 break;
4996
4997 case BRACE_LIMITS:
4998 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004999 if (OP(next) == BRACE_SIMPLE)
5000 {
5001 bl_minval = OPERAND_MIN(scan);
5002 bl_maxval = OPERAND_MAX(scan);
5003 }
5004 else if (OP(next) >= BRACE_COMPLEX
5005 && OP(next) < BRACE_COMPLEX + 10)
5006 {
5007 no = OP(next) - BRACE_COMPLEX;
5008 brace_min[no] = OPERAND_MIN(scan);
5009 brace_max[no] = OPERAND_MAX(scan);
5010 brace_count[no] = 0;
5011 }
5012 else
5013 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01005014 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005015 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005016 }
5017 }
5018 break;
5019
5020 case BRACE_COMPLEX + 0:
5021 case BRACE_COMPLEX + 1:
5022 case BRACE_COMPLEX + 2:
5023 case BRACE_COMPLEX + 3:
5024 case BRACE_COMPLEX + 4:
5025 case BRACE_COMPLEX + 5:
5026 case BRACE_COMPLEX + 6:
5027 case BRACE_COMPLEX + 7:
5028 case BRACE_COMPLEX + 8:
5029 case BRACE_COMPLEX + 9:
5030 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005031 no = op - BRACE_COMPLEX;
5032 ++brace_count[no];
5033
5034 /* If not matched enough times yet, try one more */
5035 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005036 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005037 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005038 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005039 if (rp == NULL)
5040 status = RA_FAIL;
5041 else
5042 {
5043 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005044 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005045 next = OPERAND(scan);
5046 /* We continue and handle the result when done. */
5047 }
5048 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005049 }
5050
5051 /* If matched enough times, may try matching some more */
5052 if (brace_min[no] <= brace_max[no])
5053 {
5054 /* Range is the normal way around, use longest match */
5055 if (brace_count[no] <= brace_max[no])
5056 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005057 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005058 if (rp == NULL)
5059 status = RA_FAIL;
5060 else
5061 {
5062 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005063 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005064 next = OPERAND(scan);
5065 /* We continue and handle the result when done. */
5066 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005067 }
5068 }
5069 else
5070 {
5071 /* Range is backwards, use shortest match first */
5072 if (brace_count[no] <= brace_min[no])
5073 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005074 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005075 if (rp == NULL)
5076 status = RA_FAIL;
5077 else
5078 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005079 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005080 /* We continue and handle the result when done. */
5081 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005082 }
5083 }
5084 }
5085 break;
5086
5087 case BRACE_SIMPLE:
5088 case STAR:
5089 case PLUS:
5090 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005091 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005092
5093 /*
5094 * Lookahead to avoid useless match attempts when we know
5095 * what character comes next.
5096 */
5097 if (OP(next) == EXACTLY)
5098 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005099 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005100 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005101 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005102 if (MB_ISUPPER(rst.nextb))
5103 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005104 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005105 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005106 }
5107 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005108 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005109 }
5110 else
5111 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005112 rst.nextb = NUL;
5113 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005114 }
5115 if (op != BRACE_SIMPLE)
5116 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005117 rst.minval = (op == STAR) ? 0 : 1;
5118 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005119 }
5120 else
5121 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005122 rst.minval = bl_minval;
5123 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005124 }
5125
5126 /*
5127 * When maxval > minval, try matching as much as possible, up
5128 * to maxval. When maxval < minval, try matching at least the
5129 * minimal number (since the range is backwards, that's also
5130 * maxval!).
5131 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005132 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005133 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005134 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005135 status = RA_FAIL;
5136 break;
5137 }
5138 if (rst.minval <= rst.maxval
5139 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5140 {
5141 /* It could match. Prepare for trying to match what
5142 * follows. The code is below. Parameters are stored in
5143 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005144 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005145 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005146 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005147 status = RA_FAIL;
5148 }
5149 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005150 status = RA_FAIL;
5151 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005152 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005153 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005154 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005155 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005156 if (rp == NULL)
5157 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005158 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005159 {
5160 *(((regstar_T *)rp) - 1) = rst;
5161 status = RA_BREAK; /* skip the restore bits */
5162 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005163 }
5164 }
5165 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005166 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005167
Bram Moolenaar071d4272004-06-13 20:20:40 +00005168 }
5169 break;
5170
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005171 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005172 case MATCH:
5173 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005174 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005175 if (rp == NULL)
5176 status = RA_FAIL;
5177 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005178 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005179 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005180 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005181 next = OPERAND(scan);
5182 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005183 }
5184 break;
5185
5186 case BEHIND:
5187 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005188 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005189 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005190 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005191 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005192 status = RA_FAIL;
5193 }
5194 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005195 status = RA_FAIL;
5196 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005197 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005198 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005199 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005200 if (rp == NULL)
5201 status = RA_FAIL;
5202 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005204 /* Need to save the subexpr to be able to restore them
5205 * when there is a match but we don't use it. */
5206 save_subexpr(((regbehind_T *)rp) - 1);
5207
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005208 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005209 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005210 /* First try if what follows matches. If it does then we
5211 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005212 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005213 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005214 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005215
5216 case BHPOS:
5217 if (REG_MULTI)
5218 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005219 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
5220 || behind_pos.rs_u.pos.lnum != rex.lnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005221 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005222 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005223 else if (behind_pos.rs_u.ptr != rex.input)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005224 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005225 break;
5226
5227 case NEWL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02005228 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005229 || rex.reg_line_lbr)
5230 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005231 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005232 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005233 ADVANCE_REGINPUT();
5234 else
5235 reg_nextline();
5236 break;
5237
5238 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005239 status = RA_MATCH; /* Success! */
5240 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005241
5242 default:
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005243 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00005244#ifdef DEBUG
5245 printf("Illegal op code %d\n", op);
5246#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005247 status = RA_FAIL;
5248 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005249 }
5250 }
5251
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005252 /* If we can't continue sequentially, break the inner loop. */
5253 if (status != RA_CONT)
5254 break;
5255
5256 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005257 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005258
5259 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005260
5261 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005262 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005263 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005264 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005265 while (regstack.ga_len > 0 && status != RA_FAIL)
5266 {
5267 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5268 switch (rp->rs_state)
5269 {
5270 case RS_NOPEN:
5271 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005272 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005273 break;
5274
5275 case RS_MOPEN:
5276 /* Pop the state. Restore pointers when there is no match. */
5277 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005278 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5279 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005280 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005281 break;
5282
5283#ifdef FEAT_SYN_HL
5284 case RS_ZOPEN:
5285 /* Pop the state. Restore pointers when there is no match. */
5286 if (status == RA_NOMATCH)
5287 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5288 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005289 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005290 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005291#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292
5293 case RS_MCLOSE:
5294 /* Pop the state. Restore pointers when there is no match. */
5295 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005296 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5297 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005298 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005299 break;
5300
5301#ifdef FEAT_SYN_HL
5302 case RS_ZCLOSE:
5303 /* Pop the state. Restore pointers when there is no match. */
5304 if (status == RA_NOMATCH)
5305 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5306 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005307 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005308 break;
5309#endif
5310
5311 case RS_BRANCH:
5312 if (status == RA_MATCH)
5313 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005314 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005315 else
5316 {
5317 if (status != RA_BREAK)
5318 {
5319 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005320 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005321 scan = rp->rs_scan;
5322 }
5323 if (scan == NULL || OP(scan) != BRANCH)
5324 {
5325 /* no more branches, didn't find a match */
5326 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005327 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005328 }
5329 else
5330 {
5331 /* Prepare to try a branch. */
5332 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005333 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005334 scan = OPERAND(scan);
5335 }
5336 }
5337 break;
5338
5339 case RS_BRCPLX_MORE:
5340 /* Pop the state. Restore pointers when there is no match. */
5341 if (status == RA_NOMATCH)
5342 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005343 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005344 --brace_count[rp->rs_no]; /* decrement match count */
5345 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005346 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005347 break;
5348
5349 case RS_BRCPLX_LONG:
5350 /* Pop the state. Restore pointers when there is no match. */
5351 if (status == RA_NOMATCH)
5352 {
5353 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005354 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005355 --brace_count[rp->rs_no];
5356 /* continue with the items after "\{}" */
5357 status = RA_CONT;
5358 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005359 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005360 if (status == RA_CONT)
5361 scan = regnext(scan);
5362 break;
5363
5364 case RS_BRCPLX_SHORT:
5365 /* Pop the state. Restore pointers when there is no match. */
5366 if (status == RA_NOMATCH)
5367 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005368 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005369 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005370 if (status == RA_NOMATCH)
5371 {
5372 scan = OPERAND(scan);
5373 status = RA_CONT;
5374 }
5375 break;
5376
5377 case RS_NOMATCH:
5378 /* Pop the state. If the operand matches for NOMATCH or
5379 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5380 * except for SUBPAT, and continue with the next item. */
5381 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5382 status = RA_NOMATCH;
5383 else
5384 {
5385 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005386 if (rp->rs_no != SUBPAT) /* zero-width */
5387 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005388 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005389 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005390 if (status == RA_CONT)
5391 scan = regnext(scan);
5392 break;
5393
5394 case RS_BEHIND1:
5395 if (status == RA_NOMATCH)
5396 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005397 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005398 regstack.ga_len -= sizeof(regbehind_T);
5399 }
5400 else
5401 {
5402 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5403 * the behind part does (not) match before the current
5404 * position in the input. This must be done at every
5405 * position in the input and checking if the match ends at
5406 * the current position. */
5407
5408 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005409 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005410
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005411 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005412 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005413 * result, hitting the start of the line or the previous
5414 * line (for multi-line matching).
5415 * Set behind_pos to where the match should end, BHPOS
5416 * will match it. Save the current value. */
5417 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5418 behind_pos = rp->rs_un.regsave;
5419
5420 rp->rs_state = RS_BEHIND2;
5421
Bram Moolenaar582fd852005-03-28 20:58:01 +00005422 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005423 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005424 }
5425 break;
5426
5427 case RS_BEHIND2:
5428 /*
5429 * Looping for BEHIND / NOBEHIND match.
5430 */
5431 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5432 {
5433 /* found a match that ends where "next" started */
5434 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5435 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005436 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5437 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005438 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005439 {
5440 /* But we didn't want a match. Need to restore the
5441 * subexpr, because what follows matched, so they have
5442 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005443 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005444 restore_subexpr(((regbehind_T *)rp) - 1);
5445 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005446 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005447 regstack.ga_len -= sizeof(regbehind_T);
5448 }
5449 else
5450 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005451 long limit;
5452
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005453 /* No match or a match that doesn't end where we want it: Go
5454 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005455 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005456 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005457 if (REG_MULTI)
5458 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005459 if (limit > 0
5460 && ((rp->rs_un.regsave.rs_u.pos.lnum
5461 < behind_pos.rs_u.pos.lnum
Bram Moolenaar0270f382018-07-17 05:43:58 +02005462 ? (colnr_T)STRLEN(rex.line)
Bram Moolenaar61602c52013-06-01 19:54:43 +02005463 : behind_pos.rs_u.pos.col)
5464 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5465 no = FAIL;
5466 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005467 {
5468 if (rp->rs_un.regsave.rs_u.pos.lnum
5469 < behind_pos.rs_u.pos.lnum
5470 || reg_getline(
5471 --rp->rs_un.regsave.rs_u.pos.lnum)
5472 == NULL)
5473 no = FAIL;
5474 else
5475 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005476 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005477 rp->rs_un.regsave.rs_u.pos.col =
Bram Moolenaar0270f382018-07-17 05:43:58 +02005478 (colnr_T)STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005479 }
5480 }
5481 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005482 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005483 if (has_mbyte)
Bram Moolenaarbc197192018-02-13 16:35:06 +01005484 {
5485 char_u *line =
Bram Moolenaar866f3552019-01-01 22:19:08 +01005486 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
Bram Moolenaarbc197192018-02-13 16:35:06 +01005487
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005488 rp->rs_un.regsave.rs_u.pos.col -=
Bram Moolenaarbc197192018-02-13 16:35:06 +01005489 (*mb_head_off)(line, line
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005490 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaarbc197192018-02-13 16:35:06 +01005491 }
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005492 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005493 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005494 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005495 }
5496 else
5497 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005498 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005499 no = FAIL;
5500 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005501 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005502 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005503 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5504 - rp->rs_un.regsave.rs_u.ptr) > limit)
5505 no = FAIL;
5506 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005507 }
5508 if (no == OK)
5509 {
5510 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005511 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005512 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005513 if (status == RA_MATCH)
5514 {
5515 /* We did match, so subexpr may have been changed,
5516 * need to restore them for the next try. */
5517 status = RA_NOMATCH;
5518 restore_subexpr(((regbehind_T *)rp) - 1);
5519 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005520 }
5521 else
5522 {
5523 /* Can't advance. For NOBEHIND that's a match. */
5524 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5525 if (rp->rs_no == NOBEHIND)
5526 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005527 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5528 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005529 status = RA_MATCH;
5530 }
5531 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005532 {
5533 /* We do want a proper match. Need to restore the
5534 * subexpr if we had a match, because they may have
5535 * been set. */
5536 if (status == RA_MATCH)
5537 {
5538 status = RA_NOMATCH;
5539 restore_subexpr(((regbehind_T *)rp) - 1);
5540 }
5541 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005542 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005543 regstack.ga_len -= sizeof(regbehind_T);
5544 }
5545 }
5546 break;
5547
5548 case RS_STAR_LONG:
5549 case RS_STAR_SHORT:
5550 {
5551 regstar_T *rst = ((regstar_T *)rp) - 1;
5552
5553 if (status == RA_MATCH)
5554 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005555 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005556 regstack.ga_len -= sizeof(regstar_T);
5557 break;
5558 }
5559
5560 /* Tried once already, restore input pointers. */
5561 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005562 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005563
5564 /* Repeat until we found a position where it could match. */
5565 for (;;)
5566 {
5567 if (status != RA_BREAK)
5568 {
5569 /* Tried first position already, advance. */
5570 if (rp->rs_state == RS_STAR_LONG)
5571 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005572 /* Trying for longest match, but couldn't or
5573 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005574 if (--rst->count < rst->minval)
5575 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005576 if (rex.input == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005577 {
5578 /* backup to last char of previous line */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005579 --rex.lnum;
5580 rex.line = reg_getline(rex.lnum);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005581 /* Just in case regrepeat() didn't count
5582 * right. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005583 if (rex.line == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005584 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005585 rex.input = rex.line + STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005586 fast_breakcheck();
5587 }
5588 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005589 MB_PTR_BACK(rex.line, rex.input);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005590 }
5591 else
5592 {
5593 /* Range is backwards, use shortest match first.
5594 * Careful: maxval and minval are exchanged!
5595 * Couldn't or didn't match: try advancing one
5596 * char. */
5597 if (rst->count == rst->minval
5598 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5599 break;
5600 ++rst->count;
5601 }
5602 if (got_int)
5603 break;
5604 }
5605 else
5606 status = RA_NOMATCH;
5607
5608 /* If it could match, try it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005609 if (rst->nextb == NUL || *rex.input == rst->nextb
5610 || *rex.input == rst->nextb_ic)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005611 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005612 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005613 scan = regnext(rp->rs_scan);
5614 status = RA_CONT;
5615 break;
5616 }
5617 }
5618 if (status != RA_CONT)
5619 {
5620 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005621 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005622 regstack.ga_len -= sizeof(regstar_T);
5623 status = RA_NOMATCH;
5624 }
5625 }
5626 break;
5627 }
5628
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005629 /* If we want to continue the inner loop or didn't pop a state
5630 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005631 if (status == RA_CONT || rp == (regitem_T *)
5632 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5633 break;
5634 }
5635
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005636 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005637 if (status == RA_CONT)
5638 continue;
5639
5640 /*
5641 * If the regstack is empty or something failed we are done.
5642 */
5643 if (regstack.ga_len == 0 || status == RA_FAIL)
5644 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005645 if (scan == NULL)
5646 {
5647 /*
5648 * We get here only if there's trouble -- normally "case END" is
5649 * the terminating point.
5650 */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005651 emsg(_(e_re_corr));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005652#ifdef DEBUG
5653 printf("Premature EOL\n");
5654#endif
5655 }
5656 return (status == RA_MATCH);
5657 }
5658
5659 } /* End of loop until the regstack is empty. */
5660
5661 /* NOTREACHED */
5662}
5663
5664/*
5665 * Push an item onto the regstack.
5666 * Returns pointer to new item. Returns NULL when out of memory.
5667 */
5668 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005669regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005670{
5671 regitem_T *rp;
5672
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005673 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005674 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005675 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005676 return NULL;
5677 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005678 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005679 return NULL;
5680
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005681 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005682 rp->rs_state = state;
5683 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005684
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005685 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005686 return rp;
5687}
5688
5689/*
5690 * Pop an item from the regstack.
5691 */
5692 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005693regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005694{
5695 regitem_T *rp;
5696
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005697 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005698 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005699
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005700 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005701}
5702
Bram Moolenaar071d4272004-06-13 20:20:40 +00005703/*
5704 * regrepeat - repeatedly match something simple, return how many.
Bram Moolenaar0270f382018-07-17 05:43:58 +02005705 * Advances rex.input (and rex.lnum) to just after the matched chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005706 */
5707 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005708regrepeat(
5709 char_u *p,
5710 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005711{
5712 long count = 0;
5713 char_u *scan;
5714 char_u *opnd;
5715 int mask;
5716 int testval = 0;
5717
Bram Moolenaar0270f382018-07-17 05:43:58 +02005718 scan = rex.input; /* Make local copy of rex.input for speed. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005719 opnd = OPERAND(p);
5720 switch (OP(p))
5721 {
5722 case ANY:
5723 case ANY + ADD_NL:
5724 while (count < maxcount)
5725 {
5726 /* Matching anything means we continue until end-of-line (or
5727 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5728 while (*scan != NUL && count < maxcount)
5729 {
5730 ++count;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005731 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005732 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005733 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005734 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005735 break;
5736 ++count; /* count the line-break */
5737 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005738 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005739 if (got_int)
5740 break;
5741 }
5742 break;
5743
5744 case IDENT:
5745 case IDENT + ADD_NL:
5746 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005747 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005748 case SIDENT:
5749 case SIDENT + ADD_NL:
5750 while (count < maxcount)
5751 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005752 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005753 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005754 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005755 }
5756 else if (*scan == NUL)
5757 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005758 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005759 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005760 break;
5761 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005762 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005763 if (got_int)
5764 break;
5765 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005766 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005767 ++scan;
5768 else
5769 break;
5770 ++count;
5771 }
5772 break;
5773
5774 case KWORD:
5775 case KWORD + ADD_NL:
5776 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005777 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005778 case SKWORD:
5779 case SKWORD + ADD_NL:
5780 while (count < maxcount)
5781 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005782 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005783 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005784 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005785 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005786 }
5787 else if (*scan == NUL)
5788 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005789 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005790 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005791 break;
5792 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005793 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005794 if (got_int)
5795 break;
5796 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005797 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005798 ++scan;
5799 else
5800 break;
5801 ++count;
5802 }
5803 break;
5804
5805 case FNAME:
5806 case FNAME + ADD_NL:
5807 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005808 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005809 case SFNAME:
5810 case SFNAME + ADD_NL:
5811 while (count < maxcount)
5812 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005813 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005814 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005815 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005816 }
5817 else if (*scan == NUL)
5818 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005819 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005820 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005821 break;
5822 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005823 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005824 if (got_int)
5825 break;
5826 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005827 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005828 ++scan;
5829 else
5830 break;
5831 ++count;
5832 }
5833 break;
5834
5835 case PRINT:
5836 case PRINT + ADD_NL:
5837 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005838 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005839 case SPRINT:
5840 case SPRINT + ADD_NL:
5841 while (count < maxcount)
5842 {
5843 if (*scan == NUL)
5844 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005845 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005846 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005847 break;
5848 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005849 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005850 if (got_int)
5851 break;
5852 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005853 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5854 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005855 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005856 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005857 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005858 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005859 ++scan;
5860 else
5861 break;
5862 ++count;
5863 }
5864 break;
5865
5866 case WHITE:
5867 case WHITE + ADD_NL:
5868 testval = mask = RI_WHITE;
5869do_class:
5870 while (count < maxcount)
5871 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005872 int l;
Bram Moolenaara12a1612019-01-24 16:39:02 +01005873
Bram Moolenaar071d4272004-06-13 20:20:40 +00005874 if (*scan == NUL)
5875 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005876 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005877 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005878 break;
5879 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005880 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005881 if (got_int)
5882 break;
5883 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005884 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005885 {
5886 if (testval != 0)
5887 break;
5888 scan += l;
5889 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005890 else if ((class_tab[*scan] & mask) == testval)
5891 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005892 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005893 ++scan;
5894 else
5895 break;
5896 ++count;
5897 }
5898 break;
5899
5900 case NWHITE:
5901 case NWHITE + ADD_NL:
5902 mask = RI_WHITE;
5903 goto do_class;
5904 case DIGIT:
5905 case DIGIT + ADD_NL:
5906 testval = mask = RI_DIGIT;
5907 goto do_class;
5908 case NDIGIT:
5909 case NDIGIT + ADD_NL:
5910 mask = RI_DIGIT;
5911 goto do_class;
5912 case HEX:
5913 case HEX + ADD_NL:
5914 testval = mask = RI_HEX;
5915 goto do_class;
5916 case NHEX:
5917 case NHEX + ADD_NL:
5918 mask = RI_HEX;
5919 goto do_class;
5920 case OCTAL:
5921 case OCTAL + ADD_NL:
5922 testval = mask = RI_OCTAL;
5923 goto do_class;
5924 case NOCTAL:
5925 case NOCTAL + ADD_NL:
5926 mask = RI_OCTAL;
5927 goto do_class;
5928 case WORD:
5929 case WORD + ADD_NL:
5930 testval = mask = RI_WORD;
5931 goto do_class;
5932 case NWORD:
5933 case NWORD + ADD_NL:
5934 mask = RI_WORD;
5935 goto do_class;
5936 case HEAD:
5937 case HEAD + ADD_NL:
5938 testval = mask = RI_HEAD;
5939 goto do_class;
5940 case NHEAD:
5941 case NHEAD + ADD_NL:
5942 mask = RI_HEAD;
5943 goto do_class;
5944 case ALPHA:
5945 case ALPHA + ADD_NL:
5946 testval = mask = RI_ALPHA;
5947 goto do_class;
5948 case NALPHA:
5949 case NALPHA + ADD_NL:
5950 mask = RI_ALPHA;
5951 goto do_class;
5952 case LOWER:
5953 case LOWER + ADD_NL:
5954 testval = mask = RI_LOWER;
5955 goto do_class;
5956 case NLOWER:
5957 case NLOWER + ADD_NL:
5958 mask = RI_LOWER;
5959 goto do_class;
5960 case UPPER:
5961 case UPPER + ADD_NL:
5962 testval = mask = RI_UPPER;
5963 goto do_class;
5964 case NUPPER:
5965 case NUPPER + ADD_NL:
5966 mask = RI_UPPER;
5967 goto do_class;
5968
5969 case EXACTLY:
5970 {
5971 int cu, cl;
5972
5973 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005974 * would have been used for it. It does handle single-byte
5975 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02005976 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005977 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005978 cu = MB_TOUPPER(*opnd);
5979 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005980 while (count < maxcount && (*scan == cu || *scan == cl))
5981 {
5982 count++;
5983 scan++;
5984 }
5985 }
5986 else
5987 {
5988 cu = *opnd;
5989 while (count < maxcount && *scan == cu)
5990 {
5991 count++;
5992 scan++;
5993 }
5994 }
5995 break;
5996 }
5997
Bram Moolenaar071d4272004-06-13 20:20:40 +00005998 case MULTIBYTECODE:
5999 {
6000 int i, len, cf = 0;
6001
6002 /* Safety check (just in case 'encoding' was changed since
6003 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006004 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006005 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006006 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006007 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006008 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006009 {
6010 for (i = 0; i < len; ++i)
6011 if (opnd[i] != scan[i])
6012 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006013 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00006014 || utf_fold(utf_ptr2char(scan)) != cf))
6015 break;
6016 scan += len;
6017 ++count;
6018 }
6019 }
6020 }
6021 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006022
6023 case ANYOF:
6024 case ANYOF + ADD_NL:
6025 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02006026 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006027
6028 case ANYBUT:
6029 case ANYBUT + ADD_NL:
6030 while (count < maxcount)
6031 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00006032 int len;
Bram Moolenaara12a1612019-01-24 16:39:02 +01006033
Bram Moolenaar071d4272004-06-13 20:20:40 +00006034 if (*scan == NUL)
6035 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006036 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006037 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006038 break;
6039 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006040 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006041 if (got_int)
6042 break;
6043 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006044 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006045 ++scan;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006046 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006047 {
6048 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6049 break;
6050 scan += len;
6051 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006052 else
6053 {
6054 if ((cstrchr(opnd, *scan) == NULL) == testval)
6055 break;
6056 ++scan;
6057 }
6058 ++count;
6059 }
6060 break;
6061
6062 case NEWL:
6063 while (count < maxcount
Bram Moolenaar0270f382018-07-17 05:43:58 +02006064 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006065 && !rex.reg_line_lbr && REG_MULTI)
6066 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006067 {
6068 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006069 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006070 ADVANCE_REGINPUT();
6071 else
6072 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006073 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006074 if (got_int)
6075 break;
6076 }
6077 break;
6078
6079 default: /* Oh dear. Called inappropriately. */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006080 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006081#ifdef DEBUG
6082 printf("Called regrepeat with op code %d\n", OP(p));
6083#endif
6084 break;
6085 }
6086
Bram Moolenaar0270f382018-07-17 05:43:58 +02006087 rex.input = scan;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006088
6089 return (int)count;
6090}
6091
6092/*
6093 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006094 * Returns NULL when calculating size, when there is no next item and when
6095 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006096 */
6097 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006098regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006099{
6100 int offset;
6101
Bram Moolenaard3005802009-11-25 17:21:32 +00006102 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006103 return NULL;
6104
6105 offset = NEXT(p);
6106 if (offset == 0)
6107 return NULL;
6108
Bram Moolenaar582fd852005-03-28 20:58:01 +00006109 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006110 return p - offset;
6111 else
6112 return p + offset;
6113}
6114
6115/*
6116 * Check the regexp program for its magic number.
6117 * Return TRUE if it's wrong.
6118 */
6119 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006120prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006121{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006122 regprog_T *prog;
6123
Bram Moolenaar6100d022016-10-02 16:51:57 +02006124 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006125 if (prog->engine == &nfa_regengine)
6126 /* For NFA matcher we don't check the magic */
6127 return FALSE;
6128
6129 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006130 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006131 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006132 return TRUE;
6133 }
6134 return FALSE;
6135}
6136
6137/*
6138 * Cleanup the subexpressions, if this wasn't done yet.
6139 * This construction is used to clear the subexpressions only when they are
6140 * used (to increase speed).
6141 */
6142 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006143cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006144{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006145 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006146 {
6147 if (REG_MULTI)
6148 {
6149 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006150 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6151 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006152 }
6153 else
6154 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006155 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6156 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006157 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006158 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006159 }
6160}
6161
#ifdef FEAT_SYN_HL
/*
 * Clear the reg_startz and reg_endz arrays (positions or pointers, depending
 * on REG_MULTI), unless that was already done.  Works like
 * cleanup_subexpr() but is controlled by rex.need_clear_zsubexpr.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    rex.need_clear_zsubexpr = FALSE;
}
#endif
6183
6184/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006185 * Save the current subexpr to "bp", so that they can be restored
6186 * later by restore_subexpr().
6187 */
6188 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006189save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006190{
6191 int i;
6192
Bram Moolenaar0270f382018-07-17 05:43:58 +02006193 /* When "rex.need_clear_subexpr" is set we don't need to save the values, only
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006194 * remember that this flag needs to be set again when restoring. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006195 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
6196 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006197 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006198 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006199 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006200 if (REG_MULTI)
6201 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006202 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6203 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006204 }
6205 else
6206 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006207 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6208 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006209 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006210 }
6211 }
6212}
6213
6214/*
6215 * Restore the subexpr from "bp".
6216 */
6217 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006218restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006219{
6220 int i;
6221
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006222 /* Only need to restore saved values when they are not to be cleared. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006223 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
6224 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006225 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006226 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006227 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006228 if (REG_MULTI)
6229 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006230 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6231 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006232 }
6233 else
6234 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006235 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6236 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006237 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006238 }
6239 }
6240}
6241
6242/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02006243 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006244 */
6245 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006246reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006247{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006248 rex.line = reg_getline(++rex.lnum);
6249 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006250 fast_breakcheck();
6251}
6252
6253/*
6254 * Save the input line and position in a regsave_T.
6255 */
6256 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006257reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006258{
6259 if (REG_MULTI)
6260 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006261 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
6262 save->rs_u.pos.lnum = rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006263 }
6264 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006265 save->rs_u.ptr = rex.input;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006266 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006267}
6268
6269/*
6270 * Restore the input line and position from a regsave_T.
6271 */
6272 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006273reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006274{
6275 if (REG_MULTI)
6276 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006277 if (rex.lnum != save->rs_u.pos.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006278 {
6279 /* only call reg_getline() when the line number changed to save
6280 * a bit of time */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006281 rex.lnum = save->rs_u.pos.lnum;
6282 rex.line = reg_getline(rex.lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006283 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006284 rex.input = rex.line + save->rs_u.pos.col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006285 }
6286 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006287 rex.input = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006288 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006289}
6290
6291/*
6292 * Return TRUE if current position is equal to saved position.
6293 */
6294 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006295reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006296{
6297 if (REG_MULTI)
Bram Moolenaar0270f382018-07-17 05:43:58 +02006298 return rex.lnum == save->rs_u.pos.lnum
6299 && rex.input == rex.line + save->rs_u.pos.col;
6300 return rex.input == save->rs_u.ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006301}
6302
6303/*
6304 * Tentatively set the sub-expression start to the current position (after
6305 * calling regmatch() they will have changed). Need to save the existing
6306 * values for when there is no match.
6307 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6308 * depending on REG_MULTI.
6309 */
6310 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006311save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006312{
6313 savep->se_u.pos = *posp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006314 posp->lnum = rex.lnum;
6315 posp->col = (colnr_T)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006316}
6317
6318 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006319save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006320{
6321 savep->se_u.ptr = *pp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006322 *pp = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006323}
6324
6325/*
6326 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6327 */
6328 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006329re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006330{
6331 long_u n = OPERAND_MIN(scan);
6332
6333 if (OPERAND_CMP(scan) == '>')
6334 return val > n;
6335 if (OPERAND_CMP(scan) == '<')
6336 return val < n;
6337 return val == n;
6338}
6339
Bram Moolenaar580abea2013-06-14 20:31:28 +02006340/*
6341 * Check whether a backreference matches.
6342 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006343 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6344 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006345 */
6346 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006347match_with_backref(
6348 linenr_T start_lnum,
6349 colnr_T start_col,
6350 linenr_T end_lnum,
6351 colnr_T end_col,
6352 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006353{
6354 linenr_T clnum = start_lnum;
6355 colnr_T ccol = start_col;
6356 int len;
6357 char_u *p;
6358
6359 if (bytelen != NULL)
6360 *bytelen = 0;
6361 for (;;)
6362 {
6363 /* Since getting one line may invalidate the other, need to make copy.
6364 * Slow! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006365 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006366 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006367 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02006368 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6369 {
6370 len += 50; /* get some extra */
6371 vim_free(reg_tofree);
6372 reg_tofree = alloc(len);
6373 if (reg_tofree == NULL)
6374 return RA_FAIL; /* out of memory!*/
6375 reg_tofreelen = len;
6376 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006377 STRCPY(reg_tofree, rex.line);
6378 rex.input = reg_tofree + (rex.input - rex.line);
6379 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006380 }
6381
6382 /* Get the line to compare with. */
6383 p = reg_getline(clnum);
6384 if (clnum == end_lnum)
6385 len = end_col - ccol;
6386 else
6387 len = (int)STRLEN(p + ccol);
6388
Bram Moolenaar0270f382018-07-17 05:43:58 +02006389 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006390 return RA_NOMATCH; /* doesn't match */
6391 if (bytelen != NULL)
6392 *bytelen += len;
6393 if (clnum == end_lnum)
6394 break; /* match and at end! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006395 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006396 return RA_NOMATCH; /* text too short */
6397
6398 /* Advance to next line. */
6399 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006400 if (bytelen != NULL)
6401 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006402 ++clnum;
6403 ccol = 0;
6404 if (got_int)
6405 return RA_FAIL;
6406 }
6407
Bram Moolenaar0270f382018-07-17 05:43:58 +02006408 /* found a match! Note that rex.line may now point to a copy of the line,
Bram Moolenaar580abea2013-06-14 20:31:28 +02006409 * that should not matter. */
6410 return RA_MATCH;
6411}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006412
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write the compiled program "r" of "pattern" in a vaguely
 * comprehensible form.
 * The output is appended to the file "bt_regexp_log.log" when BT_REGEXP_LOG
 * is defined, otherwise it goes to stdout.  Nothing is written when the
 * file cannot be opened.
 */
    static void
regdump(char_u *pattern, bt_regprog_T *r)
{
    char_u  *s;
    int     op = EXACTLY;       /* Arbitrary non-END op. */
    char_u  *next;
    char_u  *end = NULL;        /* furthest node referred to so far */
    FILE    *f;

#ifdef BT_REGEXP_LOG
    f = fopen("bt_regexp_log.log", "a");
#else
    f = stdout;
#endif
    if (f == NULL)
        return;
    fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Loop until we find the END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    for (s = r->program + 1; op != END || s <= end; )
    {
        op = OP(s);
        fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */

        /* The "next" pointer, as an offset in the program. */
        next = regnext(s);
        if (next == NULL)
            fprintf(f, "(0)");
        else
            fprintf(f, "(%d)", (int)(next - r->program));
        if (end < next)
            end = next;

        /* Print the numeric operands and skip over them. */
        switch (op)
        {
            case BRACE_LIMITS:
                /* Two ints */
                fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
                s += 8;
                break;

            case BEHIND:
            case NOBEHIND:
                /* one int */
                fprintf(f, " count %ld", OPERAND_MIN(s));
                s += 4;
                break;

            case RE_LNUM:
            case RE_COL:
            case RE_VCOL:
                /* one int plus comparator */
                fprintf(f, " count %ld", OPERAND_MIN(s));
                s += 5;
                break;

            default:
                break;
        }

        /* Skip the opcode and the "next" offset. */
        s += 3;

        if (op == ANYOF || op == ANYOF + ADD_NL
                || op == ANYBUT || op == ANYBUT + ADD_NL
                || op == EXACTLY)
        {
            /* Literal string, where present. */
            fprintf(f, "\nxxxxxxxxx\n");
            for ( ; *s != NUL; ++s)
                fprintf(f, "%c", *s);
            fprintf(f, "\nxxxxxxxxx\n");
            s++;                /* skip the terminating NUL */
        }
        fprintf(f, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
        fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        fprintf(f, "anchored; ");
    if (r->regmust != NULL)
        fprintf(f, "must have \"%s\"", r->regmust);
    fprintf(f, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(f);
#endif
}
#endif /* BT_REGEXP_DUMP */
6501
#ifdef DEBUG
/* Table entry for an opcode whose printed name is its own identifier. */
#define REGPROP_OP(o)       {(o), #o}
/* Same, plus the entry for the opcode with ADD_NL added ("<name>+NL"). */
#define REGPROP_CLASS(o)    {(o), #o}, {(o) + ADD_NL, #o "+NL"}

/*
 * regprop - printable representation of opcode
 *
 * Returns a pointer to a static buffer that holds ":" followed by the name of
 * the opcode found at "op".  The buffer is overwritten by every call.
 * Opcodes that come in numbered groups are printed as the group name followed
 * by the number; an opcode that is not known is printed as "corrupt <number>".
 */
    static char_u *
regprop(char_u *op)
{
    /* Opcodes with a fixed name. */
    static const struct
    {
        int     code;
        char    *name;
    } plain[] =
    {
        REGPROP_OP(BOL),
        REGPROP_OP(EOL),
        {RE_BOF, "BOF"},
        {RE_EOF, "EOF"},
        REGPROP_OP(CURSOR),
        REGPROP_OP(RE_VISUAL),
        REGPROP_OP(RE_LNUM),
        REGPROP_OP(RE_MARK),
        REGPROP_OP(RE_COL),
        REGPROP_OP(RE_VCOL),
        REGPROP_OP(BOW),
        REGPROP_OP(EOW),
        REGPROP_CLASS(ANY),
        REGPROP_CLASS(ANYOF),
        REGPROP_CLASS(ANYBUT),
        REGPROP_CLASS(IDENT),
        REGPROP_CLASS(SIDENT),
        REGPROP_CLASS(KWORD),
        REGPROP_CLASS(SKWORD),
        REGPROP_CLASS(FNAME),
        REGPROP_CLASS(SFNAME),
        REGPROP_CLASS(PRINT),
        REGPROP_CLASS(SPRINT),
        REGPROP_CLASS(WHITE),
        REGPROP_CLASS(NWHITE),
        REGPROP_CLASS(DIGIT),
        REGPROP_CLASS(NDIGIT),
        REGPROP_CLASS(HEX),
        REGPROP_CLASS(NHEX),
        REGPROP_CLASS(OCTAL),
        REGPROP_CLASS(NOCTAL),
        REGPROP_CLASS(WORD),
        REGPROP_CLASS(NWORD),
        REGPROP_CLASS(HEAD),
        REGPROP_CLASS(NHEAD),
        REGPROP_CLASS(ALPHA),
        REGPROP_CLASS(NALPHA),
        REGPROP_CLASS(LOWER),
        REGPROP_CLASS(NLOWER),
        REGPROP_CLASS(UPPER),
        REGPROP_CLASS(NUPPER),
        REGPROP_OP(BRANCH),
        REGPROP_OP(EXACTLY),
        REGPROP_OP(NOTHING),
        REGPROP_OP(BACK),
        REGPROP_OP(END),
        {MOPEN + 0, "MATCH START"},
        {MCLOSE + 0, "MATCH END"},
        REGPROP_OP(NOPEN),
        REGPROP_OP(NCLOSE),
        REGPROP_OP(STAR),
        REGPROP_OP(PLUS),
        REGPROP_OP(NOMATCH),
        REGPROP_OP(MATCH),
        REGPROP_OP(BEHIND),
        REGPROP_OP(NOBEHIND),
        REGPROP_OP(SUBPAT),
        REGPROP_OP(BRACE_LIMITS),
        REGPROP_OP(BRACE_SIMPLE),
        REGPROP_OP(MULTIBYTECODE),
        REGPROP_OP(NEWL)
    };
    /* Opcode groups "base + first" .. "base + 9", printed as name + number. */
    static const struct
    {
        int     base;
        int     first;
        char    *name;
    } numbered[] =
    {
        {MOPEN, 1, "MOPEN"},
        {MCLOSE, 1, "MCLOSE"},
        {BACKREF, 1, "BACKREF"},
#ifdef FEAT_SYN_HL
        {ZOPEN, 1, "ZOPEN"},
        {ZCLOSE, 1, "ZCLOSE"},
        {ZREF, 1, "ZREF"},
#endif
        {BRACE_COMPLEX, 0, "BRACE_COMPLEX"}
    };
    static char buf[50];
    int         code = (int)OP(op);
    int         i;

    STRCPY(buf, ":");

    for (i = 0; i < (int)(sizeof(plain) / sizeof(plain[0])); ++i)
        if (plain[i].code == code)
        {
            STRCAT(buf, plain[i].name);
            return (char_u *)buf;
        }

    for (i = 0; i < (int)(sizeof(numbered) / sizeof(numbered[0])); ++i)
    {
        int nr = code - numbered[i].base;

        if (nr >= numbered[i].first && nr <= 9)
        {
            sprintf(buf + STRLEN(buf), "%s%d", numbered[i].name, nr);
            return (char_u *)buf;
        }
    }

    sprintf(buf + STRLEN(buf), "corrupt %d", code);
    return (char_u *)buf;
}

#undef REGPROP_OP
#undef REGPROP_CLASS
#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006883
Bram Moolenaarfb031402014-09-09 17:18:49 +02006884/*
6885 * Used in a place where no * or \+ can follow.
6886 */
6887 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006888re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006889{
6890 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01006891 {
6892 semsg(_("E888: (NFA regexp) cannot repeat %s"), what);
6893 rc_did_emsg = TRUE;
6894 return FAIL;
6895 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02006896 return OK;
6897}
6898
/*
 * Decomposition of one character: the base character "a" followed by up to
 * two more characters "b" and "c"; unused positions are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the Hebrew presentation forms 0xfb20 - 0xfb4f, indexed
 * by "character - 0xfb20".  Code points that are not assigned map to
 * themselves.
 */
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    /* Unicode decomposes U+FB30 to 05D0 05BC (mapiq), not 05B4 (hiriq). */
    {0x5d0, 0x5bc, 0},          /* 0xfb30       alef+mapiq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters.
 * For a character in the range 0xfb20 - 0xfb4f the table entry is stored in
 * "*c1", "*c2" and "*c3".  Any other character is stored unchanged in "*c1"
 * and "*c2" and "*c3" are set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        const decomp_T *d = &decomp_table[c - 0xfb20];

        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006976
6977/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02006978 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006979 * Return 0 if strings match, non-zero otherwise.
6980 * Correct the length "*n" when composing characters are ignored.
6981 */
6982 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006983cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006984{
6985 int result;
6986
Bram Moolenaar6100d022016-10-02 16:51:57 +02006987 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006988 result = STRNCMP(s1, s2, *n);
6989 else
6990 result = MB_STRNICMP(s1, s2, *n);
6991
Bram Moolenaar071d4272004-06-13 20:20:40 +00006992 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006993 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006994 {
6995 char_u *str1, *str2;
6996 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006997 int junk;
6998
6999 /* we have to handle the strcmp ourselves, since it is necessary to
7000 * deal with the composing characters by ignoring them: */
7001 str1 = s1;
7002 str2 = s2;
7003 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007004 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007005 {
7006 c1 = mb_ptr2char_adv(&str1);
7007 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007008
7009 /* decompose the character if necessary, into 'base' characters
7010 * because I don't care about Arabic, I will hard-code the Hebrew
7011 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007012 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007013 {
7014 /* decomposition necessary? */
7015 mb_decompose(c1, &c11, &junk, &junk);
7016 mb_decompose(c2, &c12, &junk, &junk);
7017 c1 = c11;
7018 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007019 if (c11 != c12
7020 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007021 break;
7022 }
7023 }
7024 result = c2 - c1;
7025 if (result == 0)
7026 *n = (int)(str2 - s2);
7027 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007028
7029 return result;
7030}
7031
7032/*
7033 * cstrchr: This function is used a lot for simple searches, keep it fast!
7034 */
7035 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007036cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007037{
7038 char_u *p;
7039 int cc;
7040
Bram Moolenaara12a1612019-01-24 16:39:02 +01007041 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007042 return vim_strchr(s, c);
7043
7044 /* tolower() and toupper() can be slow, comparing twice should be a lot
7045 * faster (esp. when using MS Visual C++!).
7046 * For UTF-8 need to use folded case. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007047 if (enc_utf8 && c > 0x80)
7048 cc = utf_fold(c);
7049 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007050 if (MB_ISUPPER(c))
7051 cc = MB_TOLOWER(c);
7052 else if (MB_ISLOWER(c))
7053 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007054 else
7055 return vim_strchr(s, c);
7056
Bram Moolenaar071d4272004-06-13 20:20:40 +00007057 if (has_mbyte)
7058 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007059 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007060 {
7061 if (enc_utf8 && c > 0x80)
7062 {
7063 if (utf_fold(utf_ptr2char(p)) == cc)
7064 return p;
7065 }
7066 else if (*p == c || *p == cc)
7067 return p;
7068 }
7069 }
7070 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007071 /* Faster version for when there are no multi-byte characters. */
7072 for (p = s; *p != NUL; ++p)
7073 if (*p == c || *p == cc)
7074 return p;
7075
7076 return NULL;
7077}
7078
7079/***************************************************************
7080 * regsub stuff *
7081 ***************************************************************/
7082
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 * The functions take a pointer to where the result character is stored and
 * the character to convert, see do_upper() and friends.
 */
typedef void (*(*fptr_T)(int *, int))();

/* Does the work for both vim_regsub() and vim_regsub_multi(). */
static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007092
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007093 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007094do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007095{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007096 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007097
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007098 return (fptr_T)NULL;
7099}
7100
7101 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007102do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007103{
7104 *d = MB_TOUPPER(c);
7105
7106 return (fptr_T)do_Upper;
7107}
7108
7109 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007110do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007111{
7112 *d = MB_TOLOWER(c);
7113
7114 return (fptr_T)NULL;
7115}
7116
7117 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007118do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007119{
7120 *d = MB_TOLOWER(c);
7121
7122 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007123}
7124
7125/*
7126 * regtilde(): Replace tildes in the pattern by the old pattern.
7127 *
7128 * Short explanation of the tilde: It stands for the previous replacement
7129 * pattern. If that previous pattern also contains a ~ we should go back a
7130 * step further... But we insert the previous pattern into the current one
7131 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007132 * This still does not handle the case where "magic" changes. So require the
7133 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007134 *
7135 * The tildes are parsed once before the first call to vim_regsub().
7136 */
7137 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007138regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007139{
7140 char_u *newsub = source;
7141 char_u *tmpsub;
7142 char_u *p;
7143 int len;
7144 int prevlen;
7145
7146 for (p = newsub; *p; ++p)
7147 {
7148 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7149 {
7150 if (reg_prev_sub != NULL)
7151 {
7152 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7153 prevlen = (int)STRLEN(reg_prev_sub);
Bram Moolenaar964b3742019-05-24 18:54:09 +02007154 tmpsub = alloc(STRLEN(newsub) + prevlen);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007155 if (tmpsub != NULL)
7156 {
7157 /* copy prefix */
7158 len = (int)(p - newsub); /* not including ~ */
7159 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007160 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007161 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7162 /* copy postfix */
7163 if (!magic)
7164 ++p; /* back off \ */
7165 STRCPY(tmpsub + len + prevlen, p + 1);
7166
7167 if (newsub != source) /* already allocated newsub */
7168 vim_free(newsub);
7169 newsub = tmpsub;
7170 p = newsub + len + prevlen;
7171 }
7172 }
7173 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007174 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007175 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007176 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007177 --p;
7178 }
7179 else
7180 {
7181 if (*p == '\\' && p[1]) /* skip escaped characters */
7182 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007183 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007184 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007185 }
7186 }
7187
7188 vim_free(reg_prev_sub);
7189 if (newsub != source) /* newsub was allocated, just keep it */
7190 reg_prev_sub = newsub;
7191 else /* no ~ found, need to save newsub */
7192 reg_prev_sub = vim_strsave(newsub);
7193 return newsub;
7194}
7195
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used for reg_submatch().  Needed for when the
 * substitution string is an expression that contains a call to substitute()
 * and submatch().
 * vim_regsub_both() fills the fields from the "rex" fields with the matching
 * name before it evaluates the expression. */
typedef struct {
    regmatch_T  *sm_match;      /* copy of rex.reg_match */
    regmmatch_T *sm_mmatch;     /* copy of rex.reg_mmatch */
    linenr_T    sm_firstlnum;   /* copy of rex.reg_firstlnum */
    linenr_T    sm_maxline;     /* copy of rex.reg_maxline */
    int         sm_line_lbr;    /* copy of rex.reg_line_lbr */
} regsubmatch_T;

static regsubmatch_T rsm;  /* can only be used when can_f_submatch is TRUE */
#endif
7212
Bram Moolenaarb005cd82019-09-04 15:54:55 +02007213#ifdef FEAT_EVAL
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007214
7215/*
7216 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7217 * vim_regsub_both().
7218 */
7219 static int
7220fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7221{
7222 listitem_T *li;
7223 int i;
7224 char_u *s;
7225
7226 if (argcount == 0)
7227 /* called function doesn't take an argument */
7228 return 0;
7229
7230 /* Relies on sl_list to be the first item in staticList10_T. */
7231 init_static_list((staticList10_T *)(argv->vval.v_list));
7232
7233 /* There are always 10 list items in staticList10_T. */
7234 li = argv->vval.v_list->lv_first;
7235 for (i = 0; i < 10; ++i)
7236 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007237 s = rsm.sm_match->startp[i];
7238 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007239 s = NULL;
7240 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007241 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007242 li->li_tv.v_type = VAR_STRING;
7243 li->li_tv.vval.v_string = s;
7244 li = li->li_next;
7245 }
7246 return 1;
7247}
7248
7249 static void
7250clear_submatch_list(staticList10_T *sl)
7251{
7252 int i;
7253
7254 for (i = 0; i < 10; ++i)
7255 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7256}
Bram Moolenaarb005cd82019-09-04 15:54:55 +02007257#endif
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007258
Bram Moolenaar071d4272004-06-13 20:20:40 +00007259/*
7260 * vim_regsub() - perform substitutions after a vim_regexec() or
7261 * vim_regexec_multi() match.
7262 *
7263 * If "copy" is TRUE really copy into "dest".
7264 * If "copy" is FALSE nothing is copied, this is just to find out the length
7265 * of the result.
7266 *
7267 * If "backslash" is TRUE, a backslash will be removed later, need to double
7268 * them to keep them, and insert a backslash before a CR to avoid it being
7269 * replaced with a line break later.
7270 *
7271 * Note: The matched text must not change between the call of
7272 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7273 * references invalid!
7274 *
7275 * Returns the size of the replacement, including terminating NUL.
7276 */
7277 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007278vim_regsub(
7279 regmatch_T *rmp,
7280 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007281 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007282 char_u *dest,
7283 int copy,
7284 int magic,
7285 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007286{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007287 int result;
7288 regexec_T rex_save;
7289 int rex_in_use_save = rex_in_use;
7290
7291 if (rex_in_use)
7292 /* Being called recursively, save the state. */
7293 rex_save = rex;
7294 rex_in_use = TRUE;
7295
7296 rex.reg_match = rmp;
7297 rex.reg_mmatch = NULL;
7298 rex.reg_maxline = 0;
7299 rex.reg_buf = curbuf;
7300 rex.reg_line_lbr = TRUE;
7301 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7302
7303 rex_in_use = rex_in_use_save;
7304 if (rex_in_use)
7305 rex = rex_save;
7306
7307 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007308}
Bram Moolenaar071d4272004-06-13 20:20:40 +00007309
7310 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007311vim_regsub_multi(
7312 regmmatch_T *rmp,
7313 linenr_T lnum,
7314 char_u *source,
7315 char_u *dest,
7316 int copy,
7317 int magic,
7318 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007319{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007320 int result;
7321 regexec_T rex_save;
7322 int rex_in_use_save = rex_in_use;
7323
7324 if (rex_in_use)
7325 /* Being called recursively, save the state. */
7326 rex_save = rex;
7327 rex_in_use = TRUE;
7328
7329 rex.reg_match = NULL;
7330 rex.reg_mmatch = rmp;
7331 rex.reg_buf = curbuf; /* always works on the current buffer! */
7332 rex.reg_firstlnum = lnum;
7333 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7334 rex.reg_line_lbr = FALSE;
7335 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7336
7337 rex_in_use = rex_in_use_save;
7338 if (rex_in_use)
7339 rex = rex_save;
7340
7341 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007342}
7343
7344 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007345vim_regsub_both(
7346 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007347 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007348 char_u *dest,
7349 int copy,
7350 int magic,
7351 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007352{
7353 char_u *src;
7354 char_u *dst;
7355 char_u *s;
7356 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007357 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007358 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007359 fptr_T func_all = (fptr_T)NULL;
7360 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007361 linenr_T clnum = 0; /* init for GCC */
7362 int len = 0; /* init for GCC */
7363#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007364 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007365#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007366
7367 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007368 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007369 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007370 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007371 return 0;
7372 }
7373 if (prog_magic_wrong())
7374 return 0;
7375 src = source;
7376 dst = dest;
7377
7378 /*
7379 * When the substitute part starts with "\=" evaluate it as an expression.
7380 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007381 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007382 {
7383#ifdef FEAT_EVAL
7384 /* To make sure that the length doesn't change between checking the
7385 * length and copying the string, and to speed up things, the
7386 * resulting string is saved from the call with "copy" == FALSE to the
7387 * call with "copy" == TRUE. */
7388 if (copy)
7389 {
7390 if (eval_result != NULL)
7391 {
7392 STRCPY(dest, eval_result);
7393 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01007394 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007395 }
7396 }
7397 else
7398 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007399 int prev_can_f_submatch = can_f_submatch;
7400 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007401
7402 vim_free(eval_result);
7403
7404 /* The expression may contain substitute(), which calls us
7405 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02007406 * level. */
7407 if (can_f_submatch)
7408 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007409 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007410 rsm.sm_match = rex.reg_match;
7411 rsm.sm_mmatch = rex.reg_mmatch;
7412 rsm.sm_firstlnum = rex.reg_firstlnum;
7413 rsm.sm_maxline = rex.reg_maxline;
7414 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007415
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007416 if (expr != NULL)
7417 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007418 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007419 char_u buf[NUMBUFLEN];
7420 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007421 staticList10_T matchList;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02007422 funcexe_T funcexe;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007423
7424 rettv.v_type = VAR_STRING;
7425 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007426 argv[0].v_type = VAR_LIST;
7427 argv[0].vval.v_list = &matchList.sl_list;
7428 matchList.sl_list.lv_len = 0;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02007429 vim_memset(&funcexe, 0, sizeof(funcexe));
7430 funcexe.argv_func = fill_submatch_list;
7431 funcexe.evaluate = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007432 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007433 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007434 s = expr->vval.v_string;
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02007435 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007436 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007437 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007438 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007439 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007440
Bram Moolenaar6100d022016-10-02 16:51:57 +02007441 s = partial_name(partial);
Bram Moolenaarc6538bc2019-08-03 18:17:11 +02007442 funcexe.partial = partial;
7443 call_func(s, -1, &rettv, 1, argv, &funcexe);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007444 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007445 if (matchList.sl_list.lv_len > 0)
7446 /* fill_submatch_list() was called */
7447 clear_submatch_list(&matchList);
7448
Bram Moolenaard155d7a2018-12-21 16:04:21 +01007449 eval_result = tv_get_string_buf_chk(&rettv, buf);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007450 if (eval_result != NULL)
7451 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007452 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007453 }
7454 else
7455 eval_result = eval_to_string(source + 2, NULL, TRUE);
7456
Bram Moolenaar071d4272004-06-13 20:20:40 +00007457 if (eval_result != NULL)
7458 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007459 int had_backslash = FALSE;
7460
Bram Moolenaar91acfff2017-03-12 19:22:36 +01007461 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007462 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007463 /* Change NL to CR, so that it becomes a line break,
7464 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007465 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007466 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007467 *s = CAR;
7468 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007469 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007470 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007471 /* Change NL to CR here too, so that this works:
7472 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7473 * abc\
7474 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007475 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007476 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007477 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007478 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007479 had_backslash = TRUE;
7480 }
7481 }
7482 if (had_backslash && backslash)
7483 {
7484 /* Backslashes will be consumed, need to double them. */
7485 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7486 if (s != NULL)
7487 {
7488 vim_free(eval_result);
7489 eval_result = s;
7490 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007491 }
7492
7493 dst += STRLEN(eval_result);
7494 }
7495
Bram Moolenaar6100d022016-10-02 16:51:57 +02007496 can_f_submatch = prev_can_f_submatch;
7497 if (can_f_submatch)
7498 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007499 }
7500#endif
7501 }
7502 else
7503 while ((c = *src++) != NUL)
7504 {
7505 if (c == '&' && magic)
7506 no = 0;
7507 else if (c == '\\' && *src != NUL)
7508 {
7509 if (*src == '&' && !magic)
7510 {
7511 ++src;
7512 no = 0;
7513 }
7514 else if ('0' <= *src && *src <= '9')
7515 {
7516 no = *src++ - '0';
7517 }
7518 else if (vim_strchr((char_u *)"uUlLeE", *src))
7519 {
7520 switch (*src++)
7521 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007522 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007523 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007524 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007525 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007526 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007527 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007528 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007529 continue;
7530 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007531 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007532 continue;
7533 }
7534 }
7535 }
7536 if (no < 0) /* Ordinary character. */
7537 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007538 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7539 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007540 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007541 if (copy)
7542 {
7543 *dst++ = c;
7544 *dst++ = *src++;
7545 *dst++ = *src++;
7546 }
7547 else
7548 {
7549 dst += 3;
7550 src += 2;
7551 }
7552 continue;
7553 }
7554
Bram Moolenaar071d4272004-06-13 20:20:40 +00007555 if (c == '\\' && *src != NUL)
7556 {
7557 /* Check for abbreviations -- webb */
7558 switch (*src)
7559 {
7560 case 'r': c = CAR; ++src; break;
7561 case 'n': c = NL; ++src; break;
7562 case 't': c = TAB; ++src; break;
7563 /* Oh no! \e already has meaning in subst pat :-( */
7564 /* case 'e': c = ESC; ++src; break; */
7565 case 'b': c = Ctrl_H; ++src; break;
7566
7567 /* If "backslash" is TRUE the backslash will be removed
7568 * later. Used to insert a literal CR. */
7569 default: if (backslash)
7570 {
7571 if (copy)
7572 *dst = '\\';
7573 ++dst;
7574 }
7575 c = *src++;
7576 }
7577 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00007578 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007579 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007580
Bram Moolenaardb552d602006-03-23 22:59:57 +00007581 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007582 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007583 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007584 func_one = (fptr_T)(func_one(&cc, c));
7585 else if (func_all != (fptr_T)NULL)
7586 /* Turbo C complains without the typecast */
7587 func_all = (fptr_T)(func_all(&cc, c));
7588 else /* just copy */
7589 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007590
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007591 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007592 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007593 int totlen = mb_ptr2len(src - 1);
7594
Bram Moolenaar071d4272004-06-13 20:20:40 +00007595 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007596 mb_char2bytes(cc, dst);
7597 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007598 if (enc_utf8)
7599 {
7600 int clen = utf_ptr2len(src - 1);
7601
7602 /* If the character length is shorter than "totlen", there
7603 * are composing characters; copy them as-is. */
7604 if (clen < totlen)
7605 {
7606 if (copy)
7607 mch_memmove(dst + 1, src - 1 + clen,
7608 (size_t)(totlen - clen));
7609 dst += totlen - clen;
7610 }
7611 }
7612 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007613 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01007614 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007615 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007616 dst++;
7617 }
7618 else
7619 {
7620 if (REG_MULTI)
7621 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007622 clnum = rex.reg_mmatch->startpos[no].lnum;
7623 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007624 s = NULL;
7625 else
7626 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007627 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
7628 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7629 len = rex.reg_mmatch->endpos[no].col
7630 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007631 else
7632 len = (int)STRLEN(s);
7633 }
7634 }
7635 else
7636 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007637 s = rex.reg_match->startp[no];
7638 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007639 s = NULL;
7640 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007641 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007642 }
7643 if (s != NULL)
7644 {
7645 for (;;)
7646 {
7647 if (len == 0)
7648 {
7649 if (REG_MULTI)
7650 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007651 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007652 break;
7653 if (copy)
7654 *dst = CAR;
7655 ++dst;
7656 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02007657 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7658 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007659 else
7660 len = (int)STRLEN(s);
7661 }
7662 else
7663 break;
7664 }
7665 else if (*s == NUL) /* we hit NUL. */
7666 {
7667 if (copy)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007668 emsg(_(e_re_damg));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007669 goto exit;
7670 }
7671 else
7672 {
7673 if (backslash && (*s == CAR || *s == '\\'))
7674 {
7675 /*
7676 * Insert a backslash in front of a CR, otherwise
7677 * it will be replaced by a line break.
7678 * Number of backslashes will be halved later,
7679 * double them here.
7680 */
7681 if (copy)
7682 {
7683 dst[0] = '\\';
7684 dst[1] = *s;
7685 }
7686 dst += 2;
7687 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007688 else
7689 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007690 if (has_mbyte)
7691 c = mb_ptr2char(s);
7692 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007693 c = *s;
7694
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007695 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007696 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007697 func_one = (fptr_T)(func_one(&cc, c));
7698 else if (func_all != (fptr_T)NULL)
7699 /* Turbo C complains without the typecast */
7700 func_all = (fptr_T)(func_all(&cc, c));
7701 else /* just copy */
7702 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007703
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007704 if (has_mbyte)
7705 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007706 int l;
7707
7708 /* Copy composing characters separately, one
7709 * at a time. */
7710 if (enc_utf8)
7711 l = utf_ptr2len(s) - 1;
7712 else
7713 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007714
7715 s += l;
7716 len -= l;
7717 if (copy)
7718 mb_char2bytes(cc, dst);
7719 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007720 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01007721 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007722 *dst = cc;
7723 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007724 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007725
Bram Moolenaar071d4272004-06-13 20:20:40 +00007726 ++s;
7727 --len;
7728 }
7729 }
7730 }
7731 no = -1;
7732 }
7733 }
7734 if (copy)
7735 *dst = NUL;
7736
7737exit:
7738 return (int)((dst - dest) + 1);
7739}
7740
7741#ifdef FEAT_EVAL
7742/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007743 * Call reg_getline() with the line numbers from the submatch. If a
7744 * substitute() was used the reg_maxline and other values have been
7745 * overwritten.
7746 */
7747 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007748reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007749{
7750 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007751 linenr_T save_first = rex.reg_firstlnum;
7752 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007753
Bram Moolenaar6100d022016-10-02 16:51:57 +02007754 rex.reg_firstlnum = rsm.sm_firstlnum;
7755 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007756
7757 s = reg_getline(lnum);
7758
Bram Moolenaar6100d022016-10-02 16:51:57 +02007759 rex.reg_firstlnum = save_first;
7760 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007761 return s;
7762}
7763
7764/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007765 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007766 * allocated memory.
7767 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7768 */
7769 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007770reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007771{
7772 char_u *retval = NULL;
7773 char_u *s;
7774 int len;
7775 int round;
7776 linenr_T lnum;
7777
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007778 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007779 return NULL;
7780
Bram Moolenaar6100d022016-10-02 16:51:57 +02007781 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007782 {
7783 /*
7784 * First round: compute the length and allocate memory.
7785 * Second round: copy the text.
7786 */
7787 for (round = 1; round <= 2; ++round)
7788 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007789 lnum = rsm.sm_mmatch->startpos[no].lnum;
7790 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007791 return NULL;
7792
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01007793 s = reg_getline_submatch(lnum);
7794 if (s == NULL) // anti-crash check, cannot happen?
Bram Moolenaar071d4272004-06-13 20:20:40 +00007795 break;
Bram Moolenaar64c8ed32019-03-20 21:18:34 +01007796 s += rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007797 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007798 {
7799 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007800 len = rsm.sm_mmatch->endpos[no].col
7801 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007802 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007803 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007804 ++len;
7805 }
7806 else
7807 {
7808 /* Multiple lines: take start line from start col, middle
7809 * lines completely and end line up to end col. */
7810 len = (int)STRLEN(s);
7811 if (round == 2)
7812 {
7813 STRCPY(retval, s);
7814 retval[len] = '\n';
7815 }
7816 ++len;
7817 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007818 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007819 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007820 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007821 if (round == 2)
7822 STRCPY(retval + len, s);
7823 len += (int)STRLEN(s);
7824 if (round == 2)
7825 retval[len] = '\n';
7826 ++len;
7827 }
7828 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007829 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007830 rsm.sm_mmatch->endpos[no].col);
7831 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007832 if (round == 2)
7833 retval[len] = NUL;
7834 ++len;
7835 }
7836
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007837 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007838 {
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02007839 retval = alloc(len);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007840 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007841 return NULL;
7842 }
7843 }
7844 }
7845 else
7846 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007847 s = rsm.sm_match->startp[no];
7848 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007849 retval = NULL;
7850 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007851 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007852 }
7853
7854 return retval;
7855}
Bram Moolenaar41571762014-04-02 19:00:58 +02007856
7857/*
7858 * Used for the submatch() function with the optional non-zero argument: get
7859 * the list of strings from the n'th submatch in allocated memory with NULs
7860 * represented in NLs.
7861 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7862 * command, for a non-existing submatch and for any error.
7863 */
7864 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007865reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007866{
7867 char_u *s;
7868 linenr_T slnum;
7869 linenr_T elnum;
7870 colnr_T scol;
7871 colnr_T ecol;
7872 int i;
7873 list_T *list;
7874 int error = FALSE;
7875
7876 if (!can_f_submatch || no < 0)
7877 return NULL;
7878
Bram Moolenaar6100d022016-10-02 16:51:57 +02007879 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007880 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007881 slnum = rsm.sm_mmatch->startpos[no].lnum;
7882 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02007883 if (slnum < 0 || elnum < 0)
7884 return NULL;
7885
Bram Moolenaar6100d022016-10-02 16:51:57 +02007886 scol = rsm.sm_mmatch->startpos[no].col;
7887 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02007888
7889 list = list_alloc();
7890 if (list == NULL)
7891 return NULL;
7892
7893 s = reg_getline_submatch(slnum) + scol;
7894 if (slnum == elnum)
7895 {
7896 if (list_append_string(list, s, ecol - scol) == FAIL)
7897 error = TRUE;
7898 }
7899 else
7900 {
7901 if (list_append_string(list, s, -1) == FAIL)
7902 error = TRUE;
7903 for (i = 1; i < elnum - slnum; i++)
7904 {
7905 s = reg_getline_submatch(slnum + i);
7906 if (list_append_string(list, s, -1) == FAIL)
7907 error = TRUE;
7908 }
7909 s = reg_getline_submatch(elnum);
7910 if (list_append_string(list, s, ecol) == FAIL)
7911 error = TRUE;
7912 }
7913 }
7914 else
7915 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007916 s = rsm.sm_match->startp[no];
7917 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007918 return NULL;
7919 list = list_alloc();
7920 if (list == NULL)
7921 return NULL;
7922 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02007923 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007924 error = TRUE;
7925 }
7926
7927 if (error)
7928 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02007929 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02007930 return NULL;
7931 }
7932 return list;
7933}
Bram Moolenaar071d4272004-06-13 20:20:40 +00007934#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007935
7936static regengine_T bt_regengine =
7937{
7938 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02007939 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007940 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01007941 bt_regexec_multi,
7942 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007943};
7944
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007945#include "regexp_nfa.c"
7946
7947static regengine_T nfa_regengine =
7948{
7949 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02007950 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007951 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01007952 nfa_regexec_multi,
7953 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007954};
7955
// The regexp engine that vim_regcomp() is using.  It is set from "p_re" at
// the start of every vim_regcomp() call, thus must match with 'regexpengine'.
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01007959
#ifdef DEBUG
// Engine names for the debug message in vim_regcomp(), indexed by the engine
// number.
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
7967
7968/*
7969 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02007970 * Returns the program in allocated memory.
7971 * Use vim_regfree() to free the memory.
7972 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007973 */
7974 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007975vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007976{
7977 regprog_T *prog = NULL;
7978 char_u *expr = expr_arg;
Bram Moolenaarcd625122019-02-22 17:29:43 +01007979 int save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007980
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007981 regexp_engine = p_re;
7982
7983 /* Check for prefix "\%#=", that sets the regexp engine */
7984 if (STRNCMP(expr, "\\%#=", 4) == 0)
7985 {
7986 int newengine = expr[4] - '0';
7987
7988 if (newengine == AUTOMATIC_ENGINE
7989 || newengine == BACKTRACKING_ENGINE
7990 || newengine == NFA_ENGINE)
7991 {
7992 regexp_engine = expr[4] - '0';
7993 expr += 5;
7994#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007995 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02007996 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007997#endif
7998 }
7999 else
8000 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008001 emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008002 regexp_engine = AUTOMATIC_ENGINE;
8003 }
8004 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02008005#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008006 bt_regengine.expr = expr;
8007 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008008#endif
Bram Moolenaar8bfd9462019-02-16 18:07:57 +01008009 // reg_iswordc() uses rex.reg_buf
8010 rex.reg_buf = curbuf;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008011
8012 /*
8013 * First try the NFA engine, unless backtracking was requested.
8014 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01008015 save_called_emsg = called_emsg;
8016 called_emsg = FALSE;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008017 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01008018 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008019 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008020 else
8021 prog = bt_regengine.regcomp(expr, re_flags);
8022
Bram Moolenaarfda37292014-11-05 14:27:36 +01008023 /* Check for error compiling regexp with initial engine. */
8024 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008025 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008026#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008027 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8028 {
8029 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008030 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008031 if (f)
8032 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008033 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008034 fclose(f);
8035 }
8036 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008037 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01008038 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008039 }
8040#endif
8041 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008042 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008043 * The NFA engine also fails for patterns that it can't handle well
8044 * but are still valid patterns, thus a retry should work.
Bram Moolenaarcd625122019-02-22 17:29:43 +01008045 * But don't try if an error message was given.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008046 */
Bram Moolenaarcd625122019-02-22 17:29:43 +01008047 if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008048 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008049 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008050 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008051 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008052 }
Bram Moolenaarcd625122019-02-22 17:29:43 +01008053 called_emsg |= save_called_emsg;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008054
Bram Moolenaarfda37292014-11-05 14:27:36 +01008055 if (prog != NULL)
8056 {
8057 /* Store the info needed to call regcomp() again when the engine turns
8058 * out to be very slow when executing it. */
8059 prog->re_engine = regexp_engine;
8060 prog->re_flags = re_flags;
8061 }
8062
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008063 return prog;
8064}
8065
8066/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008067 * Free a compiled regexp program, returned by vim_regcomp().
8068 */
8069 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008070vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008071{
8072 if (prog != NULL)
8073 prog->engine->regfree(prog);
8074}
8075
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero give a message that the backtracking engine
 * is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
8089
#if (defined(FEAT_X11) && (defined(FEAT_TITLE) || defined(FEAT_XCLIPBOARD))) \
	|| defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog": non-zero while "prog" is being
 * executed.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int	    busy = prog->re_in_use;

    return busy;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01008101
8102/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008103 * Match a regexp against a string.
8104 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008105 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008106 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008107 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008108 *
8109 * Return TRUE if there is a match, FALSE if not.
8110 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008111 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008112vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01008113 regmatch_T *rmp,
8114 char_u *line, /* string to match against */
8115 colnr_T col, /* column to start looking for match */
8116 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008117{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008118 int result;
8119 regexec_T rex_save;
8120 int rex_in_use_save = rex_in_use;
8121
Bram Moolenaar0270f382018-07-17 05:43:58 +02008122 // Cannot use the same prog recursively, it contains state.
8123 if (rmp->regprog->re_in_use)
8124 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008125 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008126 return FALSE;
8127 }
8128 rmp->regprog->re_in_use = TRUE;
8129
Bram Moolenaar6100d022016-10-02 16:51:57 +02008130 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02008131 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02008132 rex_save = rex;
8133 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008134
Bram Moolenaar6100d022016-10-02 16:51:57 +02008135 rex.reg_startp = NULL;
8136 rex.reg_endp = NULL;
8137 rex.reg_startpos = NULL;
8138 rex.reg_endpos = NULL;
8139
8140 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008141 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008142
8143 /* NFA engine aborted because it's very slow. */
8144 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8145 && result == NFA_TOO_EXPENSIVE)
8146 {
8147 int save_p_re = p_re;
8148 int re_flags = rmp->regprog->re_flags;
8149 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8150
8151 p_re = BACKTRACKING_ENGINE;
8152 vim_regfree(rmp->regprog);
8153 if (pat != NULL)
8154 {
8155#ifdef FEAT_EVAL
8156 report_re_switch(pat);
8157#endif
8158 rmp->regprog = vim_regcomp(pat, re_flags);
8159 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008160 {
8161 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008162 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008163 rmp->regprog->re_in_use = FALSE;
8164 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008165 vim_free(pat);
8166 }
8167
8168 p_re = save_p_re;
8169 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008170
8171 rex_in_use = rex_in_use_save;
8172 if (rex_in_use)
8173 rex = rex_save;
8174
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008175 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008176}
8177
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008178/*
8179 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008180 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008181 */
8182 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008183vim_regexec_prog(
8184 regprog_T **prog,
8185 int ignore_case,
8186 char_u *line,
8187 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008188{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008189 int r;
8190 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008191
8192 regmatch.regprog = *prog;
8193 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008194 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008195 *prog = regmatch.regprog;
8196 return r;
8197}
8198
8199/*
8200 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008201 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008202 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008203 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008204vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008205{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008206 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008207}
8208
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008209/*
8210 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008211 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008212 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008213 */
8214 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008215vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008216{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008217 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008218}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008219
8220/*
8221 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008222 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
8223 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008224 * Uses curbuf for line count and 'iskeyword'.
8225 *
8226 * Return zero if there is no match. Return number of lines contained in the
8227 * match otherwise.
8228 */
8229 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008230vim_regexec_multi(
8231 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01008232 win_T *win, /* window in which to search or NULL */
8233 buf_T *buf, /* buffer in which to search */
8234 linenr_T lnum, /* nr of line to start looking for match */
8235 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008236 proftime_T *tm, /* timeout limit or NULL */
8237 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008238{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008239 int result;
8240 regexec_T rex_save;
8241 int rex_in_use_save = rex_in_use;
8242
Bram Moolenaar0270f382018-07-17 05:43:58 +02008243 // Cannot use the same prog recursively, it contains state.
8244 if (rmp->regprog->re_in_use)
8245 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008246 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008247 return FALSE;
8248 }
8249 rmp->regprog->re_in_use = TRUE;
8250
Bram Moolenaar6100d022016-10-02 16:51:57 +02008251 if (rex_in_use)
8252 /* Being called recursively, save the state. */
8253 rex_save = rex;
8254 rex_in_use = TRUE;
8255
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008256 result = rmp->regprog->engine->regexec_multi(
8257 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008258 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008259
8260 /* NFA engine aborted because it's very slow. */
8261 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8262 && result == NFA_TOO_EXPENSIVE)
8263 {
8264 int save_p_re = p_re;
8265 int re_flags = rmp->regprog->re_flags;
8266 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8267
8268 p_re = BACKTRACKING_ENGINE;
8269 vim_regfree(rmp->regprog);
8270 if (pat != NULL)
8271 {
8272#ifdef FEAT_EVAL
8273 report_re_switch(pat);
8274#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008275#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008276 // checking for \z misuse was already done when compiling for NFA,
8277 // allow all here
8278 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008279#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01008280 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008281#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008282 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008283#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008284
Bram Moolenaarfda37292014-11-05 14:27:36 +01008285 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008286 {
8287 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008288 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008289 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008290 rmp->regprog->re_in_use = FALSE;
8291 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008292 vim_free(pat);
8293 }
8294 p_re = save_p_re;
8295 }
8296
Bram Moolenaar6100d022016-10-02 16:51:57 +02008297 rex_in_use = rex_in_use_save;
8298 if (rex_in_use)
8299 rex = rex_save;
8300
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008301 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008302}