blob: 06076fde57d4d76d769598de0cd072fa9885e15f [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
#ifdef DEBUG
/* show/save debugging data when BT engine is used */
# define BT_REGEXP_DUMP
/* save the debugging data to a file instead of displaying it */
# define BT_REGEXP_LOG
/* NOTE(review): presumably enables an additional debug log for the
 * backtracking engine, written to the file named below -- confirm at the
 * places where these two are used. */
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
/*
 * Basic opcodes, numbered 0-19.  The "opnd?" column tells what kind of
 * operand, if any, follows the node: "node" is another node, "str" a
 * string and "nr" a number.
 */
/* definition	number		   opnd?    meaning */
#define END		0	/*	End of program or NOMATCH operand. */
#define BOL		1	/*	Match "" at beginning of line. */
#define EOL		2	/*	Match "" at end of line. */
#define BRANCH		3	/* node Match this alternative, or the
				 *	next... */
#define BACK		4	/*	Match "", "next" ptr points backward. */
#define EXACTLY		5	/* str	Match this string. */
#define NOTHING		6	/*	Match empty string. */
#define STAR		7	/* node Match this (simple) thing 0 or more
				 *	times. */
#define PLUS		8	/* node Match this (simple) thing 1 or more
				 *	times. */
#define MATCH		9	/* node match the operand zero-width */
#define NOMATCH		10	/* node check for no match with operand */
#define BEHIND		11	/* node look behind for a match with operand */
#define NOBEHIND	12	/* node look behind for no match with operand */
#define SUBPAT		13	/* node match the operand here */
#define BRACE_SIMPLE	14	/* node Match this (simple) thing between m and
				 *	n times (\{m,n\}). */
#define BOW		15	/*	Match "" after [^a-zA-Z0-9_] */
#define EOW		16	/*	Match "" at [^a-zA-Z0-9_] */
#define BRACE_LIMITS	17	/* nr nr  define the min & max for BRACE_SIMPLE
				 *	and BRACE_COMPLEX. */
#define NEWL		18	/*	Match line-break */
#define BHPOS		19	/*	End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: 20-48 are the normal opcodes, 50-78 are the same
 * opcodes plus ADD_NL and additionally match a line-break.
 *
 * FIRST_NL and LAST_NL are the bounds of the "with line-break" range.  Their
 * replacement lists are parenthesized so that they keep their value when
 * used inside a larger expression (e.g. "2 * FIRST_NL").
 * WITH_NL(op) is non-zero when "op" lies in the "with line-break" range; it
 * evaluates "op" twice.
 */
#define ADD_NL		30
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
#define LAST_NL		(NUPPER + ADD_NL)
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215
/*
 * Opcodes that come in numbered groups ("-89" means the group runs up to 89,
 * the sub-expression number is added to the base value), followed by the
 * position and multi-byte related opcodes.
 */
#define MOPEN		80  /* -89	 Mark this point in input as start of
				 *	 \( subexpr.  MOPEN + 0 marks start of
				 *	 match. */
#define MCLOSE		90  /* -99	 Analogous to MOPEN.  MCLOSE + 0 marks
				 *	 end of match. */
#define BACKREF		100 /* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN		110 /* -119	 Mark this point in input as start of
				 *	 \z( subexpr. */
# define ZCLOSE		120 /* -129	 Analogous to ZOPEN. */
# define ZREF		130 /* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX	140 /* -149 node Match nodes between m & n times */

#define NOPEN		150	/*	Mark this point in input as start of
					\%( subexpr. */
#define NCLOSE		151	/*	Analogous to NOPEN. */

#define MULTIBYTECODE	200	/* mbc	Match one multi-byte character */
#define RE_BOF		201	/*	Match "" at beginning of file. */
#define RE_EOF		202	/*	Match "" at end of file. */
#define CURSOR		203	/*	Match location of cursor. */

#define RE_LNUM		204	/* nr cmp  Match line number */
#define RE_COL		205	/* nr cmp  Match column number */
#define RE_VCOL		206	/* nr cmp  Match virtual column number */

#define RE_MARK		207	/* mark cmp  Match mark position */
#define RE_VISUAL	208	/*	Match Visual area */
#define RE_COMPOSING	209	/*	any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 *
 * Magic(x)	converts a character to its Magic value by subtracting 256.
 * un_Magic(x)	reverses Magic() by adding 256.
 * is_Magic(x)	is non-zero when "x" is negative, i.e. a Magic value.
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)
257
/*
 * Return "x" without Magic-ness: a Magic value is converted back with
 * un_Magic(), any other value is returned unchanged.
 */
static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
265
/*
 * Flip the Magic-ness of "x": a Magic value is converted back with
 * un_Magic(), any other value is made Magic with Magic().
 */
static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
273
/*
 * The first byte of the regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 *
 * Note that the value is written in octal: 0234 is 156 decimal (0x9c).
 */

#define REGMAGIC	0234
281
282/*
283 * Opcode notes:
284 *
285 * BRANCH The set of branches constituting a single choice are hooked
286 * together with their "next" pointers, since precedence prevents
287 * anything being concatenated to any individual branch. The
288 * "next" pointer of the last BRANCH in a choice points to the
289 * thing following the whole choice. This is also where the
290 * final "next" pointer of each individual branch points; each
291 * branch starts with the operand node of a BRANCH node.
292 *
293 * BACK Normal "next" pointers all implicitly point forward; BACK
294 * exists to make loop structures possible.
295 *
296 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
297 * BRANCH structures using BACK. Simple cases (one character
298 * per match) are implemented with STAR and PLUS for speed
299 * and to minimize recursive plunges.
300 *
301 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
302 * node, and defines the min and max limits to be used for that
303 * node.
304 *
305 * MOPEN,MCLOSE ...are numbered at compile time.
306 * ZOPEN,ZCLOSE ...ditto
307 */
308
309/*
310 * A node is one char of opcode followed by two chars of "next" pointer.
311 * "Next" pointers are stored as two 8-bit bytes, high order first. The
312 * value is a positive offset from the opcode of the node containing it.
313 * An operand, if any, simply follows the node. (Note that much of the
314 * code generation knows about this implicit relationship.)
315 *
316 * Using two bytes for the "next" pointer is vast overkill for most things,
317 * but allows patterns to get big without disasters.
318 */
/*
 * Accessors for a node at "p" (a byte pointer into the program).
 *
 * OP(p)	  the opcode, i.e. the byte at p[0].
 * NEXT(p)	  the "next" offset, built from p[1] (high byte) and p[2]
 *		  (low byte); always in the range 0 - 65535.
 * OPERAND(p)	  pointer to the first operand byte, p + 3.
 *
 * All of these evaluate "p" more than once or index it directly, so "p" must
 * not have side effects.
 */
#define OP(p)		((int)*(p))
#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
#define OPERAND(p)	((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first.
 * Reads p[3] .. p[6]. */
#define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
			+ ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes.
 * Reads p[7] .. p[10], directly after the first four-byte operand. */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand.
 * This is p[7], the same position where OPERAND_MAX() starts reading. */
#define OPERAND_CMP(p)	(p)[7]
329
330/*
331 * Utility definitions.
332 */
/* Read the byte at "p" as an unsigned character value. */
#define UCHARAT(p)	((int)*(char_u *)(p))

/*
 * Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL (the *_RET_NULL variants) or FAIL (the
 * *_RET_FAIL variants).
 *
 * Each of these expands to a complete "return" statement, so it leaves the
 * function it is used in.  The EMSG2/EMSG3 variants pass an extra string
 * argument to the message: "" when "c" is non-zero, a backslash otherwise.
 */
#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)


/* The value is 32767 * 65536 = 0x7fff0000.
 * NOTE(review): presumably the "no limit" value for brace repeat counts --
 * confirm where it is used. */
#define MAX_LIMIT	(32767L << 16L)
347
/* Prototypes for functions defined further down in this file. */
static int	cstrncmp(char_u *s1, char_u *s2, int *n);
static char_u	*cstrchr(char_u *, int);

/* regdump() is only declared when dumping is enabled, regprop() only when
 * DEBUG is defined. */
#ifdef BT_REGEXP_DUMP
static void	regdump(char_u *, bt_regprog_T *);
#endif
#ifdef DEBUG
static char_u	*regprop(char_u *);
#endif

static int	re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200359
/*
 * Error messages, marked for translation with N_().
 * NOTE(review): the "%s" in several of these is presumably filled in with ""
 * or a backslash by the EMSG2_RET_* macros -- confirm at the call sites.
 */
static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
#ifdef FEAT_MBYTE
static char_u e_large_class[] = N_("E945: Range too large in character class");
#endif
static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
#ifdef FEAT_SYN_HL
static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
#endif
static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
375
Bram Moolenaar071d4272004-06-13 20:20:40 +0000376#define NOT_MULTI 0
377#define MULTI_ONE 1
378#define MULTI_MULT 2
379/*
380 * Return NOT_MULTI if c is not a "multi" operator.
381 * Return MULTI_ONE if c is a single "multi" operator.
382 * Return MULTI_MULT if c is a multi "multi" operator.
383 */
384 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100385re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000386{
387 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
388 return MULTI_ONE;
389 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
390 return MULTI_MULT;
391 return NOT_MULTI;
392}
393
/*
 * Flags to be passed up and down.
 * These are separate bits, so they can be or'ed together; WORST is the value
 * with no bit set.
 */
#define HASWIDTH	0x1	/* Known never to match null string. */
#define SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
#define SPSTART		0x4	/* Starts with * or +. */
#define HASNL		0x8	/* Contains some \n. */
#define HASLOOKBH	0x10	/* Contains "\@<=" or "\@<!". */
#define WORST		0	/* Worst case. */

/*
 * When regcode is set to this value, code is not emitted and size is computed
 * instead.
 * The value is a sentinel pointer, it must never be dereferenced.
 */
#define JUST_CALC_SIZE	((char_u *) -1)

/* NOTE(review): presumably the previously used substitute string -- confirm
 * where it is set and freed. */
static char_u		*reg_prev_sub = NULL;
411
Bram Moolenaar071d4272004-06-13 20:20:40 +0000412/*
413 * REGEXP_INRANGE contains all characters which are always special in a []
414 * range after '\'.
415 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
416 * These are:
417 * \n - New line (NL).
418 * \r - Carriage Return (CR).
419 * \t - Tab (TAB).
420 * \e - Escape (ESC).
421 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000422 * \d - Character code in decimal, eg \d123
423 * \o - Character code in octal, eg \o80
424 * \x - Character code in hex, eg \x4a
425 * \u - Multibyte character code, eg \u20ac
426 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000427 */
/* Characters that are special after a backslash inside []: ] ^ - n and the
 * backslash itself. */
static char_u REGEXP_INRANGE[] = "]^-n\\";
/* Characters that form an abbreviation when they follow a backslash. */
static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000430
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431/*
432 * Translate '\x' to its control character, except "\n", which is Magic.
433 */
434 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100435backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000436{
437 switch (c)
438 {
439 case 'r': return CAR;
440 case 't': return TAB;
441 case 'e': return ESC;
442 case 'b': return BS;
443 }
444 return c;
445}
446
447/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000448 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000449 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
450 * recognized. Otherwise "pp" is advanced to after the item.
451 */
452 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100453get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000454{
455 static const char *(class_names[]) =
456 {
457 "alnum:]",
458#define CLASS_ALNUM 0
459 "alpha:]",
460#define CLASS_ALPHA 1
461 "blank:]",
462#define CLASS_BLANK 2
463 "cntrl:]",
464#define CLASS_CNTRL 3
465 "digit:]",
466#define CLASS_DIGIT 4
467 "graph:]",
468#define CLASS_GRAPH 5
469 "lower:]",
470#define CLASS_LOWER 6
471 "print:]",
472#define CLASS_PRINT 7
473 "punct:]",
474#define CLASS_PUNCT 8
475 "space:]",
476#define CLASS_SPACE 9
477 "upper:]",
478#define CLASS_UPPER 10
479 "xdigit:]",
480#define CLASS_XDIGIT 11
481 "tab:]",
482#define CLASS_TAB 12
483 "return:]",
484#define CLASS_RETURN 13
485 "backspace:]",
486#define CLASS_BACKSPACE 14
487 "escape:]",
488#define CLASS_ESCAPE 15
489 };
490#define CLASS_NONE 99
491 int i;
492
493 if ((*pp)[1] == ':')
494 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000495 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
497 {
498 *pp += STRLEN(class_names[i]) + 2;
499 return i;
500 }
501 }
502 return CLASS_NONE;
503}
504
505/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Specific version of character class functions.
507 * Using a table to keep this fast.
508 */
/* One entry per byte value, holding the RI_ bits that apply to it.  "short"
 * is needed because RI_WHITE does not fit in eight bits. */
static short	class_tab[256];

/* Bits stored in class_tab[]. */
#define	    RI_DIGIT	0x01
#define	    RI_HEX	0x02
#define	    RI_OCTAL	0x04
#define	    RI_WORD	0x08
#define	    RI_HEAD	0x10
#define	    RI_ALPHA	0x20
#define	    RI_LOWER	0x40
#define	    RI_UPPER	0x80
#define	    RI_WHITE	0x100
520
521 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100522init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000523{
524 int i;
525 static int done = FALSE;
526
527 if (done)
528 return;
529
530 for (i = 0; i < 256; ++i)
531 {
532 if (i >= '0' && i <= '7')
533 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
534 else if (i >= '8' && i <= '9')
535 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
536 else if (i >= 'a' && i <= 'f')
537 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
538#ifdef EBCDIC
539 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
540 || (i >= 's' && i <= 'z'))
541#else
542 else if (i >= 'g' && i <= 'z')
543#endif
544 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
545 else if (i >= 'A' && i <= 'F')
546 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
547#ifdef EBCDIC
548 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
549 || (i >= 'S' && i <= 'Z'))
550#else
551 else if (i >= 'G' && i <= 'Z')
552#endif
553 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
554 else if (i == '_')
555 class_tab[i] = RI_WORD + RI_HEAD;
556 else
557 class_tab[i] = 0;
558 }
559 class_tab[' '] |= RI_WHITE;
560 class_tab['\t'] |= RI_WHITE;
561 done = TRUE;
562}
563
/*
 * Character class tests using class_tab[].  Each one is non-zero when "c"
 * has the matching RI_ bit set.
 *
 * With FEAT_MBYTE "c" may be a multi-byte character; anything from 0x100 up
 * is never in a class.  Without it "c" is used as the table index directly.
 * In both cases "c" must not be negative and, with FEAT_MBYTE, it is
 * evaluated twice, so it must not have side effects.
 * The argument is parenthesized so that an expression can be passed safely.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[(c)] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[(c)] & RI_DIGIT)
# define ri_hex(c)	(class_tab[(c)] & RI_HEX)
# define ri_octal(c)	(class_tab[(c)] & RI_OCTAL)
# define ri_word(c)	(class_tab[(c)] & RI_WORD)
# define ri_head(c)	(class_tab[(c)] & RI_HEAD)
# define ri_alpha(c)	(class_tab[(c)] & RI_ALPHA)
# define ri_lower(c)	(class_tab[(c)] & RI_LOWER)
# define ri_upper(c)	(class_tab[(c)] & RI_UPPER)
# define ri_white(c)	(class_tab[(c)] & RI_WHITE)
#endif
585
/* flags for regflags; separate bits that can be or'ed together */
#define RF_ICASE    1	/* ignore case */
#define RF_NOICASE  2	/* don't ignore case */
#define RF_HASNL    4	/* can match a NL */
#define RF_ICOMBINE 8	/* ignore combining characters */
#define RF_LOOKBH   16	/* uses "\@<=" or "\@<!" */
592
593/*
594 * Global work variables for vim_regcomp().
595 */
596
static char_u	*regparse;	/* Input-scan pointer. */
static int	prevchr_len;	/* byte length of previous char */
static int	num_complex_braces; /* Complex \{...} count */
static int	regnpar;	/* () count. */
#ifdef FEAT_SYN_HL
static int	regnzpar;	/* \z() count. */
static int	re_has_z;	/* \z item detected */
#endif
static char_u	*regcode;	/* Code-emit pointer, or JUST_CALC_SIZE */
static long	regsize;	/* Code size. */
static int	reg_toolong;	/* TRUE when offset out of range */
static char_u	had_endbrace[NSUBEXP];	/* flags, TRUE if end of () found */
static unsigned	regflags;	/* RF_ flags for prog */
/* NOTE(review): the size 10 presumably matches the ten BRACE_COMPLEX
 * opcodes (140 - 149) -- confirm. */
static long	brace_min[10];	/* Minimums for complex brace repeats */
static long	brace_max[10];	/* Maximums for complex brace repeats */
static int	brace_count[10]; /* Current counts for complex brace repeats */
#if defined(FEAT_SYN_HL) || defined(PROTO)
static int	had_eol;	/* TRUE when EOL found by vim_regcomp() */
#endif
static int	one_exactly = FALSE;	/* only do one char for EXACTLY */

/* The MAGIC_ values are ordered from least to most magic. */
static int	reg_magic;	/* magicness of the pattern: */
#define MAGIC_NONE	1	/* "\V" very unmagic */
#define MAGIC_OFF	2	/* "\M" or 'magic' off */
#define MAGIC_ON	3	/* "\m" or 'magic' */
#define MAGIC_ALL	4	/* "\v" very magic */

static int	reg_string;	/* matching with a string instead of a buffer
				   line */
static int	reg_strict;	/* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000627
628/*
629 * META contains all characters that may be magic, except '^' and '$'.
630 */
631
#ifdef EBCDIC
/* NOTE(review): this string has no 'Z' while META_flags[] below does flag
 * 'Z' -- confirm whether the EBCDIC list is missing an entry. */
static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
#else
/* META[] is used often enough to justify turning it into a table. */
/* The table has 127 entries, one for each character code 0 - 126 ('~'); an
 * entry is 1 when the character may be magic.  It must not be indexed with a
 * value above 126. */
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*		   %  &     (  )  *  +	      .    */
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/*     1  2  3	4  5  6  7  8  9	<  =  >  ? */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
/*  @  A     C	D     F     H  I     K	L  M	 O */
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
/*  P	     S	   U  V  W  X	  Z  [		 _ */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
/*     a     c	d     f     h  i     k	l  m  n  o */
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
/*  p	     s	   u  v  w  x	  z  {	|     ~    */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};
#endif
653
/* Character look-ahead state of the pattern scanner. */
static int	curchr;		/* currently parsed character */
/* Previous character.  Note: prevchr is sometimes -1 when we are not at the
 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
 * because ^ was taken to be magic -- webb */
static int	prevchr;
static int	prevprevchr;	/* previous-previous character */
static int	nextchr;	/* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661
/* arguments for reg(): the kind of parenthesis that is being parsed */
#define REG_NOPAREN	0	/* toplevel reg() */
#define REG_PAREN	1	/* \(\) */
#define REG_ZPAREN	2	/* \z(\) */
#define REG_NPAREN	3	/* \%(\) */
667
/*
 * NOTE(review): presumably a snapshot of the parser state, so that parsing
 * can be restarted from an earlier position; most fields have the name of a
 * file-level variable -- confirm at the functions that fill and restore it.
 */
typedef struct
{
    char_u	*regparse;	/* see the variable with the same name */
    int		prevchr_len;	/* idem */
    int		curchr;		/* idem */
    int		prevchr;	/* idem */
    int		prevprevchr;	/* idem */
    int		nextchr;	/* idem */
    int		at_start;
    int		prev_at_start;
    int		regnpar;	/* see the variable with the same name */
} parse_state_T;
680
Bram Moolenaar071d4272004-06-13 20:20:40 +0000681/*
682 * Forward declarations for vim_regcomp()'s friends.
683 */
/* Scanning the pattern. */
static void	initchr(char_u *);
static int	getchr(void);
static void	skipchr_keepstart(void);
static int	peekchr(void);
static void	skipchr(void);
static void	ungetchr(void);
static long	gethexchrs(int maxinputlen);
static long	getoctchrs(void);
static long	getdecchrs(void);
static int	coll_get_char(void);
/* Parsing the pattern and emitting nodes. */
static void	regcomp_start(char_u *expr, int flags);
static char_u	*reg(int, int *);
static char_u	*regbranch(int *flagp);
static char_u	*regconcat(int *flagp);
static char_u	*regpiece(int *);
static char_u	*regatom(int *);
static char_u	*regnode(int);
#ifdef FEAT_MBYTE
static int	use_multibytecode(int c);
#endif
static int	prog_magic_wrong(void);
static char_u	*regnext(char_u *);
static void	regc(int b);
/*
 * With FEAT_MBYTE REGMBC(x) expands to a regmbc() call, including the
 * terminating semicolon, and CASEMBC(x) to a "case" label.  Without it both
 * expand to nothing and regmbc() is an alias for regc().  Thus REGMBC() must
 * be used without a following semicolon.
 */
#ifdef FEAT_MBYTE
static void	regmbc(int c);
# define REGMBC(x) regmbc(x);
# define CASEMBC(x) case x:
#else
# define regmbc(c) regc(c)
# define REGMBC(x)
# define CASEMBC(x)
#endif
static void	reginsert(int, char_u *);
static void	reginsert_nr(int op, long val, char_u *opnd);
static void	reginsert_limits(int, long, long, char_u *);
static char_u	*re_put_long(char_u *pr, long_u val);
static int	read_limits(long *, long *);
static void	regtail(char_u *, char_u *);
static void	regoptail(char_u *, char_u *);

/* The two regexp engines; only declared here.
 * NOTE(review): presumably "bt" is the backtracking engine -- confirm at
 * the definitions. */
static regengine_T bt_regengine;
static regengine_T nfa_regengine;
726
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727/*
728 * Return TRUE if compiled regular expression "prog" can match a line break.
729 */
730 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100731re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000732{
733 return (prog->regflags & RF_HASNL);
734}
735
736/*
737 * Return TRUE if compiled regular expression "prog" looks before the start
738 * position (pattern contains "\@<=" or "\@<!").
739 */
740 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100741re_lookbehind(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000742{
743 return (prog->regflags & RF_LOOKBH);
744}
745
746/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000747 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
748 * Returns a character representing the class. Zero means that no item was
749 * recognized. Otherwise "pp" is advanced to after the item.
750 */
751 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100752get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000753{
754 int c;
755 int l = 1;
756 char_u *p = *pp;
757
758 if (p[1] == '=')
759 {
760#ifdef FEAT_MBYTE
761 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000762 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000763#endif
764 if (p[l + 2] == '=' && p[l + 3] == ']')
765 {
766#ifdef FEAT_MBYTE
767 if (has_mbyte)
768 c = mb_ptr2char(p + 2);
769 else
770#endif
771 c = p[2];
772 *pp += l + 4;
773 return c;
774 }
775 }
776 return 0;
777}
778
#ifdef EBCDIC
/*
 * Equivalence class table for EBCDIC (IBM-1047).
 * Every entry is one class: the base letter followed by the other members of
 * that class.  reg_equi_class() looks a character up in each entry in turn.
 */
char *EQUIVAL_CLASS_C[16] = {
    /* upper case classes */
    "A" "\x62\x63\x64\x65\x66\x67",
    "C" "\x68",
    "E" "\x71\x72\x73\x74",
    "I" "\x75\x76\x77\x78",
    "N" "\x69",
    "O" "\xEB\xEC\xED\xEE\xEF\x80",
    "U" "\xFB\xFC\xFD\xFE",
    "Y" "\xBA",
    /* lower case classes */
    "a" "\x42\x43\x44\x45\x46\x47",
    "c" "\x48",
    "e" "\x51\x52\x53\x54",
    "i" "\x55\x56\x57\x58",
    "n" "\x49",
    "o" "\xCB\xCC\xCD\xCE\xCF\x70",
    "u" "\xDB\xDC\xDD\xDE",
    "y" "\x8D\xDF",
};
#endif
802
Bram Moolenaardf177f62005-02-22 08:39:57 +0000803/*
804 * Produce the bytes for equivalence class "c".
805 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200806 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000807 */
808 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100809reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000810{
811#ifdef FEAT_MBYTE
812 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000813 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000814#endif
815 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200816#ifdef EBCDIC
817 int i;
818
819 /* This might be slower than switch/case below. */
820 for (i = 0; i < 16; i++)
821 {
822 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
823 {
824 char *p = EQUIVAL_CLASS_C[i];
825
826 while (*p != 0)
827 regmbc(*p++);
828 return;
829 }
830 }
831#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000832 switch (c)
833 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200834 /* Do not use '\300' style, it results in a negative number. */
835 case 'A': case 0xc0: case 0xc1: case 0xc2:
836 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200837 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
838 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200839 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
840 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
841 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200842 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
843 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
844 REGMBC(0x1ea2)
845 return;
846 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
847 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000848 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200849 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200850 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200851 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200852 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
853 REGMBC(0x10c)
854 return;
855 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
856 CASEMBC(0x1e0e) CASEMBC(0x1e10)
857 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
858 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000859 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200860 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200861 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
862 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200863 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
864 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200865 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
866 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
867 REGMBC(0x1ebc)
868 return;
869 case 'F': CASEMBC(0x1e1e)
870 regmbc('F'); REGMBC(0x1e1e)
871 return;
872 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
873 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
874 CASEMBC(0x1e20)
875 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
876 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
877 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
878 return;
879 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
880 CASEMBC(0x1e26) CASEMBC(0x1e28)
881 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
882 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000883 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200884 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200885 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
886 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200887 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
888 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200889 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
890 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
891 REGMBC(0x1ec8)
892 return;
893 case 'J': CASEMBC(0x134)
894 regmbc('J'); REGMBC(0x134)
895 return;
896 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
897 CASEMBC(0x1e34)
898 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
899 REGMBC(0x1e30) REGMBC(0x1e34)
900 return;
901 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
902 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
903 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
904 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
905 REGMBC(0x1e3a)
906 return;
907 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
908 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000909 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200910 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200911 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
912 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200913 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200914 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
915 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000916 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200917 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
918 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200919 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
920 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200921 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
922 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
923 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200924 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
925 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
926 REGMBC(0x1ec) REGMBC(0x1ece)
927 return;
928 case 'P': case 0x1e54: case 0x1e56:
929 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
930 return;
931 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
932 CASEMBC(0x1e58) CASEMBC(0x1e5e)
933 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
934 REGMBC(0x1e58) REGMBC(0x1e5e)
935 return;
936 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
937 CASEMBC(0x160) CASEMBC(0x1e60)
938 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
939 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
940 return;
941 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
942 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
943 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
944 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000945 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200946 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200947 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
948 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
949 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200950 regmbc('U'); regmbc(0xd9); regmbc(0xda);
951 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200952 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
953 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
954 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
955 return;
956 case 'V': CASEMBC(0x1e7c)
957 regmbc('V'); REGMBC(0x1e7c)
958 return;
959 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
960 CASEMBC(0x1e84) CASEMBC(0x1e86)
961 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
962 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
963 return;
964 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
965 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000966 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200967 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200968 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
969 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200970 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200971 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
972 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
973 return;
974 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
975 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
976 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
977 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
978 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000979 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200980 case 'a': case 0xe0: case 0xe1: case 0xe2:
981 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200982 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
983 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200984 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
985 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
986 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200987 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
988 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
989 REGMBC(0x1ea3)
990 return;
991 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
992 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000993 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200994 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200995 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200996 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200997 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
998 REGMBC(0x10d)
999 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001000 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1001 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001002 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001003 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001004 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001005 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001006 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1007 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001008 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1009 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001010 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1011 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1012 REGMBC(0x1ebd)
1013 return;
1014 case 'f': CASEMBC(0x1e1f)
1015 regmbc('f'); REGMBC(0x1e1f)
1016 return;
1017 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1018 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1019 CASEMBC(0x1e21)
1020 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1021 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1022 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1023 return;
1024 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1025 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1026 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1027 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1028 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001029 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001030 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001031 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1032 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001033 regmbc('i'); regmbc(0xec); regmbc(0xed);
1034 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001035 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1036 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1037 return;
1038 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1039 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1040 return;
1041 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1042 CASEMBC(0x1e35)
1043 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1044 REGMBC(0x1e31) REGMBC(0x1e35)
1045 return;
1046 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1047 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1048 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1049 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1050 REGMBC(0x1e3b)
1051 return;
1052 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1053 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001054 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001055 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001056 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1057 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001058 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001059 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1060 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001061 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001062 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1063 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001064 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1065 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001066 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1067 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1068 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001069 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1070 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1071 REGMBC(0x1ed) REGMBC(0x1ecf)
1072 return;
1073 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1074 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1075 return;
1076 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1077 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1078 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1079 REGMBC(0x1e59) REGMBC(0x1e5f)
1080 return;
1081 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1082 CASEMBC(0x161) CASEMBC(0x1e61)
1083 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1084 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1085 return;
1086 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1087 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1088 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1089 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001090 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001091 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001092 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1093 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1094 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001095 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1096 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001097 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1098 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1099 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1100 return;
1101 case 'v': CASEMBC(0x1e7d)
1102 regmbc('v'); REGMBC(0x1e7d)
1103 return;
1104 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1105 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1106 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1107 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1108 REGMBC(0x1e98)
1109 return;
1110 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1111 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001112 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001113 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001114 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1115 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001116 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001117 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1118 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1119 return;
1120 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1121 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1122 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1123 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1124 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001125 return;
1126 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001127#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001128 }
1129 regmbc(c);
1130}
1131
1132/*
1133 * Check for a collating element "[.a.]". "pp" points to the '['.
1134 * Returns a character. Zero means that no item was recognized. Otherwise
1135 * "pp" is advanced to after the item.
1136 * Currently only single characters are recognized!
1137 */
1138 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001139get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001140{
1141 int c;
1142 int l = 1;
1143 char_u *p = *pp;
1144
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001145 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001146 {
1147#ifdef FEAT_MBYTE
1148 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001149 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001150#endif
1151 if (p[l + 2] == '.' && p[l + 3] == ']')
1152 {
1153#ifdef FEAT_MBYTE
1154 if (has_mbyte)
1155 c = mb_ptr2char(p + 2);
1156 else
1157#endif
1158 c = p[2];
1159 *pp += l + 4;
1160 return c;
1161 }
1162 }
1163 return 0;
1164}
1165
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001166static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1167static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1168
1169 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001170get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001171{
1172 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1173 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1174}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001175
1176/*
1177 * Skip over a "[]" range.
1178 * "p" must point to the character after the '['.
1179 * The returned pointer is on the matching ']', or the terminating NUL.
1180 */
1181 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001182skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001183{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001184#ifdef FEAT_MBYTE
1185 int l;
1186#endif
1187
Bram Moolenaardf177f62005-02-22 08:39:57 +00001188 if (*p == '^') /* Complement of range. */
1189 ++p;
1190 if (*p == ']' || *p == '-')
1191 ++p;
1192 while (*p != NUL && *p != ']')
1193 {
1194#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001195 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001196 p += l;
1197 else
1198#endif
1199 if (*p == '-')
1200 {
1201 ++p;
1202 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001203 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001204 }
1205 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001206 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001207 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001208 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001209 p += 2;
1210 else if (*p == '[')
1211 {
1212 if (get_char_class(&p) == CLASS_NONE
1213 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001214 && get_coll_element(&p) == 0
1215 && *p != NUL)
1216 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001217 }
1218 else
1219 ++p;
1220 }
1221
1222 return p;
1223}
1224
1225/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001226 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001227 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001228 * Take care of characters with a backslash in front of it.
1229 * Skip strings inside [ and ].
1230 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1231 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1232 * is changed in-place.
1233 */
1234 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001235skip_regexp(
1236 char_u *startp,
1237 int dirc,
1238 int magic,
1239 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001240{
1241 int mymagic;
1242 char_u *p = startp;
1243
1244 if (magic)
1245 mymagic = MAGIC_ON;
1246 else
1247 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001248 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001249
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001250 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001251 {
1252 if (p[0] == dirc) /* found end of regexp */
1253 break;
1254 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1255 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1256 {
1257 p = skip_anyof(p + 1);
1258 if (p[0] == NUL)
1259 break;
1260 }
1261 else if (p[0] == '\\' && p[1] != NUL)
1262 {
1263 if (dirc == '?' && newp != NULL && p[1] == '?')
1264 {
1265 /* change "\?" to "?", make a copy first. */
1266 if (*newp == NULL)
1267 {
1268 *newp = vim_strsave(startp);
1269 if (*newp != NULL)
1270 p = *newp + (p - startp);
1271 }
1272 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001273 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001274 else
1275 ++p;
1276 }
1277 else
1278 ++p; /* skip next character */
1279 if (*p == 'v')
1280 mymagic = MAGIC_ALL;
1281 else if (*p == 'V')
1282 mymagic = MAGIC_NONE;
1283 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001284 }
1285 return p;
1286}
1287
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001288/*
1289 * Return TRUE if the back reference is legal. We must have seen the close
1290 * brace.
1291 * TODO: Should also check that we don't refer to something that is repeated
1292 * (+*=): what instance of the repetition should we match?
1293 */
1294 static int
1295seen_endbrace(int refnum)
1296{
1297 if (!had_endbrace[refnum])
1298 {
1299 char_u *p;
1300
1301 /* Trick: check if "@<=" or "@<!" follows, in which case
1302 * the \1 can appear before the referenced match. */
1303 for (p = regparse; *p != NUL; ++p)
1304 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1305 break;
1306 if (*p == NUL)
1307 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001308 emsg(_("E65: Illegal back reference"));
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001309 rc_did_emsg = TRUE;
1310 return FALSE;
1311 }
1312 }
1313 return TRUE;
1314}
1315
Bram Moolenaar071d4272004-06-13 20:20:40 +00001316/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001317 * bt_regcomp() - compile a regular expression into internal code for the
1318 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001319 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320 *
1321 * We can't allocate space until we know how big the compiled form will be,
1322 * but we can't compile it (and thus know how big it is) until we've got a
1323 * place to put the code. So we cheat: we compile it twice, once with code
1324 * generation turned off and size counting turned on, and once "for real".
1325 * This also means that we don't allocate space until we are sure that the
1326 * thing really will compile successfully, and we never have to move the
1327 * code and thus invalidate pointers into it. (Note that it has to be in
1328 * one piece because vim_free() must be able to free it all.)
1329 *
1330 * Whether upper/lower case is to be ignored is decided when executing the
1331 * program, it does not matter here.
1332 *
1333 * Beware that the optimization-preparation code in here knows about some
1334 * of the structure of the compiled regexp.
1335 * "re_flags": RE_MAGIC and/or RE_STRING.
1336 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001337 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001338bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001339{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001340 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001341 char_u *scan;
1342 char_u *longest;
1343 int len;
1344 int flags;
1345
1346 if (expr == NULL)
1347 EMSG_RET_NULL(_(e_null));
1348
1349 init_class_tab();
1350
1351 /*
1352 * First pass: determine size, legality.
1353 */
1354 regcomp_start(expr, re_flags);
1355 regcode = JUST_CALC_SIZE;
1356 regc(REGMAGIC);
1357 if (reg(REG_NOPAREN, &flags) == NULL)
1358 return NULL;
1359
Bram Moolenaar071d4272004-06-13 20:20:40 +00001360 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001361 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001362 if (r == NULL)
1363 return NULL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001364 r->re_in_use = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001365
1366 /*
1367 * Second pass: emit code.
1368 */
1369 regcomp_start(expr, re_flags);
1370 regcode = r->program;
1371 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001372 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001373 {
1374 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001375 if (reg_toolong)
1376 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377 return NULL;
1378 }
1379
1380 /* Dig out information for optimizations. */
1381 r->regstart = NUL; /* Worst-case defaults. */
1382 r->reganch = 0;
1383 r->regmust = NULL;
1384 r->regmlen = 0;
1385 r->regflags = regflags;
1386 if (flags & HASNL)
1387 r->regflags |= RF_HASNL;
1388 if (flags & HASLOOKBH)
1389 r->regflags |= RF_LOOKBH;
1390#ifdef FEAT_SYN_HL
1391 /* Remember whether this pattern has any \z specials in it. */
1392 r->reghasz = re_has_z;
1393#endif
1394 scan = r->program + 1; /* First BRANCH. */
1395 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1396 {
1397 scan = OPERAND(scan);
1398
1399 /* Starting-point info. */
1400 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1401 {
1402 r->reganch++;
1403 scan = regnext(scan);
1404 }
1405
1406 if (OP(scan) == EXACTLY)
1407 {
1408#ifdef FEAT_MBYTE
1409 if (has_mbyte)
1410 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1411 else
1412#endif
1413 r->regstart = *OPERAND(scan);
1414 }
1415 else if ((OP(scan) == BOW
1416 || OP(scan) == EOW
1417 || OP(scan) == NOTHING
1418 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1419 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1420 && OP(regnext(scan)) == EXACTLY)
1421 {
1422#ifdef FEAT_MBYTE
1423 if (has_mbyte)
1424 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1425 else
1426#endif
1427 r->regstart = *OPERAND(regnext(scan));
1428 }
1429
1430 /*
1431 * If there's something expensive in the r.e., find the longest
1432 * literal string that must appear and make it the regmust. Resolve
1433 * ties in favor of later strings, since the regstart check works
1434 * with the beginning of the r.e. and avoiding duplication
1435 * strengthens checking. Not a strong reason, but sufficient in the
1436 * absence of others.
1437 */
1438 /*
1439 * When the r.e. starts with BOW, it is faster to look for a regmust
1440 * first. Used a lot for "#" and "*" commands. (Added by mool).
1441 */
1442 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1443 && !(flags & HASNL))
1444 {
1445 longest = NULL;
1446 len = 0;
1447 for (; scan != NULL; scan = regnext(scan))
1448 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1449 {
1450 longest = OPERAND(scan);
1451 len = (int)STRLEN(OPERAND(scan));
1452 }
1453 r->regmust = longest;
1454 r->regmlen = len;
1455 }
1456 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001457#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001458 regdump(expr, r);
1459#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001460 r->engine = &bt_regengine;
1461 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001462}
1463
1464/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001465 * Free a compiled regexp program, returned by bt_regcomp().
1466 */
1467 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001468bt_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02001469{
1470 vim_free(prog);
1471}
1472
1473/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001474 * Setup to parse the regexp. Used once to get the length and once to do it.
1475 */
1476 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001477regcomp_start(
1478 char_u *expr,
1479 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001480{
1481 initchr(expr);
1482 if (re_flags & RE_MAGIC)
1483 reg_magic = MAGIC_ON;
1484 else
1485 reg_magic = MAGIC_OFF;
1486 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001487 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001488 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001489
1490 num_complex_braces = 0;
1491 regnpar = 1;
1492 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1493#ifdef FEAT_SYN_HL
1494 regnzpar = 1;
1495 re_has_z = 0;
1496#endif
1497 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001498 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001499 regflags = 0;
1500#if defined(FEAT_SYN_HL) || defined(PROTO)
1501 had_eol = FALSE;
1502#endif
1503}
1504
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Report whether the EOL item "$" was found during the previous call to
 * vim_regcomp().  The answer comes from the file-level "had_eol" flag, which
 * regcomp_start() resets.  This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol(void)
{
    int         found_eol = had_eol;

    return found_eol;
}
#endif
1516
// State used while parsing a pattern.
static int      at_start;       // TRUE when on the first character
static int      prev_at_start;  // TRUE when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001520
Bram Moolenaar071d4272004-06-13 20:20:40 +00001521/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001522 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001523 *
1524 * Caller must absorb opening parenthesis.
1525 *
1526 * Combining parenthesis handling with the base level of regular expression
1527 * is a trifle forced, but the need to tie the tails of the branches to what
1528 * follows makes it hard to avoid.
1529 */
1530 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001531reg(
1532 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1533 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001534{
1535 char_u *ret;
1536 char_u *br;
1537 char_u *ender;
1538 int parno = 0;
1539 int flags;
1540
1541 *flagp = HASWIDTH; /* Tentatively. */
1542
1543#ifdef FEAT_SYN_HL
1544 if (paren == REG_ZPAREN)
1545 {
1546 /* Make a ZOPEN node. */
1547 if (regnzpar >= NSUBEXP)
1548 EMSG_RET_NULL(_("E50: Too many \\z("));
1549 parno = regnzpar;
1550 regnzpar++;
1551 ret = regnode(ZOPEN + parno);
1552 }
1553 else
1554#endif
1555 if (paren == REG_PAREN)
1556 {
1557 /* Make a MOPEN node. */
1558 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001559 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001560 parno = regnpar;
1561 ++regnpar;
1562 ret = regnode(MOPEN + parno);
1563 }
1564 else if (paren == REG_NPAREN)
1565 {
1566 /* Make a NOPEN node. */
1567 ret = regnode(NOPEN);
1568 }
1569 else
1570 ret = NULL;
1571
1572 /* Pick up the branches, linking them together. */
1573 br = regbranch(&flags);
1574 if (br == NULL)
1575 return NULL;
1576 if (ret != NULL)
1577 regtail(ret, br); /* [MZ]OPEN -> first. */
1578 else
1579 ret = br;
1580 /* If one of the branches can be zero-width, the whole thing can.
1581 * If one of the branches has * at start or matches a line-break, the
1582 * whole thing can. */
1583 if (!(flags & HASWIDTH))
1584 *flagp &= ~HASWIDTH;
1585 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1586 while (peekchr() == Magic('|'))
1587 {
1588 skipchr();
1589 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001590 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001591 return NULL;
1592 regtail(ret, br); /* BRANCH -> BRANCH. */
1593 if (!(flags & HASWIDTH))
1594 *flagp &= ~HASWIDTH;
1595 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1596 }
1597
1598 /* Make a closing node, and hook it on the end. */
1599 ender = regnode(
1600#ifdef FEAT_SYN_HL
1601 paren == REG_ZPAREN ? ZCLOSE + parno :
1602#endif
1603 paren == REG_PAREN ? MCLOSE + parno :
1604 paren == REG_NPAREN ? NCLOSE : END);
1605 regtail(ret, ender);
1606
1607 /* Hook the tails of the branches to the closing node. */
1608 for (br = ret; br != NULL; br = regnext(br))
1609 regoptail(br, ender);
1610
1611 /* Check for proper termination. */
1612 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1613 {
1614#ifdef FEAT_SYN_HL
1615 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001616 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001617 else
1618#endif
1619 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001620 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001621 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001622 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 }
1624 else if (paren == REG_NOPAREN && peekchr() != NUL)
1625 {
1626 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001627 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001629 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001630 /* NOTREACHED */
1631 }
1632 /*
1633 * Here we set the flag allowing back references to this set of
1634 * parentheses.
1635 */
1636 if (paren == REG_PAREN)
1637 had_endbrace[parno] = TRUE; /* have seen the close paren */
1638 return ret;
1639}
1640
1641/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001642 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001643 * Implements the & operator.
1644 */
1645 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001646regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001647{
1648 char_u *ret;
1649 char_u *chain = NULL;
1650 char_u *latest;
1651 int flags;
1652
1653 *flagp = WORST | HASNL; /* Tentatively. */
1654
1655 ret = regnode(BRANCH);
1656 for (;;)
1657 {
1658 latest = regconcat(&flags);
1659 if (latest == NULL)
1660 return NULL;
1661 /* If one of the branches has width, the whole thing has. If one of
1662 * the branches anchors at start-of-line, the whole thing does.
1663 * If one of the branches uses look-behind, the whole thing does. */
1664 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1665 /* If one of the branches doesn't match a line-break, the whole thing
1666 * doesn't. */
1667 *flagp &= ~HASNL | (flags & HASNL);
1668 if (chain != NULL)
1669 regtail(chain, latest);
1670 if (peekchr() != Magic('&'))
1671 break;
1672 skipchr();
1673 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001674 if (reg_toolong)
1675 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001676 reginsert(MATCH, latest);
1677 chain = latest;
1678 }
1679
1680 return ret;
1681}
1682
1683/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001684 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001685 * Implements the concatenation operator.
1686 */
1687 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001688regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001689{
1690 char_u *first = NULL;
1691 char_u *chain = NULL;
1692 char_u *latest;
1693 int flags;
1694 int cont = TRUE;
1695
1696 *flagp = WORST; /* Tentatively. */
1697
1698 while (cont)
1699 {
1700 switch (peekchr())
1701 {
1702 case NUL:
1703 case Magic('|'):
1704 case Magic('&'):
1705 case Magic(')'):
1706 cont = FALSE;
1707 break;
1708 case Magic('Z'):
1709#ifdef FEAT_MBYTE
1710 regflags |= RF_ICOMBINE;
1711#endif
1712 skipchr_keepstart();
1713 break;
1714 case Magic('c'):
1715 regflags |= RF_ICASE;
1716 skipchr_keepstart();
1717 break;
1718 case Magic('C'):
1719 regflags |= RF_NOICASE;
1720 skipchr_keepstart();
1721 break;
1722 case Magic('v'):
1723 reg_magic = MAGIC_ALL;
1724 skipchr_keepstart();
1725 curchr = -1;
1726 break;
1727 case Magic('m'):
1728 reg_magic = MAGIC_ON;
1729 skipchr_keepstart();
1730 curchr = -1;
1731 break;
1732 case Magic('M'):
1733 reg_magic = MAGIC_OFF;
1734 skipchr_keepstart();
1735 curchr = -1;
1736 break;
1737 case Magic('V'):
1738 reg_magic = MAGIC_NONE;
1739 skipchr_keepstart();
1740 curchr = -1;
1741 break;
1742 default:
1743 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001744 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001745 return NULL;
1746 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1747 if (chain == NULL) /* First piece. */
1748 *flagp |= flags & SPSTART;
1749 else
1750 regtail(chain, latest);
1751 chain = latest;
1752 if (first == NULL)
1753 first = latest;
1754 break;
1755 }
1756 }
1757 if (first == NULL) /* Loop ran zero times. */
1758 first = regnode(NOTHING);
1759 return first;
1760}
1761
1762/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001763 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001764 *
1765 * Note that the branching code sequences used for = and the general cases
1766 * of * and + are somewhat optimized: they use the same NOTHING node as
1767 * both the endmarker for their branch list and the body of the last branch.
1768 * It might seem that this node could be dispensed with entirely, but the
1769 * endmarker role is not redundant.
1770 */
1771 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001772regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001773{
1774 char_u *ret;
1775 int op;
1776 char_u *next;
1777 int flags;
1778 long minval;
1779 long maxval;
1780
1781 ret = regatom(&flags);
1782 if (ret == NULL)
1783 return NULL;
1784
1785 op = peekchr();
1786 if (re_multi_type(op) == NOT_MULTI)
1787 {
1788 *flagp = flags;
1789 return ret;
1790 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001791 /* default flags */
1792 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1793
1794 skipchr();
1795 switch (op)
1796 {
1797 case Magic('*'):
1798 if (flags & SIMPLE)
1799 reginsert(STAR, ret);
1800 else
1801 {
1802 /* Emit x* as (x&|), where & means "self". */
1803 reginsert(BRANCH, ret); /* Either x */
1804 regoptail(ret, regnode(BACK)); /* and loop */
1805 regoptail(ret, ret); /* back */
1806 regtail(ret, regnode(BRANCH)); /* or */
1807 regtail(ret, regnode(NOTHING)); /* null. */
1808 }
1809 break;
1810
1811 case Magic('+'):
1812 if (flags & SIMPLE)
1813 reginsert(PLUS, ret);
1814 else
1815 {
1816 /* Emit x+ as x(&|), where & means "self". */
1817 next = regnode(BRANCH); /* Either */
1818 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001819 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001820 regtail(next, regnode(BRANCH)); /* or */
1821 regtail(ret, regnode(NOTHING)); /* null. */
1822 }
1823 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1824 break;
1825
1826 case Magic('@'):
1827 {
1828 int lop = END;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001829 long nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001831 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001832 switch (no_Magic(getchr()))
1833 {
1834 case '=': lop = MATCH; break; /* \@= */
1835 case '!': lop = NOMATCH; break; /* \@! */
1836 case '>': lop = SUBPAT; break; /* \@> */
1837 case '<': switch (no_Magic(getchr()))
1838 {
1839 case '=': lop = BEHIND; break; /* \@<= */
1840 case '!': lop = NOBEHIND; break; /* \@<! */
1841 }
1842 }
1843 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001844 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001845 reg_magic == MAGIC_ALL);
1846 /* Look behind must match with behind_pos. */
1847 if (lop == BEHIND || lop == NOBEHIND)
1848 {
1849 regtail(ret, regnode(BHPOS));
1850 *flagp |= HASLOOKBH;
1851 }
1852 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001853 if (lop == BEHIND || lop == NOBEHIND)
1854 {
1855 if (nr < 0)
1856 nr = 0; /* no limit is same as zero limit */
1857 reginsert_nr(lop, nr, ret);
1858 }
1859 else
1860 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001861 break;
1862 }
1863
1864 case Magic('?'):
1865 case Magic('='):
1866 /* Emit x= as (x|) */
1867 reginsert(BRANCH, ret); /* Either x */
1868 regtail(ret, regnode(BRANCH)); /* or */
1869 next = regnode(NOTHING); /* null. */
1870 regtail(ret, next);
1871 regoptail(ret, next);
1872 break;
1873
1874 case Magic('{'):
1875 if (!read_limits(&minval, &maxval))
1876 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001877 if (flags & SIMPLE)
1878 {
1879 reginsert(BRACE_SIMPLE, ret);
1880 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1881 }
1882 else
1883 {
1884 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001885 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001886 reg_magic == MAGIC_ALL);
1887 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1888 regoptail(ret, regnode(BACK));
1889 regoptail(ret, ret);
1890 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1891 ++num_complex_braces;
1892 }
1893 if (minval > 0 && maxval > 0)
1894 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1895 break;
1896 }
1897 if (re_multi_type(peekchr()) != NOT_MULTI)
1898 {
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001899 // Can't have a multi follow a multi.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001900 if (peekchr() == Magic('*'))
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001901 EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
1902 EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
1903 no_Magic(peekchr()));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001904 }
1905
1906 return ret;
1907}
1908
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001909/* When making changes to classchars also change nfa_classcodes. */
1910static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1911static int classcodes[] = {
1912 ANY, IDENT, SIDENT, KWORD, SKWORD,
1913 FNAME, SFNAME, PRINT, SPRINT,
1914 WHITE, NWHITE, DIGIT, NDIGIT,
1915 HEX, NHEX, OCTAL, NOCTAL,
1916 WORD, NWORD, HEAD, NHEAD,
1917 ALPHA, NALPHA, LOWER, NLOWER,
1918 UPPER, NUPPER
1919};
1920
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001922 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923 *
1924 * Optimization: gobbles an entire sequence of ordinary characters so that
1925 * it can turn them into a single node, which is smaller to store and
1926 * faster to run. Don't do this when one_exactly is set.
1927 */
1928 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001929regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001930{
1931 char_u *ret;
1932 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001933 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001934 char_u *p;
1935 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001936 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937
1938 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001939
1940 c = getchr();
1941 switch (c)
1942 {
1943 case Magic('^'):
1944 ret = regnode(BOL);
1945 break;
1946
1947 case Magic('$'):
1948 ret = regnode(EOL);
1949#if defined(FEAT_SYN_HL) || defined(PROTO)
1950 had_eol = TRUE;
1951#endif
1952 break;
1953
1954 case Magic('<'):
1955 ret = regnode(BOW);
1956 break;
1957
1958 case Magic('>'):
1959 ret = regnode(EOW);
1960 break;
1961
1962 case Magic('_'):
1963 c = no_Magic(getchr());
1964 if (c == '^') /* "\_^" is start-of-line */
1965 {
1966 ret = regnode(BOL);
1967 break;
1968 }
1969 if (c == '$') /* "\_$" is end-of-line */
1970 {
1971 ret = regnode(EOL);
1972#if defined(FEAT_SYN_HL) || defined(PROTO)
1973 had_eol = TRUE;
1974#endif
1975 break;
1976 }
1977
1978 extra = ADD_NL;
1979 *flagp |= HASNL;
1980
1981 /* "\_[" is character range plus newline */
1982 if (c == '[')
1983 goto collection;
1984
1985 /* "\_x" is character class plus newline */
Bram Moolenaar2f40d122017-10-24 21:49:36 +02001986 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001987
1988 /*
1989 * Character classes.
1990 */
1991 case Magic('.'):
1992 case Magic('i'):
1993 case Magic('I'):
1994 case Magic('k'):
1995 case Magic('K'):
1996 case Magic('f'):
1997 case Magic('F'):
1998 case Magic('p'):
1999 case Magic('P'):
2000 case Magic('s'):
2001 case Magic('S'):
2002 case Magic('d'):
2003 case Magic('D'):
2004 case Magic('x'):
2005 case Magic('X'):
2006 case Magic('o'):
2007 case Magic('O'):
2008 case Magic('w'):
2009 case Magic('W'):
2010 case Magic('h'):
2011 case Magic('H'):
2012 case Magic('a'):
2013 case Magic('A'):
2014 case Magic('l'):
2015 case Magic('L'):
2016 case Magic('u'):
2017 case Magic('U'):
2018 p = vim_strchr(classchars, no_Magic(c));
2019 if (p == NULL)
2020 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002021#ifdef FEAT_MBYTE
2022 /* When '.' is followed by a composing char ignore the dot, so that
2023 * the composing char is matched here. */
2024 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2025 {
2026 c = getchr();
2027 goto do_multibyte;
2028 }
2029#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030 ret = regnode(classcodes[p - classchars] + extra);
2031 *flagp |= HASWIDTH | SIMPLE;
2032 break;
2033
2034 case Magic('n'):
2035 if (reg_string)
2036 {
2037 /* In a string "\n" matches a newline character. */
2038 ret = regnode(EXACTLY);
2039 regc(NL);
2040 regc(NUL);
2041 *flagp |= HASWIDTH | SIMPLE;
2042 }
2043 else
2044 {
2045 /* In buffer text "\n" matches the end of a line. */
2046 ret = regnode(NEWL);
2047 *flagp |= HASWIDTH | HASNL;
2048 }
2049 break;
2050
2051 case Magic('('):
2052 if (one_exactly)
2053 EMSG_ONE_RET_NULL;
2054 ret = reg(REG_PAREN, &flags);
2055 if (ret == NULL)
2056 return NULL;
2057 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2058 break;
2059
2060 case NUL:
2061 case Magic('|'):
2062 case Magic('&'):
2063 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002064 if (one_exactly)
2065 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002066 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067 /* NOTREACHED */
2068
2069 case Magic('='):
2070 case Magic('?'):
2071 case Magic('+'):
2072 case Magic('@'):
2073 case Magic('{'):
2074 case Magic('*'):
2075 c = no_Magic(c);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01002076 EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
2077 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002078 /* NOTREACHED */
2079
2080 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002081 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002082 {
2083 char_u *lp;
2084
2085 ret = regnode(EXACTLY);
2086 lp = reg_prev_sub;
2087 while (*lp != NUL)
2088 regc(*lp++);
2089 regc(NUL);
2090 if (*reg_prev_sub != NUL)
2091 {
2092 *flagp |= HASWIDTH;
2093 if ((lp - reg_prev_sub) == 1)
2094 *flagp |= SIMPLE;
2095 }
2096 }
2097 else
2098 EMSG_RET_NULL(_(e_nopresub));
2099 break;
2100
2101 case Magic('1'):
2102 case Magic('2'):
2103 case Magic('3'):
2104 case Magic('4'):
2105 case Magic('5'):
2106 case Magic('6'):
2107 case Magic('7'):
2108 case Magic('8'):
2109 case Magic('9'):
2110 {
2111 int refnum;
2112
2113 refnum = c - Magic('0');
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02002114 if (!seen_endbrace(refnum))
2115 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002116 ret = regnode(BACKREF + refnum);
2117 }
2118 break;
2119
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120 case Magic('z'):
2121 {
2122 c = no_Magic(getchr());
2123 switch (c)
2124 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002125#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002126 case '(': if ((reg_do_extmatch & REX_SET) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002127 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002128 if (one_exactly)
2129 EMSG_ONE_RET_NULL;
2130 ret = reg(REG_ZPAREN, &flags);
2131 if (ret == NULL)
2132 return NULL;
2133 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2134 re_has_z = REX_SET;
2135 break;
2136
2137 case '1':
2138 case '2':
2139 case '3':
2140 case '4':
2141 case '5':
2142 case '6':
2143 case '7':
2144 case '8':
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002145 case '9': if ((reg_do_extmatch & REX_USE) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002146 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002147 ret = regnode(ZREF + c - '0');
2148 re_has_z = REX_USE;
2149 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002150#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002151
2152 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002153 if (re_mult_next("\\zs") == FAIL)
2154 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002155 break;
2156
2157 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002158 if (re_mult_next("\\ze") == FAIL)
2159 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002160 break;
2161
2162 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2163 }
2164 }
2165 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166
2167 case Magic('%'):
2168 {
2169 c = no_Magic(getchr());
2170 switch (c)
2171 {
2172 /* () without a back reference */
2173 case '(':
2174 if (one_exactly)
2175 EMSG_ONE_RET_NULL;
2176 ret = reg(REG_NPAREN, &flags);
2177 if (ret == NULL)
2178 return NULL;
2179 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2180 break;
2181
2182 /* Catch \%^ and \%$ regardless of where they appear in the
2183 * pattern -- regardless of whether or not it makes sense. */
2184 case '^':
2185 ret = regnode(RE_BOF);
2186 break;
2187
2188 case '$':
2189 ret = regnode(RE_EOF);
2190 break;
2191
2192 case '#':
2193 ret = regnode(CURSOR);
2194 break;
2195
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002196 case 'V':
2197 ret = regnode(RE_VISUAL);
2198 break;
2199
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002200 case 'C':
2201 ret = regnode(RE_COMPOSING);
2202 break;
2203
Bram Moolenaar071d4272004-06-13 20:20:40 +00002204 /* \%[abc]: Emit as a list of branches, all ending at the last
2205 * branch which matches nothing. */
2206 case '[':
2207 if (one_exactly) /* doesn't nest */
2208 EMSG_ONE_RET_NULL;
2209 {
2210 char_u *lastbranch;
2211 char_u *lastnode = NULL;
2212 char_u *br;
2213
2214 ret = NULL;
2215 while ((c = getchr()) != ']')
2216 {
2217 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002218 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002219 reg_magic == MAGIC_ALL);
2220 br = regnode(BRANCH);
2221 if (ret == NULL)
2222 ret = br;
2223 else
2224 regtail(lastnode, br);
2225
2226 ungetchr();
2227 one_exactly = TRUE;
2228 lastnode = regatom(flagp);
2229 one_exactly = FALSE;
2230 if (lastnode == NULL)
2231 return NULL;
2232 }
2233 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002234 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002235 reg_magic == MAGIC_ALL);
2236 lastbranch = regnode(BRANCH);
2237 br = regnode(NOTHING);
2238 if (ret != JUST_CALC_SIZE)
2239 {
2240 regtail(lastnode, br);
2241 regtail(lastbranch, br);
2242 /* connect all branches to the NOTHING
2243 * branch at the end */
2244 for (br = ret; br != lastnode; )
2245 {
2246 if (OP(br) == BRANCH)
2247 {
2248 regtail(br, lastbranch);
2249 br = OPERAND(br);
2250 }
2251 else
2252 br = regnext(br);
2253 }
2254 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002255 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002256 break;
2257 }
2258
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002259 case 'd': /* %d123 decimal */
2260 case 'o': /* %o123 octal */
2261 case 'x': /* %xab hex 2 */
2262 case 'u': /* %uabcd hex 4 */
2263 case 'U': /* %U1234abcd hex 8 */
2264 {
Bram Moolenaar4c22a912017-11-02 22:29:38 +01002265 long i;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002266
2267 switch (c)
2268 {
2269 case 'd': i = getdecchrs(); break;
2270 case 'o': i = getoctchrs(); break;
2271 case 'x': i = gethexchrs(2); break;
2272 case 'u': i = gethexchrs(4); break;
2273 case 'U': i = gethexchrs(8); break;
2274 default: i = -1; break;
2275 }
2276
2277 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002278 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002279 _("E678: Invalid character after %s%%[dxouU]"),
2280 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002281#ifdef FEAT_MBYTE
2282 if (use_multibytecode(i))
2283 ret = regnode(MULTIBYTECODE);
2284 else
2285#endif
2286 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002287 if (i == 0)
2288 regc(0x0a);
2289 else
2290#ifdef FEAT_MBYTE
2291 regmbc(i);
2292#else
2293 regc(i);
2294#endif
2295 regc(NUL);
2296 *flagp |= HASWIDTH;
2297 break;
2298 }
2299
Bram Moolenaar071d4272004-06-13 20:20:40 +00002300 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002301 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2302 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 {
2304 long_u n = 0;
2305 int cmp;
2306
2307 cmp = c;
2308 if (cmp == '<' || cmp == '>')
2309 c = getchr();
2310 while (VIM_ISDIGIT(c))
2311 {
2312 n = n * 10 + (c - '0');
2313 c = getchr();
2314 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002315 if (c == '\'' && n == 0)
2316 {
2317 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2318 c = getchr();
2319 ret = regnode(RE_MARK);
2320 if (ret == JUST_CALC_SIZE)
2321 regsize += 2;
2322 else
2323 {
2324 *regcode++ = c;
2325 *regcode++ = cmp;
2326 }
2327 break;
2328 }
2329 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002330 {
2331 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002332 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002333 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002334 if (save_prev_at_start)
2335 at_start = TRUE;
2336 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 else if (c == 'c')
2338 ret = regnode(RE_COL);
2339 else
2340 ret = regnode(RE_VCOL);
2341 if (ret == JUST_CALC_SIZE)
2342 regsize += 5;
2343 else
2344 {
2345 /* put the number and the optional
2346 * comparator after the opcode */
2347 regcode = re_put_long(regcode, n);
2348 *regcode++ = cmp;
2349 }
2350 break;
2351 }
2352 }
2353
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002354 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002355 reg_magic == MAGIC_ALL);
2356 }
2357 }
2358 break;
2359
2360 case Magic('['):
2361collection:
2362 {
2363 char_u *lp;
2364
2365 /*
2366 * If there is no matching ']', we assume the '[' is a normal
2367 * character. This makes 'incsearch' and ":help [" work.
2368 */
2369 lp = skip_anyof(regparse);
2370 if (*lp == ']') /* there is a matching ']' */
2371 {
2372 int startc = -1; /* > 0 when next '-' is a range */
2373 int endc;
2374
2375 /*
2376 * In a character class, different parsing rules apply.
2377 * Not even \ is special anymore, nothing is.
2378 */
2379 if (*regparse == '^') /* Complement of range. */
2380 {
2381 ret = regnode(ANYBUT + extra);
2382 regparse++;
2383 }
2384 else
2385 ret = regnode(ANYOF + extra);
2386
2387 /* At the start ']' and '-' mean the literal character. */
2388 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002389 {
2390 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002391 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002392 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002393
2394 while (*regparse != NUL && *regparse != ']')
2395 {
2396 if (*regparse == '-')
2397 {
2398 ++regparse;
2399 /* The '-' is not used for a range at the end and
2400 * after or before a '\n'. */
2401 if (*regparse == ']' || *regparse == NUL
2402 || startc == -1
2403 || (regparse[0] == '\\' && regparse[1] == 'n'))
2404 {
2405 regc('-');
2406 startc = '-'; /* [--x] is a range */
2407 }
2408 else
2409 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002410 /* Also accept "a-[.z.]" */
2411 endc = 0;
2412 if (*regparse == '[')
2413 endc = get_coll_element(&regparse);
2414 if (endc == 0)
2415 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002416#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002417 if (has_mbyte)
2418 endc = mb_ptr2char_adv(&regparse);
2419 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002421 endc = *regparse++;
2422 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002423
2424 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002425 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002426 endc = coll_get_char();
2427
Bram Moolenaar071d4272004-06-13 20:20:40 +00002428 if (startc > endc)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002429 EMSG_RET_NULL(_(e_reverse_range));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002430#ifdef FEAT_MBYTE
2431 if (has_mbyte && ((*mb_char2len)(startc) > 1
2432 || (*mb_char2len)(endc) > 1))
2433 {
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002434 /* Limit to a range of 256 chars. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002435 if (endc > startc + 256)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002436 EMSG_RET_NULL(_(e_large_class));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002437 while (++startc <= endc)
2438 regmbc(startc);
2439 }
2440 else
2441#endif
2442 {
2443#ifdef EBCDIC
2444 int alpha_only = FALSE;
2445
2446 /* for alphabetical range skip the gaps
2447 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2448 if (isalpha(startc) && isalpha(endc))
2449 alpha_only = TRUE;
2450#endif
2451 while (++startc <= endc)
2452#ifdef EBCDIC
2453 if (!alpha_only || isalpha(startc))
2454#endif
2455 regc(startc);
2456 }
2457 startc = -1;
2458 }
2459 }
2460 /*
2461 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2462 * accepts "\t", "\e", etc., but only when the 'l' flag in
2463 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002464 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002465 */
2466 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002467 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002468 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002469 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002470 && vim_strchr(REGEXP_ABBR,
2471 regparse[1]) != NULL)))
2472 {
2473 regparse++;
2474 if (*regparse == 'n')
2475 {
2476 /* '\n' in range: also match NL */
2477 if (ret != JUST_CALC_SIZE)
2478 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002479 /* Using \n inside [^] does not change what
2480 * matches. "[^\n]" is the same as ".". */
2481 if (*ret == ANYOF)
2482 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002483 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002484 *flagp |= HASNL;
2485 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002486 /* else: must have had a \n already */
2487 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 regparse++;
2489 startc = -1;
2490 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002491 else if (*regparse == 'd'
2492 || *regparse == 'o'
2493 || *regparse == 'x'
2494 || *regparse == 'u'
2495 || *regparse == 'U')
2496 {
2497 startc = coll_get_char();
2498 if (startc == 0)
2499 regc(0x0a);
2500 else
2501#ifdef FEAT_MBYTE
2502 regmbc(startc);
2503#else
2504 regc(startc);
2505#endif
2506 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 else
2508 {
2509 startc = backslash_trans(*regparse++);
2510 regc(startc);
2511 }
2512 }
2513 else if (*regparse == '[')
2514 {
2515 int c_class;
2516 int cu;
2517
Bram Moolenaardf177f62005-02-22 08:39:57 +00002518 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002519 startc = -1;
2520 /* Characters assumed to be 8 bits! */
2521 switch (c_class)
2522 {
2523 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002524 c_class = get_equi_class(&regparse);
2525 if (c_class != 0)
2526 {
2527 /* produce equivalence class */
2528 reg_equi_class(c_class);
2529 }
2530 else if ((c_class =
2531 get_coll_element(&regparse)) != 0)
2532 {
2533 /* produce a collating element */
2534 regmbc(c_class);
2535 }
2536 else
2537 {
2538 /* literal '[', allow [[-x] as a range */
2539 startc = *regparse++;
2540 regc(startc);
2541 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002542 break;
2543 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002544 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002545 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002546 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002547 break;
2548 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002549 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002550 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002551 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002552 break;
2553 case CLASS_BLANK:
2554 regc(' ');
2555 regc('\t');
2556 break;
2557 case CLASS_CNTRL:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002558 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002559 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002560 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002561 break;
2562 case CLASS_DIGIT:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002563 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002564 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002565 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002566 break;
2567 case CLASS_GRAPH:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002568 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002569 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002570 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002571 break;
2572 case CLASS_LOWER:
2573 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002574 if (MB_ISLOWER(cu) && cu != 170
2575 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002576 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002577 break;
2578 case CLASS_PRINT:
2579 for (cu = 1; cu <= 255; cu++)
2580 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002581 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002582 break;
2583 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002584 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002585 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002586 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 break;
2588 case CLASS_SPACE:
2589 for (cu = 9; cu <= 13; cu++)
2590 regc(cu);
2591 regc(' ');
2592 break;
2593 case CLASS_UPPER:
2594 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002595 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002596 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002597 break;
2598 case CLASS_XDIGIT:
2599 for (cu = 1; cu <= 255; cu++)
2600 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002601 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002602 break;
2603 case CLASS_TAB:
2604 regc('\t');
2605 break;
2606 case CLASS_RETURN:
2607 regc('\r');
2608 break;
2609 case CLASS_BACKSPACE:
2610 regc('\b');
2611 break;
2612 case CLASS_ESCAPE:
2613 regc('\033');
2614 break;
2615 }
2616 }
2617 else
2618 {
2619#ifdef FEAT_MBYTE
2620 if (has_mbyte)
2621 {
2622 int len;
2623
2624 /* produce a multibyte character, including any
2625 * following composing characters */
2626 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002627 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002628 if (enc_utf8 && utf_char2len(startc) != len)
2629 startc = -1; /* composing chars */
2630 while (--len >= 0)
2631 regc(*regparse++);
2632 }
2633 else
2634#endif
2635 {
2636 startc = *regparse++;
2637 regc(startc);
2638 }
2639 }
2640 }
2641 regc(NUL);
2642 prevchr_len = 1; /* last char was the ']' */
2643 if (*regparse != ']')
2644 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2645 skipchr(); /* let's be friends with the lexer again */
2646 *flagp |= HASWIDTH | SIMPLE;
2647 break;
2648 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002649 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002650 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002651 }
2652 /* FALLTHROUGH */
2653
2654 default:
2655 {
2656 int len;
2657
2658#ifdef FEAT_MBYTE
2659 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002660 * before a multi and when it's a composing char. */
2661 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002662 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002663do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664 ret = regnode(MULTIBYTECODE);
2665 regmbc(c);
2666 *flagp |= HASWIDTH | SIMPLE;
2667 break;
2668 }
2669#endif
2670
2671 ret = regnode(EXACTLY);
2672
2673 /*
2674 * Append characters as long as:
2675 * - there is no following multi, we then need the character in
2676 * front of it as a single character operand
2677 * - not running into a Magic character
2678 * - "one_exactly" is not set
2679 * But always emit at least one character. Might be a Multi,
2680 * e.g., a "[" without matching "]".
2681 */
2682 for (len = 0; c != NUL && (len == 0
2683 || (re_multi_type(peekchr()) == NOT_MULTI
2684 && !one_exactly
2685 && !is_Magic(c))); ++len)
2686 {
2687 c = no_Magic(c);
2688#ifdef FEAT_MBYTE
2689 if (has_mbyte)
2690 {
2691 regmbc(c);
2692 if (enc_utf8)
2693 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002694 int l;
2695
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002696 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002697 for (;;)
2698 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002699 l = utf_ptr2len(regparse);
2700 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002701 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002702 regmbc(utf_ptr2char(regparse));
2703 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002704 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002705 }
2706 }
2707 else
2708#endif
2709 regc(c);
2710 c = getchr();
2711 }
2712 ungetchr();
2713
2714 regc(NUL);
2715 *flagp |= HASWIDTH;
2716 if (len == 1)
2717 *flagp |= SIMPLE;
2718 }
2719 break;
2720 }
2721
2722 return ret;
2723}
2724
#ifdef FEAT_MBYTE
/*
 * Decide how character "c" is emitted: return TRUE when it needs its own
 * MULTIBYTECODE node, FALSE when it can go into an EXACTLY node.
 * A separate node is used for a character that takes more than one byte and
 * is either followed by a multi or, in UTF-8, is a composing character.
 */
    static int
use_multibytecode(int c)
{
    /* Only characters that occupy more than one byte are candidates. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
	return FALSE;

    /* A following multi must apply to this character alone. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
	return TRUE;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2738
Bram Moolenaar071d4272004-06-13 20:20:40 +00002739/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002740 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002741 * Return pointer to generated code.
2742 */
2743 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002744regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002745{
2746 char_u *ret;
2747
2748 ret = regcode;
2749 if (ret == JUST_CALC_SIZE)
2750 regsize += 3;
2751 else
2752 {
2753 *regcode++ = op;
2754 *regcode++ = NUL; /* Null "next" pointer. */
2755 *regcode++ = NUL;
2756 }
2757 return ret;
2758}
2759
2760/*
2761 * Emit (if appropriate) a byte of code
2762 */
2763 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002764regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002765{
2766 if (regcode == JUST_CALC_SIZE)
2767 regsize++;
2768 else
2769 *regcode++ = b;
2770}
2771
#ifdef FEAT_MBYTE
/*
 * Emit character "c" as a (possibly multi-byte) sequence, or only count its
 * bytes when the size is being calculated.
 * A character above 0xff is silently dropped when "has_mbyte" is not set.
 */
    static void
regmbc(int c)
{
    if (c > 0xff && !has_mbyte)
	return;

    if (regcode != JUST_CALC_SIZE)
    {
	int	nbytes = (*mb_char2bytes)(c, regcode);

	regcode += nbytes;
    }
    else
	regsize += (*mb_char2len)(c);
}
#endif
2787
2788/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002789 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002790 *
2791 * Means relocating the operand.
2792 */
2793 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002794reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002795{
2796 char_u *src;
2797 char_u *dst;
2798 char_u *place;
2799
2800 if (regcode == JUST_CALC_SIZE)
2801 {
2802 regsize += 3;
2803 return;
2804 }
2805 src = regcode;
2806 regcode += 3;
2807 dst = regcode;
2808 while (src > opnd)
2809 *--dst = *--src;
2810
2811 place = opnd; /* Op node, where operand used to be. */
2812 *place++ = op;
2813 *place++ = NUL;
2814 *place = NUL;
2815}
2816
2817/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002818 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002819 * Add a number to the operator.
2820 */
2821 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002822reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002823{
2824 char_u *src;
2825 char_u *dst;
2826 char_u *place;
2827
2828 if (regcode == JUST_CALC_SIZE)
2829 {
2830 regsize += 7;
2831 return;
2832 }
2833 src = regcode;
2834 regcode += 7;
2835 dst = regcode;
2836 while (src > opnd)
2837 *--dst = *--src;
2838
2839 place = opnd; /* Op node, where operand used to be. */
2840 *place++ = op;
2841 *place++ = NUL;
2842 *place++ = NUL;
2843 place = re_put_long(place, (long_u)val);
2844}
2845
2846/*
2847 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002848 * The operator has the given limit values as operands. Also set next pointer.
2849 *
2850 * Means relocating the operand.
2851 */
2852 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002853reginsert_limits(
2854 int op,
2855 long minval,
2856 long maxval,
2857 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002858{
2859 char_u *src;
2860 char_u *dst;
2861 char_u *place;
2862
2863 if (regcode == JUST_CALC_SIZE)
2864 {
2865 regsize += 11;
2866 return;
2867 }
2868 src = regcode;
2869 regcode += 11;
2870 dst = regcode;
2871 while (src > opnd)
2872 *--dst = *--src;
2873
2874 place = opnd; /* Op node, where operand used to be. */
2875 *place++ = op;
2876 *place++ = NUL;
2877 *place++ = NUL;
2878 place = re_put_long(place, (long_u)minval);
2879 place = re_put_long(place, (long_u)maxval);
2880 regtail(opnd, place);
2881}
2882
2883/*
2884 * Write a long as four bytes at "p" and return pointer to the next char.
2885 */
2886 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002887re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002888{
2889 *p++ = (char_u) ((val >> 24) & 0377);
2890 *p++ = (char_u) ((val >> 16) & 0377);
2891 *p++ = (char_u) ((val >> 8) & 0377);
2892 *p++ = (char_u) (val & 0377);
2893 return p;
2894}
2895
2896/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002897 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002898 */
2899 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002900regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002901{
2902 char_u *scan;
2903 char_u *temp;
2904 int offset;
2905
2906 if (p == JUST_CALC_SIZE)
2907 return;
2908
2909 /* Find last node. */
2910 scan = p;
2911 for (;;)
2912 {
2913 temp = regnext(scan);
2914 if (temp == NULL)
2915 break;
2916 scan = temp;
2917 }
2918
Bram Moolenaar582fd852005-03-28 20:58:01 +00002919 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002920 offset = (int)(scan - val);
2921 else
2922 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002923 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002924 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002925 * values in too many places. */
2926 if (offset > 0xffff)
2927 reg_toolong = TRUE;
2928 else
2929 {
2930 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2931 *(scan + 2) = (char_u) (offset & 0377);
2932 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002933}
2934
2935/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002936 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002937 */
2938 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002939regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002940{
2941 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2942 if (p == NULL || p == JUST_CALC_SIZE
2943 || (OP(p) != BRANCH
2944 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2945 return;
2946 regtail(OPERAND(p), val);
2947}
2948
2949/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002950 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002951 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002952/*
2953 * Start parsing at "str".
2954 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002955 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002956initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957{
2958 regparse = str;
2959 prevchr_len = 0;
2960 curchr = prevprevchr = prevchr = nextchr = -1;
2961 at_start = TRUE;
2962 prev_at_start = FALSE;
2963}
2964
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002965/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002966 * Save the current parse state, so that it can be restored and parsing
2967 * starts in the same state again.
2968 */
2969 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002970save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002971{
2972 ps->regparse = regparse;
2973 ps->prevchr_len = prevchr_len;
2974 ps->curchr = curchr;
2975 ps->prevchr = prevchr;
2976 ps->prevprevchr = prevprevchr;
2977 ps->nextchr = nextchr;
2978 ps->at_start = at_start;
2979 ps->prev_at_start = prev_at_start;
2980 ps->regnpar = regnpar;
2981}
2982
2983/*
2984 * Restore a previously saved parse state.
2985 */
2986 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002987restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002988{
2989 regparse = ps->regparse;
2990 prevchr_len = ps->prevchr_len;
2991 curchr = ps->curchr;
2992 prevchr = ps->prevchr;
2993 prevprevchr = ps->prevprevchr;
2994 nextchr = ps->nextchr;
2995 at_start = ps->at_start;
2996 prev_at_start = ps->prev_at_start;
2997 regnpar = ps->regnpar;
2998}
2999
3000
/*
 * Get the next character without advancing.
 *
 * The result is kept in "curchr": when "curchr" is not -1 it is returned
 * as-is, otherwise the character at "regparse" is classified, stored in
 * "curchr" and returned.  Characters that are magic in the current context
 * are returned as Magic(c).
 * Although "regparse" is not advanced, "at_start" and "prev_at_start" may be
 * changed when a magic '^' or a backslash followed by a META character is
 * found.
 */
    static int
peekchr(void)
{
    /* Non-zero while the character after a backslash is being evaluated by
     * the recursive call below. */
    static int	after_slash = FALSE;

    if (curchr == -1)
    {
	switch (curchr = regparse[0])
	{
	case '.':
	case '[':
	case '~':
	    /* magic when 'magic' is on */
	    if (reg_magic >= MAGIC_ON)
		curchr = Magic(curchr);
	    break;
	case '(':
	case ')':
	case '{':
	case '%':
	case '+':
	case '=':
	case '?':
	case '@':
	case '!':
	case '&':
	case '|':
	case '<':
	case '>':
	case '#':	/* future ext. */
	case '"':	/* future ext. */
	case '\'':	/* future ext. */
	case ',':	/* future ext. */
	case '-':	/* future ext. */
	case ':':	/* future ext. */
	case ';':	/* future ext. */
	case '`':	/* future ext. */
	case '/':	/* Can't be used in / command */
	    /* magic only after "\v" */
	    if (reg_magic == MAGIC_ALL)
		curchr = Magic(curchr);
	    break;
	case '*':
	    /* * is not magic as the very first character, eg "?*ptr", when
	     * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
	     * "\(\*" is not magic, thus must be magic if "after_slash" */
	    if (reg_magic >= MAGIC_ON
		    && !at_start
		    && !(prev_at_start && prevchr == Magic('^'))
		    && (after_slash
			|| (prevchr != Magic('(')
			    && prevchr != Magic('&')
			    && prevchr != Magic('|'))))
		curchr = Magic('*');
	    break;
	case '^':
	    /* '^' is only magic as the very first character, after "\v" or
	     * when it's after "\(", "\|", "\&", "\n" or "\%(" */
	    if (reg_magic >= MAGIC_OFF
		    && (at_start
			|| reg_magic == MAGIC_ALL
			|| prevchr == Magic('(')
			|| prevchr == Magic('|')
			|| prevchr == Magic('&')
			|| prevchr == Magic('n')
			|| (no_Magic(prevchr) == '('
			    && prevprevchr == Magic('%'))))
	    {
		curchr = Magic('^');
		/* What follows a magic '^' is at the start again. */
		at_start = TRUE;
		prev_at_start = FALSE;
	    }
	    break;
	case '$':
	    /* '$' is only magic as the very last char, after "\v" or when
	     * it's in front of either "\|", "\)", "\&", or "\n" */
	    if (reg_magic >= MAGIC_OFF)
	    {
		char_u *p = regparse + 1;
		int is_magic_all = (reg_magic == MAGIC_ALL);

		/* ignore \c \C \m \M \v \V and \Z after '$', but keep track
		 * of whether what follows them is parsed as after "\v" */
		while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
				|| p[1] == 'm' || p[1] == 'M'
				|| p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
		{
		    if (p[1] == 'v')
			is_magic_all = TRUE;
		    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
			is_magic_all = FALSE;
		    p += 2;
		}
		if (p[0] == NUL
			|| (p[0] == '\\'
			    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
				|| p[1] == 'n'))
			|| (is_magic_all
			       && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
			|| reg_magic == MAGIC_ALL)
		    curchr = Magic('$');
	    }
	    break;
	case '\\':
	    {
		int c = regparse[1];

		if (c == NUL)
		    curchr = '\\';	/* trailing '\' */
		else if (
#ifdef EBCDIC
			vim_strchr(META, c)
#else
			c <= '~' && META_flags[c]
#endif
			)
		{
		    /*
		     * META contains everything that may be magic sometimes,
		     * except ^ and $ ("\^" and "\$" are only magic after
		     * "\V").  We now fetch the next character and toggle its
		     * magicness.  Therefore, \ is so meta-magic that it is
		     * not in META.
		     */
		    curchr = -1;
		    prev_at_start = at_start;
		    at_start = FALSE;	/* be able to say "/\*ptr" */
		    /* Temporarily step over the backslash, so that the
		     * recursive call classifies the character after it. */
		    ++regparse;
		    ++after_slash;
		    peekchr();
		    --regparse;
		    --after_slash;
		    curchr = toggle_Magic(curchr);
		}
		else if (vim_strchr(REGEXP_ABBR, c))
		{
		    /*
		     * Handle abbreviations, like "\t" for TAB -- webb
		     */
		    curchr = backslash_trans(c);
		}
		else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
		    curchr = toggle_Magic(c);
		else
		{
		    /*
		     * Next character can never be (made) magic?
		     * Then backslashing it won't do anything.
		     */
#ifdef FEAT_MBYTE
		    if (has_mbyte)
			curchr = (*mb_ptr2char)(regparse + 1);
		    else
#endif
			curchr = c;
		}
		break;
	    }

#ifdef FEAT_MBYTE
	default:
	    /* Any other character: get the whole multi-byte character. */
	    if (has_mbyte)
		curchr = (*mb_ptr2char)(regparse);
#endif
	}
    }

    return curchr;
}
3172
3173/*
3174 * Eat one lexed character. Do this in a way that we can undo it.
3175 */
3176 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003177skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003178{
3179 /* peekchr() eats a backslash, do the same here */
3180 if (*regparse == '\\')
3181 prevchr_len = 1;
3182 else
3183 prevchr_len = 0;
3184 if (regparse[prevchr_len] != NUL)
3185 {
3186#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003187 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003188 /* exclude composing chars that mb_ptr2len does include */
3189 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003190 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003191 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003192 else
3193#endif
3194 ++prevchr_len;
3195 }
3196 regparse += prevchr_len;
3197 prev_at_start = at_start;
3198 at_start = FALSE;
3199 prevprevchr = prevchr;
3200 prevchr = curchr;
3201 curchr = nextchr; /* use previously unget char, or -1 */
3202 nextchr = -1;
3203}
3204
3205/*
3206 * Skip a character while keeping the value of prev_at_start for at_start.
3207 * prevchr and prevprevchr are also kept.
3208 */
3209 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003210skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003211{
3212 int as = prev_at_start;
3213 int pr = prevchr;
3214 int prpr = prevprevchr;
3215
3216 skipchr();
3217 at_start = as;
3218 prevchr = pr;
3219 prevprevchr = prpr;
3220}
3221
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer.
 */
    static int
getchr(void)
{
    int		c;

    c = peekchr();
    skipchr();
    return c;
}
3234
3235/*
3236 * put character back. Works only once!
3237 */
3238 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003239ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003240{
3241 nextchr = curchr;
3242 curchr = prevchr;
3243 prevchr = prevprevchr;
3244 at_start = prev_at_start;
3245 prev_at_start = FALSE;
3246
3247 /* Backup regparse, so that it's at the same position as before the
3248 * getchr(). */
3249 regparse -= prevchr_len;
3250}
3251
3252/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003253 * Get and return the value of the hex string at the current position.
3254 * Return -1 if there is no valid hex number.
3255 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003256 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003257 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003258 * The parameter controls the maximum number of input characters. This will be
3259 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3260 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003261 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003262gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003263{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003264 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003265 int c;
3266 int i;
3267
3268 for (i = 0; i < maxinputlen; ++i)
3269 {
3270 c = regparse[0];
3271 if (!vim_isxdigit(c))
3272 break;
3273 nr <<= 4;
3274 nr |= hex2nr(c);
3275 ++regparse;
3276 }
3277
3278 if (i == 0)
3279 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003280 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003281}
3282
3283/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003284 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003285 * current position. Return -1 for invalid. Consumes all digits.
3286 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003287 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003288getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003289{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003290 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003291 int c;
3292 int i;
3293
3294 for (i = 0; ; ++i)
3295 {
3296 c = regparse[0];
3297 if (c < '0' || c > '9')
3298 break;
3299 nr *= 10;
3300 nr += c - '0';
3301 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003302 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003303 }
3304
3305 if (i == 0)
3306 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003307 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003308}
3309
3310/*
3311 * get and return the value of the octal string immediately after the current
3312 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3313 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3314 * treat 8 or 9 as recognised characters. Position is updated:
3315 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003316 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003317 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003318 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003319getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003320{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003321 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003322 int c;
3323 int i;
3324
3325 for (i = 0; i < 3 && nr < 040; ++i)
3326 {
3327 c = regparse[0];
3328 if (c < '0' || c > '7')
3329 break;
3330 nr <<= 3;
3331 nr |= hex2nr(c);
3332 ++regparse;
3333 }
3334
3335 if (i == 0)
3336 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003337 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003338}
3339
3340/*
3341 * Get a number after a backslash that is inside [].
3342 * When nothing is recognized return a backslash.
3343 */
3344 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003345coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003346{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003347 long nr = -1;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003348
3349 switch (*regparse++)
3350 {
3351 case 'd': nr = getdecchrs(); break;
3352 case 'o': nr = getoctchrs(); break;
3353 case 'x': nr = gethexchrs(2); break;
3354 case 'u': nr = gethexchrs(4); break;
3355 case 'U': nr = gethexchrs(8); break;
3356 }
3357 if (nr < 0)
3358 {
3359 /* If getting the number fails be backwards compatible: the character
3360 * is a backslash. */
3361 --regparse;
3362 nr = '\\';
3363 }
3364 return nr;
3365}
3366
3367/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003368 * read_limits - Read two integers to be taken as a minimum and maximum.
3369 * If the first character is '-', then the range is reversed.
3370 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3371 * missing, a very big number is the default.
3372 */
3373 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003374read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003375{
3376 int reverse = FALSE;
3377 char_u *first_char;
3378 long tmp;
3379
3380 if (*regparse == '-')
3381 {
3382 /* Starts with '-', so reverse the range later */
3383 regparse++;
3384 reverse = TRUE;
3385 }
3386 first_char = regparse;
3387 *minval = getdigits(&regparse);
3388 if (*regparse == ',') /* There is a comma */
3389 {
3390 if (vim_isdigit(*++regparse))
3391 *maxval = getdigits(&regparse);
3392 else
3393 *maxval = MAX_LIMIT;
3394 }
3395 else if (VIM_ISDIGIT(*first_char))
3396 *maxval = *minval; /* It was \{n} or \{-n} */
3397 else
3398 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3399 if (*regparse == '\\')
3400 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003401 if (*regparse != '}')
Bram Moolenaar1be45b22019-01-14 22:46:15 +01003402 EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"),
3403 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003404
3405 /*
3406 * Reverse the range if there was a '-', or make sure it is in the right
3407 * order otherwise.
3408 */
3409 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3410 {
3411 tmp = *minval;
3412 *minval = *maxval;
3413 *maxval = tmp;
3414 }
3415 skipchr(); /* let's be friends with the lexer again */
3416 return OK;
3417}
3418
3419/*
3420 * vim_regexec and friends
3421 */
3422
3423/*
3424 * Global work variables for vim_regexec().
3425 */
3426
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of the union is in use at a time, depending on whether a
 * single-line or a multi-line match is being done.
 */
typedef struct
{
    union
    {
	char_u	*ptr;	/* rex.input pointer, for single-line regexp */
	lpos_T	pos;	/* rex.input pos, for multi-line regexp */
    } rs_u;
    int		rs_len;	/* length of "backpos" */
} regsave_T;
3441
/* struct to save start/end pointer/position in for \(\)
 * restore_se() reads back "pos" when REG_MULTI is set and "ptr" otherwise. */
typedef struct
{
    union
    {
	char_u	*ptr;	/* used when not REG_MULTI */
	lpos_T	pos;	/* used when REG_MULTI */
    } se_u;
} save_se_T;
3451
/* used for BEHIND and NOBEHIND matching
 * Holds two saved input states plus a copy of the sub-expression state.
 * NOTE(review): presumably filled by save_subexpr() and read back by
 * restore_subexpr(), which both take a regbehind_T pointer -- confirm. */
typedef struct regbehind_S
{
    regsave_T	save_after;
    regsave_T	save_behind;
    int		save_need_clear_subexpr;
    save_se_T	save_start[NSUBEXP];	/* one entry per sub-expression */
    save_se_T	save_end[NSUBEXP];	/* one entry per sub-expression */
} regbehind_T;
3461
/* Forward declarations of file-local functions used by the matching code. */
static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out);
static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out);
static void cleanup_subexpr(void);
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr(void);
#endif
static void save_subexpr(regbehind_T *bp);
static void restore_subexpr(regbehind_T *bp);
static void reg_nextline(void);
static void reg_save(regsave_T *save, garray_T *gap);
static void reg_restore(regsave_T *save, garray_T *gap);
static int reg_save_equal(regsave_T *save);
static void save_se_multi(save_se_T *savep, lpos_T *posp);
static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003476
/* Save the sub-expressions before attempting a match.
 * Uses save_se_multi() with "posp" when REG_MULTI is set, otherwise
 * save_se_one() with "pp".  The expansion is parenthesized, so that the
 * conditional cannot combine with operators around the macro call. */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/* After a failed match restore the sub-expressions.
 * Writes the saved position to "*posp" when REG_MULTI is set, otherwise the
 * saved pointer to "*pp".  Expands to a block, not an expression. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
	*(posp) = (savep)->se_u.pos; \
    else \
	*(pp) = (savep)->se_u.ptr; }
3487
/* Forward declarations of more file-local functions used for matching. */
static int re_num_cmp(long_u val, char_u *scan);
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
static int regmatch(char_u *prog, proftime_T *tm, int *timed_out);
static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003492
#ifdef DEBUG
/* Only present in a DEBUG build; starts out as zero.  Not static, thus it
 * can be set from outside this file.
 * NOTE(review): presumably switches on narration of the matching -- confirm
 * against where it is used. */
int regnarrate = 0;
#endif
3496
/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in bt_regexec_both() when finished.
 * "reg_tofree" is NULL when nothing is allocated.
 * NOTE(review): "reg_tofreelen" is presumably the allocated size of
 * "reg_tofree" -- confirm against where it is set.
 */
static char_u	*reg_tofree = NULL;
static unsigned	reg_tofreelen;
3504
3505/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02003506 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003507 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003508 * done:
3509 * single-line multi-line
3510 * reg_match &regmatch_T NULL
3511 * reg_mmatch NULL &regmmatch_T
3512 * reg_startp reg_match->startp <invalid>
3513 * reg_endp reg_match->endp <invalid>
3514 * reg_startpos <invalid> reg_mmatch->startpos
3515 * reg_endpos <invalid> reg_mmatch->endpos
3516 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003517 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003518 * reg_firstlnum <invalid> first line in which to search
3519 * reg_maxline 0 last line nr
3520 * reg_line_lbr FALSE or TRUE FALSE
3521 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003522typedef struct {
3523 regmatch_T *reg_match;
3524 regmmatch_T *reg_mmatch;
3525 char_u **reg_startp;
3526 char_u **reg_endp;
3527 lpos_T *reg_startpos;
3528 lpos_T *reg_endpos;
3529 win_T *reg_win;
3530 buf_T *reg_buf;
3531 linenr_T reg_firstlnum;
3532 linenr_T reg_maxline;
3533 int reg_line_lbr; /* "\n" in string is line break */
3534
Bram Moolenaar0270f382018-07-17 05:43:58 +02003535 // The current match-position is stord in these variables:
3536 linenr_T lnum; // line number, relative to first line
3537 char_u *line; // start of current line
3538 char_u *input; // current input, points into "regline"
3539
3540 int need_clear_subexpr; // subexpressions still need to be cleared
3541#ifdef FEAT_SYN_HL
3542 int need_clear_zsubexpr; // extmatch subexpressions still need to be
3543 // cleared
3544#endif
3545
Bram Moolenaar6100d022016-10-02 16:51:57 +02003546 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3547 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3548 * contains '\c' or '\C' the value is overruled. */
3549 int reg_ic;
3550
3551#ifdef FEAT_MBYTE
Bram Moolenaar0270f382018-07-17 05:43:58 +02003552 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z
Bram Moolenaar6100d022016-10-02 16:51:57 +02003553 * flag in the regexp. Defaults to false, always. */
3554 int reg_icombine;
3555#endif
3556
3557 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3558 * there is no maximum. */
3559 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003560
3561 // State for the NFA engine regexec.
3562 int nfa_has_zend; // NFA regexp \ze operator encountered.
3563 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
3564 int nfa_nsubexpr; // Number of sub expressions actually being used
3565 // during execution. 1 if only the whole match
3566 // (subexpr 0) is used.
3567 // listid is global, so that it increases on recursive calls to
3568 // nfa_regmatch(), which means we don't have to clear the lastlist field of
3569 // all the states.
3570 int nfa_listid;
3571 int nfa_alt_listid;
3572
3573#ifdef FEAT_SYN_HL
3574 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
3575#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003576} regexec_T;
3577
3578static regexec_T rex;
3579static int rex_in_use = FALSE;
3580
Bram Moolenaar071d4272004-06-13 20:20:40 +00003581
/*
 * Values for rs_state in regitem_T: which kind of node pushed the item.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3602
3603/*
3604 * When there are alternatives a regstate_T is put on the regstack to remember
3605 * what we are doing.
3606 * Before it may be another type of item, depending on rs_state, to remember
3607 * more things.
3608 */
3609typedef struct regitem_S
3610{
3611 regstate_T rs_state; /* what we are doing, one of RS_ above */
3612 char_u *rs_scan; /* current node in program */
3613 union
3614 {
3615 save_se_T sesave;
3616 regsave_T regsave;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003617 } rs_un; /* room for saving rex.input */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003618 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003619} regitem_T;
3620
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003621static regitem_T *regstack_push(regstate_T state, char_u *scan);
3622static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003623
/*
 * Extra state used for STAR, PLUS and BRACE_SIMPLE matching.
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3633
3634/* used to store input position when a BACK was encountered, so that we now if
3635 * we made any progress since the last time. */
3636typedef struct backpos_S
3637{
3638 char_u *bp_scan; /* "scan" where BACK was encountered */
3639 regsave_T bp_pos; /* last input position */
3640} backpos_T;
3641
3642/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003643 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3644 * to avoid invoking malloc() and free() often.
3645 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3646 * or regbehind_T.
3647 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003648 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003649static garray_T regstack = {0, 0, 0, 0, NULL};
3650static garray_T backpos = {0, 0, 0, 0, NULL};
3651
/*
 * Allocation strategy for both the regstack and the backpos table, meant to
 * reduce the number of malloc/free calls:
 * - The initial size is fairly small.
 * - When needed the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match the memory is freed only if the array has
 *   grown.  Thus the memory is kept allocated when it is at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL        2048
#define BACKPOS_INITIAL         64
3665
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that this file keeps allocated between matches: the
 * "reg_tofree" line copy, "reg_prev_sub", the regstack and the backpos table.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003676
Bram Moolenaar071d4272004-06-13 20:20:40 +00003677/*
3678 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3679 */
3680 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003681reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003682{
3683 /* when looking behind for a match/no-match lnum is negative. But we
3684 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003685 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003686 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003687 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003688 /* Must have matched the "\n" in the last line. */
3689 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003690 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003691}
3692
3693static regsave_T behind_pos;
3694
3695#ifdef FEAT_SYN_HL
3696static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3697static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3698static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3699static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3700#endif
3701
/*
 * TRUE if using multi-line regexp: bt_regexec_multi() sets rex.reg_match to
 * NULL, bt_regexec_nl() sets it to the regmatch_T of the caller.
 */
#define REG_MULTI       (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003704
Bram Moolenaar071d4272004-06-13 20:20:40 +00003705/*
3706 * Match a regexp against a string.
3707 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3708 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003709 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003710 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003711 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003712 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003713 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003714bt_regexec_nl(
3715 regmatch_T *rmp,
3716 char_u *line, /* string to match against */
3717 colnr_T col, /* column to start looking for match */
3718 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003719{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003720 rex.reg_match = rmp;
3721 rex.reg_mmatch = NULL;
3722 rex.reg_maxline = 0;
3723 rex.reg_line_lbr = line_lbr;
3724 rex.reg_buf = curbuf;
3725 rex.reg_win = NULL;
3726 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003727#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003728 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003729#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003730 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003731
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003732 return bt_regexec_both(line, col, NULL, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003733}
3734
Bram Moolenaar071d4272004-06-13 20:20:40 +00003735/*
3736 * Match a regexp against multiple lines.
3737 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3738 * Uses curbuf for line count and 'iskeyword'.
3739 *
3740 * Return zero if there is no match. Return number of lines contained in the
3741 * match otherwise.
3742 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003743 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003744bt_regexec_multi(
3745 regmmatch_T *rmp,
3746 win_T *win, /* window in which to search or NULL */
3747 buf_T *buf, /* buffer in which to search */
3748 linenr_T lnum, /* nr of line to start looking for match */
3749 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003750 proftime_T *tm, /* timeout limit or NULL */
3751 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003752{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003753 rex.reg_match = NULL;
3754 rex.reg_mmatch = rmp;
3755 rex.reg_buf = buf;
3756 rex.reg_win = win;
3757 rex.reg_firstlnum = lnum;
3758 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3759 rex.reg_line_lbr = FALSE;
3760 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003761#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003762 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003763#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003764 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003765
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003766 return bt_regexec_both(NULL, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003767}
3768
3769/*
3770 * Match a regexp against a string ("line" points to the string) or multiple
3771 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003772 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003773 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003774 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003775bt_regexec_both(
3776 char_u *line,
3777 colnr_T col, /* column to start looking for match */
Bram Moolenaar09463262017-06-17 20:55:06 +02003778 proftime_T *tm, /* timeout limit or NULL */
3779 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003780{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003781 bt_regprog_T *prog;
3782 char_u *s;
3783 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003784
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003785 /* Create "regstack" and "backpos" if they are not allocated yet.
3786 * We allocate *_INITIAL amount of bytes first and then set the grow size
3787 * to much bigger value to avoid many malloc calls in case of deep regular
3788 * expressions. */
3789 if (regstack.ga_data == NULL)
3790 {
3791 /* Use an item size of 1 byte, since we push different things
3792 * onto the regstack. */
3793 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003794 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003795 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3796 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003797
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003798 if (backpos.ga_data == NULL)
3799 {
3800 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003801 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003802 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3803 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003804
Bram Moolenaar071d4272004-06-13 20:20:40 +00003805 if (REG_MULTI)
3806 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003807 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003808 line = reg_getline((linenr_T)0);
Bram Moolenaar6100d022016-10-02 16:51:57 +02003809 rex.reg_startpos = rex.reg_mmatch->startpos;
3810 rex.reg_endpos = rex.reg_mmatch->endpos;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003811 }
3812 else
3813 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003814 prog = (bt_regprog_T *)rex.reg_match->regprog;
3815 rex.reg_startp = rex.reg_match->startp;
3816 rex.reg_endp = rex.reg_match->endp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003817 }
3818
3819 /* Be paranoid... */
3820 if (prog == NULL || line == NULL)
3821 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003822 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003823 goto theend;
3824 }
3825
3826 /* Check validity of program. */
3827 if (prog_magic_wrong())
3828 goto theend;
3829
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003830 /* If the start column is past the maximum column: no need to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003831 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003832 goto theend;
3833
Bram Moolenaar6100d022016-10-02 16:51:57 +02003834 /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003835 if (prog->regflags & RF_ICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003836 rex.reg_ic = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003837 else if (prog->regflags & RF_NOICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003838 rex.reg_ic = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003839
3840#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003841 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003842 if (prog->regflags & RF_ICOMBINE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003843 rex.reg_icombine = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003844#endif
3845
3846 /* If there is a "must appear" string, look for it. */
3847 if (prog->regmust != NULL)
3848 {
3849 int c;
3850
3851#ifdef FEAT_MBYTE
3852 if (has_mbyte)
3853 c = (*mb_ptr2char)(prog->regmust);
3854 else
3855#endif
3856 c = *prog->regmust;
3857 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003858
3859 /*
3860 * This is used very often, esp. for ":global". Use three versions of
3861 * the loop to avoid overhead of conditions.
3862 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003863 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003864#ifdef FEAT_MBYTE
3865 && !has_mbyte
3866#endif
3867 )
3868 while ((s = vim_strbyte(s, c)) != NULL)
3869 {
3870 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3871 break; /* Found it. */
3872 ++s;
3873 }
3874#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003875 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar05159a02005-02-26 23:04:13 +00003876 while ((s = vim_strchr(s, c)) != NULL)
3877 {
3878 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3879 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003880 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003881 }
3882#endif
3883 else
3884 while ((s = cstrchr(s, c)) != NULL)
3885 {
3886 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3887 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003888 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003889 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003890 if (s == NULL) /* Not present. */
3891 goto theend;
3892 }
3893
Bram Moolenaar0270f382018-07-17 05:43:58 +02003894 rex.line = line;
3895 rex.lnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003896 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003897
3898 /* Simplest case: Anchored match need be tried only once. */
3899 if (prog->reganch)
3900 {
3901 int c;
3902
3903#ifdef FEAT_MBYTE
3904 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003905 c = (*mb_ptr2char)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003906 else
3907#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02003908 c = rex.line[col];
Bram Moolenaar071d4272004-06-13 20:20:40 +00003909 if (prog->regstart == NUL
3910 || prog->regstart == c
Bram Moolenaar6100d022016-10-02 16:51:57 +02003911 || (rex.reg_ic && ((
Bram Moolenaar071d4272004-06-13 20:20:40 +00003912#ifdef FEAT_MBYTE
3913 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3914 || (c < 255 && prog->regstart < 255 &&
3915#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003916 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar09463262017-06-17 20:55:06 +02003917 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003918 else
3919 retval = 0;
3920 }
3921 else
3922 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003923#ifdef FEAT_RELTIME
3924 int tm_count = 0;
3925#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003926 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003927 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003928 {
3929 if (prog->regstart != NUL)
3930 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003931 /* Skip until the char we know it must start with.
3932 * Used often, do some work to avoid call overhead. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003933 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003934#ifdef FEAT_MBYTE
3935 && !has_mbyte
3936#endif
3937 )
Bram Moolenaar0270f382018-07-17 05:43:58 +02003938 s = vim_strbyte(rex.line + col, prog->regstart);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003939 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02003940 s = cstrchr(rex.line + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003941 if (s == NULL)
3942 {
3943 retval = 0;
3944 break;
3945 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003946 col = (int)(s - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003947 }
3948
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003949 /* Check for maximum column to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003950 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003951 {
3952 retval = 0;
3953 break;
3954 }
3955
Bram Moolenaar09463262017-06-17 20:55:06 +02003956 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003957 if (retval > 0)
3958 break;
3959
3960 /* if not currently on the first line, get it again */
Bram Moolenaar0270f382018-07-17 05:43:58 +02003961 if (rex.lnum != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003962 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003963 rex.lnum = 0;
3964 rex.line = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003965 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003966 if (rex.line[col] == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003967 break;
3968#ifdef FEAT_MBYTE
3969 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003970 col += (*mb_ptr2len)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003971 else
3972#endif
3973 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003974#ifdef FEAT_RELTIME
3975 /* Check for timeout once in a twenty times to avoid overhead. */
3976 if (tm != NULL && ++tm_count == 20)
3977 {
3978 tm_count = 0;
3979 if (profile_passed_limit(tm))
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003980 {
3981 if (timed_out != NULL)
3982 *timed_out = TRUE;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003983 break;
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003984 }
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003985 }
3986#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003987 }
3988 }
3989
Bram Moolenaar071d4272004-06-13 20:20:40 +00003990theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003991 /* Free "reg_tofree" when it's a bit big.
3992 * Free regstack and backpos if they are bigger than their initial size. */
3993 if (reg_tofreelen > 400)
Bram Moolenaard23a8232018-02-10 18:45:26 +01003994 VIM_CLEAR(reg_tofree);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003995 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3996 ga_clear(&regstack);
3997 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3998 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003999
Bram Moolenaar071d4272004-06-13 20:20:40 +00004000 return retval;
4001}
4002
#ifdef FEAT_SYN_HL
/*
 * Create a new extmatch and mark it as referenced once.
 * Returns NULL when the allocation fails.
 */
    static reg_extmatch_T *
make_extmatch(void)
{
    reg_extmatch_T  *new_em;

    new_em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
    if (new_em == NULL)
        return NULL;
    new_em->refcnt = 1;
    return new_em;
}

/*
 * Add a reference to an extmatch.
 * "em" may be NULL, in which case nothing happens.  Returns "em".
 */
    reg_extmatch_T *
ref_extmatch(reg_extmatch_T *em)
{
    if (em == NULL)
        return em;
    ++em->refcnt;
    return em;
}

/*
 * Remove a reference to an extmatch.  If there are no references left, free
 * the info.
 * "em" may be NULL, in which case nothing happens.
 */
    void
unref_extmatch(reg_extmatch_T *em)
{
    int     idx;

    // Nothing to do without an extmatch or while it is still referenced.
    if (em == NULL || --em->refcnt > 0)
        return;

    for (idx = 0; idx < NSUBEXP; ++idx)
        vim_free(em->matches[idx]);
    vim_free(em);
}
#endif
4046
4047/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004048 * regtry - try match of "prog" with at rex.line["col"].
Bram Moolenaar071d4272004-06-13 20:20:40 +00004049 * Returns 0 for failure, number of lines contained in the match otherwise.
4050 */
4051 static long
Bram Moolenaar09463262017-06-17 20:55:06 +02004052regtry(
4053 bt_regprog_T *prog,
4054 colnr_T col,
4055 proftime_T *tm, /* timeout limit or NULL */
4056 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004057{
Bram Moolenaar0270f382018-07-17 05:43:58 +02004058 rex.input = rex.line + col;
4059 rex.need_clear_subexpr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004060#ifdef FEAT_SYN_HL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004061 // Clear the external match subpointers if necessary.
4062 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004063#endif
4064
Bram Moolenaar09463262017-06-17 20:55:06 +02004065 if (regmatch(prog->program + 1, tm, timed_out) == 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004066 return 0;
4067
4068 cleanup_subexpr();
4069 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004070 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004071 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004072 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004073 rex.reg_startpos[0].lnum = 0;
4074 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004075 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004076 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004077 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004078 rex.reg_endpos[0].lnum = rex.lnum;
4079 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004080 }
4081 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004082 /* Use line number of "\ze". */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004083 rex.lnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004084 }
4085 else
4086 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004087 if (rex.reg_startp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004088 rex.reg_startp[0] = rex.line + col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004089 if (rex.reg_endp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004090 rex.reg_endp[0] = rex.input;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004091 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004092#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004093 /* Package any found \z(...\) matches for export. Default is none. */
4094 unref_extmatch(re_extmatch_out);
4095 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004096
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004097 if (prog->reghasz == REX_SET)
4098 {
4099 int i;
4100
4101 cleanup_zsubexpr();
4102 re_extmatch_out = make_extmatch();
4103 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004104 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004105 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004106 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004107 /* Only accept single line matches. */
4108 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004109 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4110 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004111 re_extmatch_out->matches[i] =
4112 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004113 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004114 reg_endzpos[i].col - reg_startzpos[i].col);
4115 }
4116 else
4117 {
4118 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4119 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004120 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004121 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004122 }
4123 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004124 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004125#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004126 return 1 + rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004127}
4128
#ifdef FEAT_MBYTE
/*
 * Get class of previous character, the one just before rex.input.
 * Returns -1 when rex.input is at the start of the line.
 */
    static int
reg_prev_class(void)
{
    char_u  *prev;

    if (rex.input <= rex.line)
        return -1;

    // Step back one byte and then to the first byte of that character.
    prev = rex.input - 1;
    prev -= (*mb_head_off)(rex.line, prev);
    return mb_get_class_buf(prev, rex.reg_buf);
}
#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004142
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004143/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004144 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004145 */
4146 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004147reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004148{
4149 pos_T top, bot;
4150 linenr_T lnum;
4151 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004152 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004153 int mode;
4154 colnr_T start, end;
4155 colnr_T start2, end2;
4156 colnr_T cols;
4157
4158 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004159 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004160 return FALSE;
4161
4162 if (VIsual_active)
4163 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004164 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004165 {
4166 top = VIsual;
4167 bot = wp->w_cursor;
4168 }
4169 else
4170 {
4171 top = wp->w_cursor;
4172 bot = VIsual;
4173 }
4174 mode = VIsual_mode;
4175 }
4176 else
4177 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004178 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004179 {
4180 top = curbuf->b_visual.vi_start;
4181 bot = curbuf->b_visual.vi_end;
4182 }
4183 else
4184 {
4185 top = curbuf->b_visual.vi_end;
4186 bot = curbuf->b_visual.vi_start;
4187 }
4188 mode = curbuf->b_visual.vi_mode;
4189 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004190 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004191 if (lnum < top.lnum || lnum > bot.lnum)
4192 return FALSE;
4193
4194 if (mode == 'v')
4195 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004196 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004197 if ((lnum == top.lnum && col < top.col)
4198 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4199 return FALSE;
4200 }
4201 else if (mode == Ctrl_V)
4202 {
4203 getvvcol(wp, &top, &start, NULL, &end);
4204 getvvcol(wp, &bot, &start2, NULL, &end2);
4205 if (start2 < start)
4206 start = start2;
4207 if (end2 > end)
4208 end = end2;
4209 if (top.col == MAXCOL || bot.col == MAXCOL)
4210 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004211 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004212 if (cols < start || cols > end - (*p_sel == 'e'))
4213 return FALSE;
4214 }
4215 return TRUE;
4216}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004217
/* Advance rex.input with MB_PTR_ADV(). */
#define ADVANCE_REGINPUT()      MB_PTR_ADV(rex.input)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004219
/*
 * Storage for the arguments from BRACE_LIMITS.  They are actually local to
 * regmatch(), but are kept here to reduce the amount of stack space used (it
 * can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4227
4228/*
4229 * regmatch - main matching routine
4230 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004231 * Conceptually the strategy is simple: Check to see whether the current node
4232 * matches, push an item onto the regstack and loop to see whether the rest
4233 * matches, and then act accordingly. In practice we make some effort to
4234 * avoid using the regstack, in particular by going through "ordinary" nodes
4235 * (that don't need to know whether the rest of the match failed) by a nested
4236 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004237 *
Bram Moolenaar0270f382018-07-17 05:43:58 +02004238 * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after
Bram Moolenaar071d4272004-06-13 20:20:40 +00004239 * the last matched character.
Bram Moolenaar0270f382018-07-17 05:43:58 +02004240 * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an
Bram Moolenaar071d4272004-06-13 20:20:40 +00004241 * undefined state!
4242 */
4243 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004244regmatch(
Bram Moolenaar09463262017-06-17 20:55:06 +02004245 char_u *scan, /* Current node. */
4246 proftime_T *tm UNUSED, /* timeout limit or NULL */
4247 int *timed_out UNUSED) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004248{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004249 char_u *next; /* Next node. */
4250 int op;
4251 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004252 regitem_T *rp;
4253 int no;
4254 int status; /* one of the RA_ values: */
4255#define RA_FAIL 1 /* something failed, abort */
4256#define RA_CONT 2 /* continue in inner loop */
4257#define RA_BREAK 3 /* break inner loop */
4258#define RA_MATCH 4 /* successful match */
4259#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar09463262017-06-17 20:55:06 +02004260#ifdef FEAT_RELTIME
4261 int tm_count = 0;
4262#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004263
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004264 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004265 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004266 regstack.ga_len = 0;
4267 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004268
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004269 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004270 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004271 */
4272 for (;;)
4273 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004274 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4275 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004276 fast_breakcheck();
4277
4278#ifdef DEBUG
4279 if (scan != NULL && regnarrate)
4280 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004281 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004282 mch_errmsg("(\n");
4283 }
4284#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004285
4286 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004287 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004288 * regstack.
4289 */
4290 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004291 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004292 if (got_int || scan == NULL)
4293 {
4294 status = RA_FAIL;
4295 break;
4296 }
Bram Moolenaar09463262017-06-17 20:55:06 +02004297#ifdef FEAT_RELTIME
4298 /* Check for timeout once in a 100 times to avoid overhead. */
4299 if (tm != NULL && ++tm_count == 100)
4300 {
4301 tm_count = 0;
4302 if (profile_passed_limit(tm))
4303 {
4304 if (timed_out != NULL)
4305 *timed_out = TRUE;
4306 status = RA_FAIL;
4307 break;
4308 }
4309 }
4310#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004311 status = RA_CONT;
4312
Bram Moolenaar071d4272004-06-13 20:20:40 +00004313#ifdef DEBUG
4314 if (regnarrate)
4315 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004316 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004317 mch_errmsg("...\n");
4318# ifdef FEAT_SYN_HL
4319 if (re_extmatch_in != NULL)
4320 {
4321 int i;
4322
4323 mch_errmsg(_("External submatches:\n"));
4324 for (i = 0; i < NSUBEXP; i++)
4325 {
4326 mch_errmsg(" \"");
4327 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004328 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004329 mch_errmsg("\"\n");
4330 }
4331 }
4332# endif
4333 }
4334#endif
4335 next = regnext(scan);
4336
4337 op = OP(scan);
4338 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004339 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
Bram Moolenaar0270f382018-07-17 05:43:58 +02004340 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004341 {
4342 reg_nextline();
4343 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004344 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004345 {
4346 ADVANCE_REGINPUT();
4347 }
4348 else
4349 {
4350 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004351 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004352#ifdef FEAT_MBYTE
4353 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004354 c = (*mb_ptr2char)(rex.input);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004355 else
4356#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004357 c = *rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004358 switch (op)
4359 {
4360 case BOL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004361 if (rex.input != rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004362 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004363 break;
4364
4365 case EOL:
4366 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004367 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004368 break;
4369
4370 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004371 /* We're not at the beginning of the file when below the first
4372 * line where we started, not at the start of the line or we
4373 * didn't start at the first line of the buffer. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004374 if (rex.lnum != 0 || rex.input != rex.line
Bram Moolenaar6100d022016-10-02 16:51:57 +02004375 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004377 break;
4378
4379 case RE_EOF:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004380 if (rex.lnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004381 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004382 break;
4383
4384 case CURSOR:
4385 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004386 * rex.reg_win->w_cursor position to the match position. */
4387 if (rex.reg_win == NULL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004388 || (rex.lnum + rex.reg_firstlnum
Bram Moolenaar6100d022016-10-02 16:51:57 +02004389 != rex.reg_win->w_cursor.lnum)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004390 || ((colnr_T)(rex.input - rex.line)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004391 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004392 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004393 break;
4394
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004395 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004396 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004397 {
4398 int mark = OPERAND(scan)[0];
4399 int cmp = OPERAND(scan)[1];
4400 pos_T *pos;
4401
Bram Moolenaar6100d022016-10-02 16:51:57 +02004402 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004403 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004404 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004405 || (pos->lnum == rex.lnum + rex.reg_firstlnum
4406 ? (pos->col == (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004407 ? (cmp == '<' || cmp == '>')
Bram Moolenaar0270f382018-07-17 05:43:58 +02004408 : (pos->col < (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004409 ? cmp != '>'
4410 : cmp != '<'))
Bram Moolenaar0270f382018-07-17 05:43:58 +02004411 : (pos->lnum < rex.lnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004412 ? cmp != '>'
4413 : cmp != '<')))
4414 status = RA_NOMATCH;
4415 }
4416 break;
4417
4418 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004419 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004420 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004421 break;
4422
Bram Moolenaar071d4272004-06-13 20:20:40 +00004423 case RE_LNUM:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004424 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004425 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004426 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004427 break;
4428
4429 case RE_COL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004430 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004431 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004432 break;
4433
4434 case RE_VCOL:
4435 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004436 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar0270f382018-07-17 05:43:58 +02004437 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004438 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004439 break;
4440
Bram Moolenaar0270f382018-07-17 05:43:58 +02004441 case BOW: /* \<word; rex.input points to w */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004442 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004443 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004444#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004445 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004446 {
4447 int this_class;
4448
4449 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004450 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004451 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004452 status = RA_NOMATCH; /* not on a word at all */
4453 else if (reg_prev_class() == this_class)
4454 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004455 }
4456#endif
4457 else
4458 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004459 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
4460 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004461 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004462 }
4463 break;
4464
Bram Moolenaar0270f382018-07-17 05:43:58 +02004465 case EOW: /* word\>; rex.input points after d */
4466 if (rex.input == rex.line) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004467 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004468#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004469 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004470 {
4471 int this_class, prev_class;
4472
4473 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004474 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004475 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004476 if (this_class == prev_class
4477 || prev_class == 0 || prev_class == 1)
4478 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004479 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004480#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004481 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004483 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
4484 || (rex.input[0] != NUL
Bram Moolenaar6100d022016-10-02 16:51:57 +02004485 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004486 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004487 }
4488 break; /* Matched with EOW */
4489
4490 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004491 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004492 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004493 status = RA_NOMATCH;
4494 else
4495 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004496 break;
4497
4498 case IDENT:
4499 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004500 status = RA_NOMATCH;
4501 else
4502 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004503 break;
4504
4505 case SIDENT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004506 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004507 status = RA_NOMATCH;
4508 else
4509 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004510 break;
4511
4512 case KWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004513 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004514 status = RA_NOMATCH;
4515 else
4516 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004517 break;
4518
4519 case SKWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004520 if (VIM_ISDIGIT(*rex.input)
4521 || !vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004522 status = RA_NOMATCH;
4523 else
4524 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004525 break;
4526
4527 case FNAME:
4528 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004529 status = RA_NOMATCH;
4530 else
4531 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004532 break;
4533
4534 case SFNAME:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004535 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004536 status = RA_NOMATCH;
4537 else
4538 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004539 break;
4540
4541 case PRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004542 if (!vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004543 status = RA_NOMATCH;
4544 else
4545 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004546 break;
4547
4548 case SPRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004549 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004550 status = RA_NOMATCH;
4551 else
4552 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004553 break;
4554
4555 case WHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004556 if (!VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004557 status = RA_NOMATCH;
4558 else
4559 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 break;
4561
4562 case NWHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004563 if (c == NUL || VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004564 status = RA_NOMATCH;
4565 else
4566 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004567 break;
4568
4569 case DIGIT:
4570 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004571 status = RA_NOMATCH;
4572 else
4573 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004574 break;
4575
4576 case NDIGIT:
4577 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004578 status = RA_NOMATCH;
4579 else
4580 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004581 break;
4582
4583 case HEX:
4584 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004585 status = RA_NOMATCH;
4586 else
4587 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004588 break;
4589
4590 case NHEX:
4591 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004592 status = RA_NOMATCH;
4593 else
4594 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004595 break;
4596
4597 case OCTAL:
4598 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004599 status = RA_NOMATCH;
4600 else
4601 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004602 break;
4603
4604 case NOCTAL:
4605 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004606 status = RA_NOMATCH;
4607 else
4608 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 break;
4610
4611 case WORD:
4612 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 status = RA_NOMATCH;
4614 else
4615 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004616 break;
4617
4618 case NWORD:
4619 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004620 status = RA_NOMATCH;
4621 else
4622 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 break;
4624
4625 case HEAD:
4626 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004627 status = RA_NOMATCH;
4628 else
4629 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004630 break;
4631
4632 case NHEAD:
4633 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004634 status = RA_NOMATCH;
4635 else
4636 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004637 break;
4638
4639 case ALPHA:
4640 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004641 status = RA_NOMATCH;
4642 else
4643 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004644 break;
4645
4646 case NALPHA:
4647 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004648 status = RA_NOMATCH;
4649 else
4650 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004651 break;
4652
4653 case LOWER:
4654 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004655 status = RA_NOMATCH;
4656 else
4657 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004658 break;
4659
4660 case NLOWER:
4661 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004662 status = RA_NOMATCH;
4663 else
4664 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004665 break;
4666
4667 case UPPER:
4668 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004669 status = RA_NOMATCH;
4670 else
4671 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004672 break;
4673
4674 case NUPPER:
4675 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004676 status = RA_NOMATCH;
4677 else
4678 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004679 break;
4680
4681 case EXACTLY:
4682 {
4683 int len;
4684 char_u *opnd;
4685
4686 opnd = OPERAND(scan);
4687 /* Inline the first byte, for speed. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004688 if (*opnd != *rex.input
Bram Moolenaar6100d022016-10-02 16:51:57 +02004689 && (!rex.reg_ic || (
Bram Moolenaar071d4272004-06-13 20:20:40 +00004690#ifdef FEAT_MBYTE
4691 !enc_utf8 &&
4692#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004693 MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004694 status = RA_NOMATCH;
4695 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004696 {
4697 /* match empty string always works; happens when "~" is
4698 * empty. */
4699 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004700 else
4701 {
4702 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004703#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02004704 && !(enc_utf8 && rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004705#endif
4706 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004707 {
4708 len = 1; /* matched a single byte above */
4709 }
4710 else
4711 {
4712 /* Need to match first byte again for multi-byte. */
4713 len = (int)STRLEN(opnd);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004714 if (cstrncmp(opnd, rex.input, &len) != 0)
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004715 status = RA_NOMATCH;
4716 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004717#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004718 /* Check for following composing character, unless %C
4719 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004720 if (status != RA_NOMATCH
4721 && enc_utf8
Bram Moolenaar0270f382018-07-17 05:43:58 +02004722 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004723 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004724 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004725 {
4726 /* raaron: This code makes a composing character get
4727 * ignored, which is the correct behavior (sometimes)
4728 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004729 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004730 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004731#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004732 if (status != RA_NOMATCH)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004733 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004734 }
4735 }
4736 break;
4737
4738 case ANYOF:
4739 case ANYBUT:
4740 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004741 status = RA_NOMATCH;
4742 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4743 status = RA_NOMATCH;
4744 else
4745 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004746 break;
4747
4748#ifdef FEAT_MBYTE
4749 case MULTIBYTECODE:
4750 if (has_mbyte)
4751 {
4752 int i, len;
4753 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004754 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004755
4756 opnd = OPERAND(scan);
4757 /* Safety check (just in case 'encoding' was changed since
4758 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004759 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004760 {
4761 status = RA_NOMATCH;
4762 break;
4763 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004764 if (enc_utf8)
Bram Moolenaarace95982017-03-29 17:30:27 +02004765 opndc = utf_ptr2char(opnd);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004766 if (enc_utf8 && utf_iscomposing(opndc))
4767 {
4768 /* When only a composing char is given match at any
4769 * position where that composing char appears. */
4770 status = RA_NOMATCH;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004771 for (i = 0; rex.input[i] != NUL;
4772 i += utf_ptr2len(rex.input + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004773 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004774 inpc = utf_ptr2char(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004775 if (!utf_iscomposing(inpc))
4776 {
4777 if (i > 0)
4778 break;
4779 }
4780 else if (opndc == inpc)
4781 {
4782 /* Include all following composing chars. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004783 len = i + utfc_ptr2len(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004784 status = RA_MATCH;
4785 break;
4786 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004787 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004788 }
4789 else
4790 for (i = 0; i < len; ++i)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004791 if (opnd[i] != rex.input[i])
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004792 {
4793 status = RA_NOMATCH;
4794 break;
4795 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004796 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004797 }
4798 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004799 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004800 break;
4801#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004802 case RE_COMPOSING:
4803#ifdef FEAT_MBYTE
4804 if (enc_utf8)
4805 {
4806 /* Skip composing characters. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004807 while (utf_iscomposing(utf_ptr2char(rex.input)))
4808 MB_CPTR_ADV(rex.input);
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004809 }
4810#endif
4811 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004812
4813 case NOTHING:
4814 break;
4815
4816 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004817 {
4818 int i;
4819 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004820
Bram Moolenaar582fd852005-03-28 20:58:01 +00004821 /*
4822 * When we run into BACK we need to check if we don't keep
4823 * looping without matching any input. The second and later
4824 * times a BACK is encountered it fails if the input is still
4825 * at the same position as the previous time.
4826 * The positions are stored in "backpos" and found by the
4827 * current value of "scan", the position in the RE program.
4828 */
4829 bp = (backpos_T *)backpos.ga_data;
4830 for (i = 0; i < backpos.ga_len; ++i)
4831 if (bp[i].bp_scan == scan)
4832 break;
4833 if (i == backpos.ga_len)
4834 {
4835 /* First time at this BACK, make room to store the pos. */
4836 if (ga_grow(&backpos, 1) == FAIL)
4837 status = RA_FAIL;
4838 else
4839 {
4840 /* get "ga_data" again, it may have changed */
4841 bp = (backpos_T *)backpos.ga_data;
4842 bp[i].bp_scan = scan;
4843 ++backpos.ga_len;
4844 }
4845 }
4846 else if (reg_save_equal(&bp[i].bp_pos))
4847 /* Still at same position as last time, fail. */
4848 status = RA_NOMATCH;
4849
4850 if (status != RA_FAIL && status != RA_NOMATCH)
4851 reg_save(&bp[i].bp_pos, &backpos);
4852 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004853 break;
4854
Bram Moolenaar071d4272004-06-13 20:20:40 +00004855 case MOPEN + 0: /* Match start: \zs */
4856 case MOPEN + 1: /* \( */
4857 case MOPEN + 2:
4858 case MOPEN + 3:
4859 case MOPEN + 4:
4860 case MOPEN + 5:
4861 case MOPEN + 6:
4862 case MOPEN + 7:
4863 case MOPEN + 8:
4864 case MOPEN + 9:
4865 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004866 no = op - MOPEN;
4867 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004868 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004869 if (rp == NULL)
4870 status = RA_FAIL;
4871 else
4872 {
4873 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004874 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4875 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004876 /* We simply continue and handle the result when done. */
4877 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004878 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004879 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004880
4881 case NOPEN: /* \%( */
4882 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004883 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004884 status = RA_FAIL;
4885 /* We simply continue and handle the result when done. */
4886 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004887
4888#ifdef FEAT_SYN_HL
4889 case ZOPEN + 1:
4890 case ZOPEN + 2:
4891 case ZOPEN + 3:
4892 case ZOPEN + 4:
4893 case ZOPEN + 5:
4894 case ZOPEN + 6:
4895 case ZOPEN + 7:
4896 case ZOPEN + 8:
4897 case ZOPEN + 9:
4898 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004899 no = op - ZOPEN;
4900 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004901 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004902 if (rp == NULL)
4903 status = RA_FAIL;
4904 else
4905 {
4906 rp->rs_no = no;
4907 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4908 &reg_startzp[no]);
4909 /* We simply continue and handle the result when done. */
4910 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004911 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004912 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004913#endif
4914
4915 case MCLOSE + 0: /* Match end: \ze */
4916 case MCLOSE + 1: /* \) */
4917 case MCLOSE + 2:
4918 case MCLOSE + 3:
4919 case MCLOSE + 4:
4920 case MCLOSE + 5:
4921 case MCLOSE + 6:
4922 case MCLOSE + 7:
4923 case MCLOSE + 8:
4924 case MCLOSE + 9:
4925 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004926 no = op - MCLOSE;
4927 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004928 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004929 if (rp == NULL)
4930 status = RA_FAIL;
4931 else
4932 {
4933 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004934 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4935 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004936 /* We simply continue and handle the result when done. */
4937 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004938 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004939 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004940
4941#ifdef FEAT_SYN_HL
4942 case ZCLOSE + 1: /* \) after \z( */
4943 case ZCLOSE + 2:
4944 case ZCLOSE + 3:
4945 case ZCLOSE + 4:
4946 case ZCLOSE + 5:
4947 case ZCLOSE + 6:
4948 case ZCLOSE + 7:
4949 case ZCLOSE + 8:
4950 case ZCLOSE + 9:
4951 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004952 no = op - ZCLOSE;
4953 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004954 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004955 if (rp == NULL)
4956 status = RA_FAIL;
4957 else
4958 {
4959 rp->rs_no = no;
4960 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4961 &reg_endzp[no]);
4962 /* We simply continue and handle the result when done. */
4963 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004964 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004965 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004966#endif
4967
4968 case BACKREF + 1:
4969 case BACKREF + 2:
4970 case BACKREF + 3:
4971 case BACKREF + 4:
4972 case BACKREF + 5:
4973 case BACKREF + 6:
4974 case BACKREF + 7:
4975 case BACKREF + 8:
4976 case BACKREF + 9:
4977 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004978 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004979
4980 no = op - BACKREF;
4981 cleanup_subexpr();
4982 if (!REG_MULTI) /* Single-line regexp */
4983 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004984 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004985 {
4986 /* Backref was not set: Match an empty string. */
4987 len = 0;
4988 }
4989 else
4990 {
4991 /* Compare current input with back-ref in the same
4992 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004993 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004994 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004995 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004996 }
4997 }
4998 else /* Multi-line regexp */
4999 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005000 if (rex.reg_startpos[no].lnum < 0
5001 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005002 {
5003 /* Backref was not set: Match an empty string. */
5004 len = 0;
5005 }
5006 else
5007 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005008 if (rex.reg_startpos[no].lnum == rex.lnum
5009 && rex.reg_endpos[no].lnum == rex.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005010 {
5011 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02005012 len = rex.reg_endpos[no].col
5013 - rex.reg_startpos[no].col;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005014 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
5015 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005016 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005017 }
5018 else
5019 {
5020 /* Messy situation: Need to compare between two
5021 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005022 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02005023 rex.reg_startpos[no].lnum,
5024 rex.reg_startpos[no].col,
5025 rex.reg_endpos[no].lnum,
5026 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02005027 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005028
5029 if (r != RA_MATCH)
5030 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005031 }
5032 }
5033 }
5034
5035 /* Matched the backref, skip over it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005036 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005037 }
5038 break;
5039
5040#ifdef FEAT_SYN_HL
5041 case ZREF + 1:
5042 case ZREF + 2:
5043 case ZREF + 3:
5044 case ZREF + 4:
5045 case ZREF + 5:
5046 case ZREF + 6:
5047 case ZREF + 7:
5048 case ZREF + 8:
5049 case ZREF + 9:
5050 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005051 int len;
5052
5053 cleanup_zsubexpr();
5054 no = op - ZREF;
5055 if (re_extmatch_in != NULL
5056 && re_extmatch_in->matches[no] != NULL)
5057 {
5058 len = (int)STRLEN(re_extmatch_in->matches[no]);
5059 if (cstrncmp(re_extmatch_in->matches[no],
Bram Moolenaar0270f382018-07-17 05:43:58 +02005060 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005061 status = RA_NOMATCH;
5062 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005063 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005064 }
5065 else
5066 {
5067 /* Backref was not set: Match an empty string. */
5068 }
5069 }
5070 break;
5071#endif
5072
5073 case BRANCH:
5074 {
5075 if (OP(next) != BRANCH) /* No choice. */
5076 next = OPERAND(scan); /* Avoid recursion. */
5077 else
5078 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005079 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005080 if (rp == NULL)
5081 status = RA_FAIL;
5082 else
5083 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005084 }
5085 }
5086 break;
5087
5088 case BRACE_LIMITS:
5089 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005090 if (OP(next) == BRACE_SIMPLE)
5091 {
5092 bl_minval = OPERAND_MIN(scan);
5093 bl_maxval = OPERAND_MAX(scan);
5094 }
5095 else if (OP(next) >= BRACE_COMPLEX
5096 && OP(next) < BRACE_COMPLEX + 10)
5097 {
5098 no = OP(next) - BRACE_COMPLEX;
5099 brace_min[no] = OPERAND_MIN(scan);
5100 brace_max[no] = OPERAND_MAX(scan);
5101 brace_count[no] = 0;
5102 }
5103 else
5104 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01005105 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005106 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005107 }
5108 }
5109 break;
5110
5111 case BRACE_COMPLEX + 0:
5112 case BRACE_COMPLEX + 1:
5113 case BRACE_COMPLEX + 2:
5114 case BRACE_COMPLEX + 3:
5115 case BRACE_COMPLEX + 4:
5116 case BRACE_COMPLEX + 5:
5117 case BRACE_COMPLEX + 6:
5118 case BRACE_COMPLEX + 7:
5119 case BRACE_COMPLEX + 8:
5120 case BRACE_COMPLEX + 9:
5121 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005122 no = op - BRACE_COMPLEX;
5123 ++brace_count[no];
5124
5125 /* If not matched enough times yet, try one more */
5126 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005127 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005128 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005129 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005130 if (rp == NULL)
5131 status = RA_FAIL;
5132 else
5133 {
5134 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005135 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005136 next = OPERAND(scan);
5137 /* We continue and handle the result when done. */
5138 }
5139 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005140 }
5141
5142 /* If matched enough times, may try matching some more */
5143 if (brace_min[no] <= brace_max[no])
5144 {
5145 /* Range is the normal way around, use longest match */
5146 if (brace_count[no] <= brace_max[no])
5147 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005148 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005149 if (rp == NULL)
5150 status = RA_FAIL;
5151 else
5152 {
5153 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005154 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005155 next = OPERAND(scan);
5156 /* We continue and handle the result when done. */
5157 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005158 }
5159 }
5160 else
5161 {
5162 /* Range is backwards, use shortest match first */
5163 if (brace_count[no] <= brace_min[no])
5164 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005165 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005166 if (rp == NULL)
5167 status = RA_FAIL;
5168 else
5169 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005170 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005171 /* We continue and handle the result when done. */
5172 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005173 }
5174 }
5175 }
5176 break;
5177
5178 case BRACE_SIMPLE:
5179 case STAR:
5180 case PLUS:
5181 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005182 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005183
5184 /*
5185 * Lookahead to avoid useless match attempts when we know
5186 * what character comes next.
5187 */
5188 if (OP(next) == EXACTLY)
5189 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005190 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005191 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005192 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005193 if (MB_ISUPPER(rst.nextb))
5194 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005196 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005197 }
5198 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005199 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005200 }
5201 else
5202 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005203 rst.nextb = NUL;
5204 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005205 }
5206 if (op != BRACE_SIMPLE)
5207 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005208 rst.minval = (op == STAR) ? 0 : 1;
5209 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005210 }
5211 else
5212 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005213 rst.minval = bl_minval;
5214 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005215 }
5216
5217 /*
5218 * When maxval > minval, try matching as much as possible, up
5219 * to maxval. When maxval < minval, try matching at least the
5220 * minimal number (since the range is backwards, that's also
5221 * maxval!).
5222 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005223 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005224 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005225 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005226 status = RA_FAIL;
5227 break;
5228 }
5229 if (rst.minval <= rst.maxval
5230 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5231 {
5232 /* It could match. Prepare for trying to match what
5233 * follows. The code is below. Parameters are stored in
5234 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005235 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005236 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005237 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005238 status = RA_FAIL;
5239 }
5240 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005241 status = RA_FAIL;
5242 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005243 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005244 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005245 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005246 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005247 if (rp == NULL)
5248 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005249 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005250 {
5251 *(((regstar_T *)rp) - 1) = rst;
5252 status = RA_BREAK; /* skip the restore bits */
5253 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005254 }
5255 }
5256 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005257 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005258
Bram Moolenaar071d4272004-06-13 20:20:40 +00005259 }
5260 break;
5261
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005262 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005263 case MATCH:
5264 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005265 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005266 if (rp == NULL)
5267 status = RA_FAIL;
5268 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005269 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005270 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005271 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005272 next = OPERAND(scan);
5273 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005274 }
5275 break;
5276
5277 case BEHIND:
5278 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005279 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005280 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005281 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005282 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005283 status = RA_FAIL;
5284 }
5285 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005286 status = RA_FAIL;
5287 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005288 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005289 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005290 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005291 if (rp == NULL)
5292 status = RA_FAIL;
5293 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005294 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005295 /* Need to save the subexpr to be able to restore them
5296 * when there is a match but we don't use it. */
5297 save_subexpr(((regbehind_T *)rp) - 1);
5298
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005299 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005300 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005301 /* First try if what follows matches. If it does then we
5302 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005303 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005304 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005305 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005306
5307 case BHPOS:
5308 if (REG_MULTI)
5309 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005310 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
5311 || behind_pos.rs_u.pos.lnum != rex.lnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005312 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005313 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005314 else if (behind_pos.rs_u.ptr != rex.input)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005315 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005316 break;
5317
5318 case NEWL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02005319 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005320 || rex.reg_line_lbr)
5321 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005322 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005323 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005324 ADVANCE_REGINPUT();
5325 else
5326 reg_nextline();
5327 break;
5328
5329 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005330 status = RA_MATCH; /* Success! */
5331 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005332
5333 default:
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005334 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00005335#ifdef DEBUG
5336 printf("Illegal op code %d\n", op);
5337#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005338 status = RA_FAIL;
5339 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005340 }
5341 }
5342
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005343 /* If we can't continue sequentially, break the inner loop. */
5344 if (status != RA_CONT)
5345 break;
5346
5347 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005348 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005349
5350 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005351
5352 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005353 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005354 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005355 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005356 while (regstack.ga_len > 0 && status != RA_FAIL)
5357 {
5358 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5359 switch (rp->rs_state)
5360 {
5361 case RS_NOPEN:
5362 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005363 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005364 break;
5365
5366 case RS_MOPEN:
5367 /* Pop the state. Restore pointers when there is no match. */
5368 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005369 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5370 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005371 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005372 break;
5373
5374#ifdef FEAT_SYN_HL
5375 case RS_ZOPEN:
5376 /* Pop the state. Restore pointers when there is no match. */
5377 if (status == RA_NOMATCH)
5378 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5379 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005380 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005381 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005382#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005383
5384 case RS_MCLOSE:
5385 /* Pop the state. Restore pointers when there is no match. */
5386 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005387 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5388 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005389 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005390 break;
5391
5392#ifdef FEAT_SYN_HL
5393 case RS_ZCLOSE:
5394 /* Pop the state. Restore pointers when there is no match. */
5395 if (status == RA_NOMATCH)
5396 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5397 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005398 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005399 break;
5400#endif
5401
5402 case RS_BRANCH:
5403 if (status == RA_MATCH)
5404 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005405 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005406 else
5407 {
5408 if (status != RA_BREAK)
5409 {
5410 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005411 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005412 scan = rp->rs_scan;
5413 }
5414 if (scan == NULL || OP(scan) != BRANCH)
5415 {
5416 /* no more branches, didn't find a match */
5417 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005418 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005419 }
5420 else
5421 {
5422 /* Prepare to try a branch. */
5423 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005424 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005425 scan = OPERAND(scan);
5426 }
5427 }
5428 break;
5429
5430 case RS_BRCPLX_MORE:
5431 /* Pop the state. Restore pointers when there is no match. */
5432 if (status == RA_NOMATCH)
5433 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005434 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005435 --brace_count[rp->rs_no]; /* decrement match count */
5436 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005437 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005438 break;
5439
5440 case RS_BRCPLX_LONG:
5441 /* Pop the state. Restore pointers when there is no match. */
5442 if (status == RA_NOMATCH)
5443 {
5444 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005445 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005446 --brace_count[rp->rs_no];
5447 /* continue with the items after "\{}" */
5448 status = RA_CONT;
5449 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005450 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005451 if (status == RA_CONT)
5452 scan = regnext(scan);
5453 break;
5454
5455 case RS_BRCPLX_SHORT:
5456 /* Pop the state. Restore pointers when there is no match. */
5457 if (status == RA_NOMATCH)
5458 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005459 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005460 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005461 if (status == RA_NOMATCH)
5462 {
5463 scan = OPERAND(scan);
5464 status = RA_CONT;
5465 }
5466 break;
5467
5468 case RS_NOMATCH:
5469 /* Pop the state. If the operand matches for NOMATCH or
5470 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5471 * except for SUBPAT, and continue with the next item. */
5472 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5473 status = RA_NOMATCH;
5474 else
5475 {
5476 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005477 if (rp->rs_no != SUBPAT) /* zero-width */
5478 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005479 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005480 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005481 if (status == RA_CONT)
5482 scan = regnext(scan);
5483 break;
5484
5485 case RS_BEHIND1:
5486 if (status == RA_NOMATCH)
5487 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005488 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005489 regstack.ga_len -= sizeof(regbehind_T);
5490 }
5491 else
5492 {
5493 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5494 * the behind part does (not) match before the current
5495 * position in the input. This must be done at every
5496 * position in the input and checking if the match ends at
5497 * the current position. */
5498
5499 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005500 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005501
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005502 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005503 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005504 * result, hitting the start of the line or the previous
5505 * line (for multi-line matching).
5506 * Set behind_pos to where the match should end, BHPOS
5507 * will match it. Save the current value. */
5508 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5509 behind_pos = rp->rs_un.regsave;
5510
5511 rp->rs_state = RS_BEHIND2;
5512
Bram Moolenaar582fd852005-03-28 20:58:01 +00005513 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005514 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005515 }
5516 break;
5517
5518 case RS_BEHIND2:
5519 /*
5520 * Looping for BEHIND / NOBEHIND match.
5521 */
5522 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5523 {
5524 /* found a match that ends where "next" started */
5525 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5526 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005527 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5528 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005529 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005530 {
5531 /* But we didn't want a match. Need to restore the
5532 * subexpr, because what follows matched, so they have
5533 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005534 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005535 restore_subexpr(((regbehind_T *)rp) - 1);
5536 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005537 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005538 regstack.ga_len -= sizeof(regbehind_T);
5539 }
5540 else
5541 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005542 long limit;
5543
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005544 /* No match or a match that doesn't end where we want it: Go
5545 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005546 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005547 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005548 if (REG_MULTI)
5549 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005550 if (limit > 0
5551 && ((rp->rs_un.regsave.rs_u.pos.lnum
5552 < behind_pos.rs_u.pos.lnum
Bram Moolenaar0270f382018-07-17 05:43:58 +02005553 ? (colnr_T)STRLEN(rex.line)
Bram Moolenaar61602c52013-06-01 19:54:43 +02005554 : behind_pos.rs_u.pos.col)
5555 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5556 no = FAIL;
5557 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005558 {
5559 if (rp->rs_un.regsave.rs_u.pos.lnum
5560 < behind_pos.rs_u.pos.lnum
5561 || reg_getline(
5562 --rp->rs_un.regsave.rs_u.pos.lnum)
5563 == NULL)
5564 no = FAIL;
5565 else
5566 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005567 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005568 rp->rs_un.regsave.rs_u.pos.col =
Bram Moolenaar0270f382018-07-17 05:43:58 +02005569 (colnr_T)STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005570 }
5571 }
5572 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005573 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005574#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005575 if (has_mbyte)
Bram Moolenaarbc197192018-02-13 16:35:06 +01005576 {
5577 char_u *line =
Bram Moolenaar866f3552019-01-01 22:19:08 +01005578 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
Bram Moolenaarbc197192018-02-13 16:35:06 +01005579
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005580 rp->rs_un.regsave.rs_u.pos.col -=
Bram Moolenaarbc197192018-02-13 16:35:06 +01005581 (*mb_head_off)(line, line
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005582 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaarbc197192018-02-13 16:35:06 +01005583 }
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005584 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005585#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005586 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005587 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005588 }
5589 else
5590 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005591 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005592 no = FAIL;
5593 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005594 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005595 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005596 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5597 - rp->rs_un.regsave.rs_u.ptr) > limit)
5598 no = FAIL;
5599 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005600 }
5601 if (no == OK)
5602 {
5603 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005604 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005605 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005606 if (status == RA_MATCH)
5607 {
5608 /* We did match, so subexpr may have been changed,
5609 * need to restore them for the next try. */
5610 status = RA_NOMATCH;
5611 restore_subexpr(((regbehind_T *)rp) - 1);
5612 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005613 }
5614 else
5615 {
5616 /* Can't advance. For NOBEHIND that's a match. */
5617 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5618 if (rp->rs_no == NOBEHIND)
5619 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005620 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5621 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005622 status = RA_MATCH;
5623 }
5624 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005625 {
5626 /* We do want a proper match. Need to restore the
5627 * subexpr if we had a match, because they may have
5628 * been set. */
5629 if (status == RA_MATCH)
5630 {
5631 status = RA_NOMATCH;
5632 restore_subexpr(((regbehind_T *)rp) - 1);
5633 }
5634 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005635 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005636 regstack.ga_len -= sizeof(regbehind_T);
5637 }
5638 }
5639 break;
5640
5641 case RS_STAR_LONG:
5642 case RS_STAR_SHORT:
5643 {
5644 regstar_T *rst = ((regstar_T *)rp) - 1;
5645
5646 if (status == RA_MATCH)
5647 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005648 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005649 regstack.ga_len -= sizeof(regstar_T);
5650 break;
5651 }
5652
5653 /* Tried once already, restore input pointers. */
5654 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005655 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005656
5657 /* Repeat until we found a position where it could match. */
5658 for (;;)
5659 {
5660 if (status != RA_BREAK)
5661 {
5662 /* Tried first position already, advance. */
5663 if (rp->rs_state == RS_STAR_LONG)
5664 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005665 /* Trying for longest match, but couldn't or
5666 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005667 if (--rst->count < rst->minval)
5668 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005669 if (rex.input == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005670 {
5671 /* backup to last char of previous line */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005672 --rex.lnum;
5673 rex.line = reg_getline(rex.lnum);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005674 /* Just in case regrepeat() didn't count
5675 * right. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005676 if (rex.line == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005677 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005678 rex.input = rex.line + STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005679 fast_breakcheck();
5680 }
5681 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005682 MB_PTR_BACK(rex.line, rex.input);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005683 }
5684 else
5685 {
5686 /* Range is backwards, use shortest match first.
5687 * Careful: maxval and minval are exchanged!
5688 * Couldn't or didn't match: try advancing one
5689 * char. */
5690 if (rst->count == rst->minval
5691 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5692 break;
5693 ++rst->count;
5694 }
5695 if (got_int)
5696 break;
5697 }
5698 else
5699 status = RA_NOMATCH;
5700
5701 /* If it could match, try it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005702 if (rst->nextb == NUL || *rex.input == rst->nextb
5703 || *rex.input == rst->nextb_ic)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005704 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005705 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005706 scan = regnext(rp->rs_scan);
5707 status = RA_CONT;
5708 break;
5709 }
5710 }
5711 if (status != RA_CONT)
5712 {
5713 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005714 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005715 regstack.ga_len -= sizeof(regstar_T);
5716 status = RA_NOMATCH;
5717 }
5718 }
5719 break;
5720 }
5721
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005722 /* If we want to continue the inner loop or didn't pop a state
5723 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005724 if (status == RA_CONT || rp == (regitem_T *)
5725 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5726 break;
5727 }
5728
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005729 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005730 if (status == RA_CONT)
5731 continue;
5732
5733 /*
5734 * If the regstack is empty or something failed we are done.
5735 */
5736 if (regstack.ga_len == 0 || status == RA_FAIL)
5737 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005738 if (scan == NULL)
5739 {
5740 /*
5741 * We get here only if there's trouble -- normally "case END" is
5742 * the terminating point.
5743 */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005744 emsg(_(e_re_corr));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005745#ifdef DEBUG
5746 printf("Premature EOL\n");
5747#endif
5748 }
5749 return (status == RA_MATCH);
5750 }
5751
5752 } /* End of loop until the regstack is empty. */
5753
5754 /* NOTREACHED */
5755}
5756
5757/*
5758 * Push an item onto the regstack.
5759 * Returns pointer to new item. Returns NULL when out of memory.
5760 */
5761 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005762regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005763{
5764 regitem_T *rp;
5765
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005766 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005767 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005768 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005769 return NULL;
5770 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005771 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005772 return NULL;
5773
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005774 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005775 rp->rs_state = state;
5776 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005777
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005778 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005779 return rp;
5780}
5781
5782/*
5783 * Pop an item from the regstack.
5784 */
5785 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005786regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005787{
5788 regitem_T *rp;
5789
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005790 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005791 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005792
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005793 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005794}
5795
Bram Moolenaar071d4272004-06-13 20:20:40 +00005796/*
5797 * regrepeat - repeatedly match something simple, return how many.
Bram Moolenaar0270f382018-07-17 05:43:58 +02005798 * Advances rex.input (and rex.lnum) to just after the matched chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005799 */
5800 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005801regrepeat(
5802 char_u *p,
5803 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005804{
5805 long count = 0;
5806 char_u *scan;
5807 char_u *opnd;
5808 int mask;
5809 int testval = 0;
5810
Bram Moolenaar0270f382018-07-17 05:43:58 +02005811 scan = rex.input; /* Make local copy of rex.input for speed. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005812 opnd = OPERAND(p);
5813 switch (OP(p))
5814 {
5815 case ANY:
5816 case ANY + ADD_NL:
5817 while (count < maxcount)
5818 {
5819 /* Matching anything means we continue until end-of-line (or
5820 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5821 while (*scan != NUL && count < maxcount)
5822 {
5823 ++count;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005824 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005825 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005826 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005827 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005828 break;
5829 ++count; /* count the line-break */
5830 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005831 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005832 if (got_int)
5833 break;
5834 }
5835 break;
5836
5837 case IDENT:
5838 case IDENT + ADD_NL:
5839 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005840 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005841 case SIDENT:
5842 case SIDENT + ADD_NL:
5843 while (count < maxcount)
5844 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005845 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005846 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005847 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005848 }
5849 else if (*scan == NUL)
5850 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005851 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005852 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005853 break;
5854 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005855 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005856 if (got_int)
5857 break;
5858 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005859 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005860 ++scan;
5861 else
5862 break;
5863 ++count;
5864 }
5865 break;
5866
5867 case KWORD:
5868 case KWORD + ADD_NL:
5869 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005870 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005871 case SKWORD:
5872 case SKWORD + ADD_NL:
5873 while (count < maxcount)
5874 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005875 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005876 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005877 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005878 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005879 }
5880 else if (*scan == NUL)
5881 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005882 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005883 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005884 break;
5885 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005886 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005887 if (got_int)
5888 break;
5889 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005890 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005891 ++scan;
5892 else
5893 break;
5894 ++count;
5895 }
5896 break;
5897
5898 case FNAME:
5899 case FNAME + ADD_NL:
5900 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005901 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005902 case SFNAME:
5903 case SFNAME + ADD_NL:
5904 while (count < maxcount)
5905 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005906 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005907 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005908 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005909 }
5910 else if (*scan == NUL)
5911 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005912 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005913 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005914 break;
5915 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005916 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005917 if (got_int)
5918 break;
5919 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005920 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005921 ++scan;
5922 else
5923 break;
5924 ++count;
5925 }
5926 break;
5927
5928 case PRINT:
5929 case PRINT + ADD_NL:
5930 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005931 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005932 case SPRINT:
5933 case SPRINT + ADD_NL:
5934 while (count < maxcount)
5935 {
5936 if (*scan == NUL)
5937 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005938 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005939 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005940 break;
5941 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005942 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005943 if (got_int)
5944 break;
5945 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005946 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5947 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005948 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005949 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005950 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005951 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005952 ++scan;
5953 else
5954 break;
5955 ++count;
5956 }
5957 break;
5958
5959 case WHITE:
5960 case WHITE + ADD_NL:
5961 testval = mask = RI_WHITE;
5962do_class:
5963 while (count < maxcount)
5964 {
5965#ifdef FEAT_MBYTE
5966 int l;
5967#endif
5968 if (*scan == NUL)
5969 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005970 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005971 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005972 break;
5973 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005974 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005975 if (got_int)
5976 break;
5977 }
5978#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005979 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005980 {
5981 if (testval != 0)
5982 break;
5983 scan += l;
5984 }
5985#endif
5986 else if ((class_tab[*scan] & mask) == testval)
5987 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005988 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005989 ++scan;
5990 else
5991 break;
5992 ++count;
5993 }
5994 break;
5995
5996 case NWHITE:
5997 case NWHITE + ADD_NL:
5998 mask = RI_WHITE;
5999 goto do_class;
6000 case DIGIT:
6001 case DIGIT + ADD_NL:
6002 testval = mask = RI_DIGIT;
6003 goto do_class;
6004 case NDIGIT:
6005 case NDIGIT + ADD_NL:
6006 mask = RI_DIGIT;
6007 goto do_class;
6008 case HEX:
6009 case HEX + ADD_NL:
6010 testval = mask = RI_HEX;
6011 goto do_class;
6012 case NHEX:
6013 case NHEX + ADD_NL:
6014 mask = RI_HEX;
6015 goto do_class;
6016 case OCTAL:
6017 case OCTAL + ADD_NL:
6018 testval = mask = RI_OCTAL;
6019 goto do_class;
6020 case NOCTAL:
6021 case NOCTAL + ADD_NL:
6022 mask = RI_OCTAL;
6023 goto do_class;
6024 case WORD:
6025 case WORD + ADD_NL:
6026 testval = mask = RI_WORD;
6027 goto do_class;
6028 case NWORD:
6029 case NWORD + ADD_NL:
6030 mask = RI_WORD;
6031 goto do_class;
6032 case HEAD:
6033 case HEAD + ADD_NL:
6034 testval = mask = RI_HEAD;
6035 goto do_class;
6036 case NHEAD:
6037 case NHEAD + ADD_NL:
6038 mask = RI_HEAD;
6039 goto do_class;
6040 case ALPHA:
6041 case ALPHA + ADD_NL:
6042 testval = mask = RI_ALPHA;
6043 goto do_class;
6044 case NALPHA:
6045 case NALPHA + ADD_NL:
6046 mask = RI_ALPHA;
6047 goto do_class;
6048 case LOWER:
6049 case LOWER + ADD_NL:
6050 testval = mask = RI_LOWER;
6051 goto do_class;
6052 case NLOWER:
6053 case NLOWER + ADD_NL:
6054 mask = RI_LOWER;
6055 goto do_class;
6056 case UPPER:
6057 case UPPER + ADD_NL:
6058 testval = mask = RI_UPPER;
6059 goto do_class;
6060 case NUPPER:
6061 case NUPPER + ADD_NL:
6062 mask = RI_UPPER;
6063 goto do_class;
6064
6065 case EXACTLY:
6066 {
6067 int cu, cl;
6068
6069 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006070 * would have been used for it. It does handle single-byte
6071 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006072 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006073 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006074 cu = MB_TOUPPER(*opnd);
6075 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006076 while (count < maxcount && (*scan == cu || *scan == cl))
6077 {
6078 count++;
6079 scan++;
6080 }
6081 }
6082 else
6083 {
6084 cu = *opnd;
6085 while (count < maxcount && *scan == cu)
6086 {
6087 count++;
6088 scan++;
6089 }
6090 }
6091 break;
6092 }
6093
6094#ifdef FEAT_MBYTE
6095 case MULTIBYTECODE:
6096 {
6097 int i, len, cf = 0;
6098
6099 /* Safety check (just in case 'encoding' was changed since
6100 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006101 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006102 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006103 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006104 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006105 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006106 {
6107 for (i = 0; i < len; ++i)
6108 if (opnd[i] != scan[i])
6109 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006110 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00006111 || utf_fold(utf_ptr2char(scan)) != cf))
6112 break;
6113 scan += len;
6114 ++count;
6115 }
6116 }
6117 }
6118 break;
6119#endif
6120
6121 case ANYOF:
6122 case ANYOF + ADD_NL:
6123 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02006124 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006125
6126 case ANYBUT:
6127 case ANYBUT + ADD_NL:
6128 while (count < maxcount)
6129 {
6130#ifdef FEAT_MBYTE
6131 int len;
6132#endif
6133 if (*scan == NUL)
6134 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006135 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006136 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006137 break;
6138 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006139 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006140 if (got_int)
6141 break;
6142 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006143 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006144 ++scan;
6145#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006146 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006147 {
6148 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6149 break;
6150 scan += len;
6151 }
6152#endif
6153 else
6154 {
6155 if ((cstrchr(opnd, *scan) == NULL) == testval)
6156 break;
6157 ++scan;
6158 }
6159 ++count;
6160 }
6161 break;
6162
6163 case NEWL:
6164 while (count < maxcount
Bram Moolenaar0270f382018-07-17 05:43:58 +02006165 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006166 && !rex.reg_line_lbr && REG_MULTI)
6167 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006168 {
6169 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006170 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006171 ADVANCE_REGINPUT();
6172 else
6173 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006174 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006175 if (got_int)
6176 break;
6177 }
6178 break;
6179
6180 default: /* Oh dear. Called inappropriately. */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006181 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006182#ifdef DEBUG
6183 printf("Called regrepeat with op code %d\n", OP(p));
6184#endif
6185 break;
6186 }
6187
Bram Moolenaar0270f382018-07-17 05:43:58 +02006188 rex.input = scan;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006189
6190 return (int)count;
6191}
6192
6193/*
6194 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006195 * Returns NULL when calculating size, when there is no next item and when
6196 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006197 */
6198 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006199regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006200{
6201 int offset;
6202
Bram Moolenaard3005802009-11-25 17:21:32 +00006203 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006204 return NULL;
6205
6206 offset = NEXT(p);
6207 if (offset == 0)
6208 return NULL;
6209
Bram Moolenaar582fd852005-03-28 20:58:01 +00006210 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006211 return p - offset;
6212 else
6213 return p + offset;
6214}
6215
6216/*
6217 * Check the regexp program for its magic number.
6218 * Return TRUE if it's wrong.
6219 */
6220 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006221prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006222{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006223 regprog_T *prog;
6224
Bram Moolenaar6100d022016-10-02 16:51:57 +02006225 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006226 if (prog->engine == &nfa_regengine)
6227 /* For NFA matcher we don't check the magic */
6228 return FALSE;
6229
6230 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006231 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006232 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006233 return TRUE;
6234 }
6235 return FALSE;
6236}
6237
6238/*
6239 * Cleanup the subexpressions, if this wasn't done yet.
6240 * This construction is used to clear the subexpressions only when they are
6241 * used (to increase speed).
6242 */
6243 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006244cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006245{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006246 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006247 {
6248 if (REG_MULTI)
6249 {
6250 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006251 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6252 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006253 }
6254 else
6255 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006256 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6257 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006258 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006259 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006260 }
6261}
6262
#ifdef FEAT_SYN_HL
/*
 * Clear the reg_startz and reg_endz arrays (positions or pointers, depending
 * on REG_MULTI), if rex.need_clear_zsubexpr says this wasn't done yet.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
	return;

    if (REG_MULTI)
    {
	/* Use 0xff to set lnum to -1 */
	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
	vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    rex.need_clear_zsubexpr = FALSE;
}
#endif
6284
6285/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006286 * Save the current subexpr to "bp", so that they can be restored
6287 * later by restore_subexpr().
6288 */
6289 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006290save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006291{
6292 int i;
6293
Bram Moolenaar0270f382018-07-17 05:43:58 +02006294 /* When "rex.need_clear_subexpr" is set we don't need to save the values, only
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006295 * remember that this flag needs to be set again when restoring. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006296 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
6297 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006298 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006299 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006300 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006301 if (REG_MULTI)
6302 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006303 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6304 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006305 }
6306 else
6307 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006308 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6309 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006310 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006311 }
6312 }
6313}
6314
6315/*
6316 * Restore the subexpr from "bp".
6317 */
6318 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006319restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006320{
6321 int i;
6322
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006323 /* Only need to restore saved values when they are not to be cleared. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006324 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
6325 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006326 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006327 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006328 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006329 if (REG_MULTI)
6330 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006331 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6332 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006333 }
6334 else
6335 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006336 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6337 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006338 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006339 }
6340 }
6341}
6342
6343/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02006344 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006345 */
6346 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006347reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006348{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006349 rex.line = reg_getline(++rex.lnum);
6350 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006351 fast_breakcheck();
6352}
6353
6354/*
6355 * Save the input line and position in a regsave_T.
6356 */
6357 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006358reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006359{
6360 if (REG_MULTI)
6361 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006362 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
6363 save->rs_u.pos.lnum = rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006364 }
6365 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006366 save->rs_u.ptr = rex.input;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006367 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006368}
6369
6370/*
6371 * Restore the input line and position from a regsave_T.
6372 */
6373 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006374reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006375{
6376 if (REG_MULTI)
6377 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006378 if (rex.lnum != save->rs_u.pos.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006379 {
6380 /* only call reg_getline() when the line number changed to save
6381 * a bit of time */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006382 rex.lnum = save->rs_u.pos.lnum;
6383 rex.line = reg_getline(rex.lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006384 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006385 rex.input = rex.line + save->rs_u.pos.col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006386 }
6387 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006388 rex.input = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006389 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006390}
6391
6392/*
6393 * Return TRUE if current position is equal to saved position.
6394 */
6395 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006396reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006397{
6398 if (REG_MULTI)
Bram Moolenaar0270f382018-07-17 05:43:58 +02006399 return rex.lnum == save->rs_u.pos.lnum
6400 && rex.input == rex.line + save->rs_u.pos.col;
6401 return rex.input == save->rs_u.ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006402}
6403
6404/*
6405 * Tentatively set the sub-expression start to the current position (after
6406 * calling regmatch() they will have changed). Need to save the existing
6407 * values for when there is no match.
6408 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6409 * depending on REG_MULTI.
6410 */
6411 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006412save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006413{
6414 savep->se_u.pos = *posp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006415 posp->lnum = rex.lnum;
6416 posp->col = (colnr_T)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006417}
6418
6419 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006420save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006421{
6422 savep->se_u.ptr = *pp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006423 *pp = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006424}
6425
6426/*
6427 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6428 */
6429 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006430re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006431{
6432 long_u n = OPERAND_MIN(scan);
6433
6434 if (OPERAND_CMP(scan) == '>')
6435 return val > n;
6436 if (OPERAND_CMP(scan) == '<')
6437 return val < n;
6438 return val == n;
6439}
6440
Bram Moolenaar580abea2013-06-14 20:31:28 +02006441/*
6442 * Check whether a backreference matches.
6443 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006444 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6445 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006446 */
6447 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006448match_with_backref(
6449 linenr_T start_lnum,
6450 colnr_T start_col,
6451 linenr_T end_lnum,
6452 colnr_T end_col,
6453 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006454{
6455 linenr_T clnum = start_lnum;
6456 colnr_T ccol = start_col;
6457 int len;
6458 char_u *p;
6459
6460 if (bytelen != NULL)
6461 *bytelen = 0;
6462 for (;;)
6463 {
6464 /* Since getting one line may invalidate the other, need to make copy.
6465 * Slow! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006466 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006467 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006468 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02006469 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6470 {
6471 len += 50; /* get some extra */
6472 vim_free(reg_tofree);
6473 reg_tofree = alloc(len);
6474 if (reg_tofree == NULL)
6475 return RA_FAIL; /* out of memory!*/
6476 reg_tofreelen = len;
6477 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006478 STRCPY(reg_tofree, rex.line);
6479 rex.input = reg_tofree + (rex.input - rex.line);
6480 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006481 }
6482
6483 /* Get the line to compare with. */
6484 p = reg_getline(clnum);
6485 if (clnum == end_lnum)
6486 len = end_col - ccol;
6487 else
6488 len = (int)STRLEN(p + ccol);
6489
Bram Moolenaar0270f382018-07-17 05:43:58 +02006490 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006491 return RA_NOMATCH; /* doesn't match */
6492 if (bytelen != NULL)
6493 *bytelen += len;
6494 if (clnum == end_lnum)
6495 break; /* match and at end! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006496 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006497 return RA_NOMATCH; /* text too short */
6498
6499 /* Advance to next line. */
6500 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006501 if (bytelen != NULL)
6502 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006503 ++clnum;
6504 ccol = 0;
6505 if (got_int)
6506 return RA_FAIL;
6507 }
6508
Bram Moolenaar0270f382018-07-17 05:43:58 +02006509 /* found a match! Note that rex.line may now point to a copy of the line,
Bram Moolenaar580abea2013-06-14 20:31:28 +02006510 * that should not matter. */
6511 return RA_MATCH;
6512}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006513
#ifdef BT_REGEXP_DUMP

/*
 * regdump - dump the compiled program "r" of "pattern" in vaguely
 * comprehensible form.
 * The output goes to stdout, or is appended to the file "bt_regexp_log.log"
 * when BT_REGEXP_LOG is defined.  Nothing is done when that file cannot be
 * opened.
 */
    static void
regdump(char_u *pattern, bt_regprog_T *r)
{
    char_u  *node;
    int	    op = EXACTLY;	/* Arbitrary non-END op. */
    char_u  *next;
    char_u  *furthest = NULL;	/* highest "next" pointer seen so far */
    FILE    *fd;

#ifdef BT_REGEXP_LOG
    fd = fopen("bt_regexp_log.log", "a");
#else
    fd = stdout;
#endif
    if (fd == NULL)
	return;
    fprintf(fd, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Loop until we find the END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    for (node = r->program + 1; op != END || node <= furthest; )
    {
	op = OP(node);
	/* Where, what. */
	fprintf(fd, "%2d%s", (int)(node - r->program), regprop(node));

	/* Next ptr, as an offset in the program. */
	next = regnext(node);
	if (next == NULL)
	    fprintf(fd, "(0)");
	else
	    fprintf(fd, "(%d)", (int)(next - r->program));
	if (furthest < next)
	    furthest = next;

	/* Print the numbers stored in the node and skip over them. */
	if (op == BRACE_LIMITS)
	{
	    /* Two ints */
	    fprintf(fd, " minval %ld, maxval %ld",
					 OPERAND_MIN(node), OPERAND_MAX(node));
	    node += 8;
	}
	else if (op == BEHIND || op == NOBEHIND)
	{
	    /* one int */
	    fprintf(fd, " count %ld", OPERAND_MIN(node));
	    node += 4;
	}
	else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
	{
	    /* one int plus comparator */
	    fprintf(fd, " count %ld", OPERAND_MIN(node));
	    node += 5;
	}

	/* Skip the three bytes every node starts with. */
	node += 3;
	if (op == ANYOF || op == ANYOF + ADD_NL
		|| op == ANYBUT || op == ANYBUT + ADD_NL
		|| op == EXACTLY)
	{
	    /* Literal string, where present. */
	    fprintf(fd, "\nxxxxxxxxx\n");
	    for ( ; *node != NUL; ++node)
		fprintf(fd, "%c", *node);
	    fprintf(fd, "\nxxxxxxxxx\n");
	    node++;		/* skip the terminating NUL */
	}
	fprintf(fd, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
	fprintf(fd, "start `%s' 0x%x; ", r->regstart < 256
		? (char *)transchar(r->regstart)
		: "multibyte", r->regstart);
    if (r->reganch)
	fprintf(fd, "anchored; ");
    if (r->regmust != NULL)
	fprintf(fd, "must have \"%s\"", r->regmust);
    fprintf(fd, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(fd);
#endif
}
#endif /* BT_REGEXP_DUMP */
6602
6603#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006604/*
6605 * regprop - printable representation of opcode
6606 */
6607 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006608regprop(char_u *op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006609{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006610 char *p;
6611 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006612
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006613 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006614
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006615 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006616 {
6617 case BOL:
6618 p = "BOL";
6619 break;
6620 case EOL:
6621 p = "EOL";
6622 break;
6623 case RE_BOF:
6624 p = "BOF";
6625 break;
6626 case RE_EOF:
6627 p = "EOF";
6628 break;
6629 case CURSOR:
6630 p = "CURSOR";
6631 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006632 case RE_VISUAL:
6633 p = "RE_VISUAL";
6634 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006635 case RE_LNUM:
6636 p = "RE_LNUM";
6637 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006638 case RE_MARK:
6639 p = "RE_MARK";
6640 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006641 case RE_COL:
6642 p = "RE_COL";
6643 break;
6644 case RE_VCOL:
6645 p = "RE_VCOL";
6646 break;
6647 case BOW:
6648 p = "BOW";
6649 break;
6650 case EOW:
6651 p = "EOW";
6652 break;
6653 case ANY:
6654 p = "ANY";
6655 break;
6656 case ANY + ADD_NL:
6657 p = "ANY+NL";
6658 break;
6659 case ANYOF:
6660 p = "ANYOF";
6661 break;
6662 case ANYOF + ADD_NL:
6663 p = "ANYOF+NL";
6664 break;
6665 case ANYBUT:
6666 p = "ANYBUT";
6667 break;
6668 case ANYBUT + ADD_NL:
6669 p = "ANYBUT+NL";
6670 break;
6671 case IDENT:
6672 p = "IDENT";
6673 break;
6674 case IDENT + ADD_NL:
6675 p = "IDENT+NL";
6676 break;
6677 case SIDENT:
6678 p = "SIDENT";
6679 break;
6680 case SIDENT + ADD_NL:
6681 p = "SIDENT+NL";
6682 break;
6683 case KWORD:
6684 p = "KWORD";
6685 break;
6686 case KWORD + ADD_NL:
6687 p = "KWORD+NL";
6688 break;
6689 case SKWORD:
6690 p = "SKWORD";
6691 break;
6692 case SKWORD + ADD_NL:
6693 p = "SKWORD+NL";
6694 break;
6695 case FNAME:
6696 p = "FNAME";
6697 break;
6698 case FNAME + ADD_NL:
6699 p = "FNAME+NL";
6700 break;
6701 case SFNAME:
6702 p = "SFNAME";
6703 break;
6704 case SFNAME + ADD_NL:
6705 p = "SFNAME+NL";
6706 break;
6707 case PRINT:
6708 p = "PRINT";
6709 break;
6710 case PRINT + ADD_NL:
6711 p = "PRINT+NL";
6712 break;
6713 case SPRINT:
6714 p = "SPRINT";
6715 break;
6716 case SPRINT + ADD_NL:
6717 p = "SPRINT+NL";
6718 break;
6719 case WHITE:
6720 p = "WHITE";
6721 break;
6722 case WHITE + ADD_NL:
6723 p = "WHITE+NL";
6724 break;
6725 case NWHITE:
6726 p = "NWHITE";
6727 break;
6728 case NWHITE + ADD_NL:
6729 p = "NWHITE+NL";
6730 break;
6731 case DIGIT:
6732 p = "DIGIT";
6733 break;
6734 case DIGIT + ADD_NL:
6735 p = "DIGIT+NL";
6736 break;
6737 case NDIGIT:
6738 p = "NDIGIT";
6739 break;
6740 case NDIGIT + ADD_NL:
6741 p = "NDIGIT+NL";
6742 break;
6743 case HEX:
6744 p = "HEX";
6745 break;
6746 case HEX + ADD_NL:
6747 p = "HEX+NL";
6748 break;
6749 case NHEX:
6750 p = "NHEX";
6751 break;
6752 case NHEX + ADD_NL:
6753 p = "NHEX+NL";
6754 break;
6755 case OCTAL:
6756 p = "OCTAL";
6757 break;
6758 case OCTAL + ADD_NL:
6759 p = "OCTAL+NL";
6760 break;
6761 case NOCTAL:
6762 p = "NOCTAL";
6763 break;
6764 case NOCTAL + ADD_NL:
6765 p = "NOCTAL+NL";
6766 break;
6767 case WORD:
6768 p = "WORD";
6769 break;
6770 case WORD + ADD_NL:
6771 p = "WORD+NL";
6772 break;
6773 case NWORD:
6774 p = "NWORD";
6775 break;
6776 case NWORD + ADD_NL:
6777 p = "NWORD+NL";
6778 break;
6779 case HEAD:
6780 p = "HEAD";
6781 break;
6782 case HEAD + ADD_NL:
6783 p = "HEAD+NL";
6784 break;
6785 case NHEAD:
6786 p = "NHEAD";
6787 break;
6788 case NHEAD + ADD_NL:
6789 p = "NHEAD+NL";
6790 break;
6791 case ALPHA:
6792 p = "ALPHA";
6793 break;
6794 case ALPHA + ADD_NL:
6795 p = "ALPHA+NL";
6796 break;
6797 case NALPHA:
6798 p = "NALPHA";
6799 break;
6800 case NALPHA + ADD_NL:
6801 p = "NALPHA+NL";
6802 break;
6803 case LOWER:
6804 p = "LOWER";
6805 break;
6806 case LOWER + ADD_NL:
6807 p = "LOWER+NL";
6808 break;
6809 case NLOWER:
6810 p = "NLOWER";
6811 break;
6812 case NLOWER + ADD_NL:
6813 p = "NLOWER+NL";
6814 break;
6815 case UPPER:
6816 p = "UPPER";
6817 break;
6818 case UPPER + ADD_NL:
6819 p = "UPPER+NL";
6820 break;
6821 case NUPPER:
6822 p = "NUPPER";
6823 break;
6824 case NUPPER + ADD_NL:
6825 p = "NUPPER+NL";
6826 break;
6827 case BRANCH:
6828 p = "BRANCH";
6829 break;
6830 case EXACTLY:
6831 p = "EXACTLY";
6832 break;
6833 case NOTHING:
6834 p = "NOTHING";
6835 break;
6836 case BACK:
6837 p = "BACK";
6838 break;
6839 case END:
6840 p = "END";
6841 break;
6842 case MOPEN + 0:
6843 p = "MATCH START";
6844 break;
6845 case MOPEN + 1:
6846 case MOPEN + 2:
6847 case MOPEN + 3:
6848 case MOPEN + 4:
6849 case MOPEN + 5:
6850 case MOPEN + 6:
6851 case MOPEN + 7:
6852 case MOPEN + 8:
6853 case MOPEN + 9:
6854 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6855 p = NULL;
6856 break;
6857 case MCLOSE + 0:
6858 p = "MATCH END";
6859 break;
6860 case MCLOSE + 1:
6861 case MCLOSE + 2:
6862 case MCLOSE + 3:
6863 case MCLOSE + 4:
6864 case MCLOSE + 5:
6865 case MCLOSE + 6:
6866 case MCLOSE + 7:
6867 case MCLOSE + 8:
6868 case MCLOSE + 9:
6869 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6870 p = NULL;
6871 break;
6872 case BACKREF + 1:
6873 case BACKREF + 2:
6874 case BACKREF + 3:
6875 case BACKREF + 4:
6876 case BACKREF + 5:
6877 case BACKREF + 6:
6878 case BACKREF + 7:
6879 case BACKREF + 8:
6880 case BACKREF + 9:
6881 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6882 p = NULL;
6883 break;
6884 case NOPEN:
6885 p = "NOPEN";
6886 break;
6887 case NCLOSE:
6888 p = "NCLOSE";
6889 break;
6890#ifdef FEAT_SYN_HL
6891 case ZOPEN + 1:
6892 case ZOPEN + 2:
6893 case ZOPEN + 3:
6894 case ZOPEN + 4:
6895 case ZOPEN + 5:
6896 case ZOPEN + 6:
6897 case ZOPEN + 7:
6898 case ZOPEN + 8:
6899 case ZOPEN + 9:
6900 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6901 p = NULL;
6902 break;
6903 case ZCLOSE + 1:
6904 case ZCLOSE + 2:
6905 case ZCLOSE + 3:
6906 case ZCLOSE + 4:
6907 case ZCLOSE + 5:
6908 case ZCLOSE + 6:
6909 case ZCLOSE + 7:
6910 case ZCLOSE + 8:
6911 case ZCLOSE + 9:
6912 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6913 p = NULL;
6914 break;
6915 case ZREF + 1:
6916 case ZREF + 2:
6917 case ZREF + 3:
6918 case ZREF + 4:
6919 case ZREF + 5:
6920 case ZREF + 6:
6921 case ZREF + 7:
6922 case ZREF + 8:
6923 case ZREF + 9:
6924 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6925 p = NULL;
6926 break;
6927#endif
6928 case STAR:
6929 p = "STAR";
6930 break;
6931 case PLUS:
6932 p = "PLUS";
6933 break;
6934 case NOMATCH:
6935 p = "NOMATCH";
6936 break;
6937 case MATCH:
6938 p = "MATCH";
6939 break;
6940 case BEHIND:
6941 p = "BEHIND";
6942 break;
6943 case NOBEHIND:
6944 p = "NOBEHIND";
6945 break;
6946 case SUBPAT:
6947 p = "SUBPAT";
6948 break;
6949 case BRACE_LIMITS:
6950 p = "BRACE_LIMITS";
6951 break;
6952 case BRACE_SIMPLE:
6953 p = "BRACE_SIMPLE";
6954 break;
6955 case BRACE_COMPLEX + 0:
6956 case BRACE_COMPLEX + 1:
6957 case BRACE_COMPLEX + 2:
6958 case BRACE_COMPLEX + 3:
6959 case BRACE_COMPLEX + 4:
6960 case BRACE_COMPLEX + 5:
6961 case BRACE_COMPLEX + 6:
6962 case BRACE_COMPLEX + 7:
6963 case BRACE_COMPLEX + 8:
6964 case BRACE_COMPLEX + 9:
6965 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6966 p = NULL;
6967 break;
6968#ifdef FEAT_MBYTE
6969 case MULTIBYTECODE:
6970 p = "MULTIBYTECODE";
6971 break;
6972#endif
6973 case NEWL:
6974 p = "NEWL";
6975 break;
6976 default:
6977 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6978 p = NULL;
6979 break;
6980 }
6981 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006982 STRCAT(buf, p);
6983 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006984}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006985#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006986
Bram Moolenaarfb031402014-09-09 17:18:49 +02006987/*
6988 * Used in a place where no * or \+ can follow.
6989 */
6990 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006991re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006992{
6993 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01006994 {
6995 semsg(_("E888: (NFA regexp) cannot repeat %s"), what);
6996 rc_did_emsg = TRUE;
6997 return FAIL;
6998 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02006999 return OK;
7000}
7001
Bram Moolenaar071d4272004-06-13 20:20:40 +00007002#ifdef FEAT_MBYTE
/*
 * One entry of the decomposition table: up to three code points that
 * together make up a precomposed character.  Unused slots are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for 0xfb20 - 0xfb4f (Hebrew presentation forms), indexed by
 * "code point - 0xfb20".  Code points that are not assigned map to
 * themselves.  The table is never modified, thus it is const.
 */
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5bc, 0},          /* 0xfb30       alef+mapiq (was listed as
                                 *              hiriq, 0x5b4, by mistake) */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into at most three code points.
 * For 0xfb20 - 0xfb4f the parts come from decomp_table[]: "*c1" gets the
 * first code point, "*c2" and "*c3" the following ones or zero.
 * Any other character is returned unchanged in "*c1" with "*c2" and "*c3"
 * set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        const decomp_T *d = &decomp_table[c - 0xfb20];

        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
7080#endif
7081
7082/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02007083 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00007084 * Return 0 if strings match, non-zero otherwise.
7085 * Correct the length "*n" when composing characters are ignored.
7086 */
7087 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007088cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007089{
7090 int result;
7091
Bram Moolenaar6100d022016-10-02 16:51:57 +02007092 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007093 result = STRNCMP(s1, s2, *n);
7094 else
7095 result = MB_STRNICMP(s1, s2, *n);
7096
7097#ifdef FEAT_MBYTE
7098 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007099 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007100 {
7101 char_u *str1, *str2;
7102 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007103 int junk;
7104
7105 /* we have to handle the strcmp ourselves, since it is necessary to
7106 * deal with the composing characters by ignoring them: */
7107 str1 = s1;
7108 str2 = s2;
7109 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007110 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007111 {
7112 c1 = mb_ptr2char_adv(&str1);
7113 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007114
7115 /* decompose the character if necessary, into 'base' characters
7116 * because I don't care about Arabic, I will hard-code the Hebrew
7117 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007118 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007119 {
7120 /* decomposition necessary? */
7121 mb_decompose(c1, &c11, &junk, &junk);
7122 mb_decompose(c2, &c12, &junk, &junk);
7123 c1 = c11;
7124 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007125 if (c11 != c12
7126 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007127 break;
7128 }
7129 }
7130 result = c2 - c1;
7131 if (result == 0)
7132 *n = (int)(str2 - s2);
7133 }
7134#endif
7135
7136 return result;
7137}
7138
7139/*
7140 * cstrchr: This function is used a lot for simple searches, keep it fast!
7141 */
7142 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007143cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007144{
7145 char_u *p;
7146 int cc;
7147
Bram Moolenaar6100d022016-10-02 16:51:57 +02007148 if (!rex.reg_ic
Bram Moolenaar071d4272004-06-13 20:20:40 +00007149#ifdef FEAT_MBYTE
7150 || (!enc_utf8 && mb_char2len(c) > 1)
7151#endif
7152 )
7153 return vim_strchr(s, c);
7154
7155 /* tolower() and toupper() can be slow, comparing twice should be a lot
7156 * faster (esp. when using MS Visual C++!).
7157 * For UTF-8 need to use folded case. */
7158#ifdef FEAT_MBYTE
7159 if (enc_utf8 && c > 0x80)
7160 cc = utf_fold(c);
7161 else
7162#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007163 if (MB_ISUPPER(c))
7164 cc = MB_TOLOWER(c);
7165 else if (MB_ISLOWER(c))
7166 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007167 else
7168 return vim_strchr(s, c);
7169
7170#ifdef FEAT_MBYTE
7171 if (has_mbyte)
7172 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007173 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007174 {
7175 if (enc_utf8 && c > 0x80)
7176 {
7177 if (utf_fold(utf_ptr2char(p)) == cc)
7178 return p;
7179 }
7180 else if (*p == c || *p == cc)
7181 return p;
7182 }
7183 }
7184 else
7185#endif
7186 /* Faster version for when there are no multi-byte characters. */
7187 for (p = s; *p != NUL; ++p)
7188 if (*p == c || *p == cc)
7189 return p;
7190
7191 return NULL;
7192}
7193
7194/***************************************************************
7195 * regsub stuff *
7196 ***************************************************************/
7197
Bram Moolenaar071d4272004-06-13 20:20:40 +00007198/*
 * We should define fptr_T as a pointer to a function returning a pointer to
7200 * a function returning a pointer to a function ...
7201 * This is impossible, so we declare a pointer to a function returning a
7202 * pointer to a function returning void. This should work for all compilers.
7203 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007204typedef void (*(*fptr_T)(int *, int))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007205
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007206static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007207
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007208 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007209do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007210{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007211 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007212
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007213 return (fptr_T)NULL;
7214}
7215
7216 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007217do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007218{
7219 *d = MB_TOUPPER(c);
7220
7221 return (fptr_T)do_Upper;
7222}
7223
7224 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007225do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007226{
7227 *d = MB_TOLOWER(c);
7228
7229 return (fptr_T)NULL;
7230}
7231
7232 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007233do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007234{
7235 *d = MB_TOLOWER(c);
7236
7237 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007238}
7239
7240/*
7241 * regtilde(): Replace tildes in the pattern by the old pattern.
7242 *
7243 * Short explanation of the tilde: It stands for the previous replacement
7244 * pattern. If that previous pattern also contains a ~ we should go back a
7245 * step further... But we insert the previous pattern into the current one
7246 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007247 * This still does not handle the case where "magic" changes. So require the
7248 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007249 *
7250 * The tildes are parsed once before the first call to vim_regsub().
7251 */
7252 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007253regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007254{
7255 char_u *newsub = source;
7256 char_u *tmpsub;
7257 char_u *p;
7258 int len;
7259 int prevlen;
7260
7261 for (p = newsub; *p; ++p)
7262 {
7263 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7264 {
7265 if (reg_prev_sub != NULL)
7266 {
7267 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7268 prevlen = (int)STRLEN(reg_prev_sub);
7269 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7270 if (tmpsub != NULL)
7271 {
7272 /* copy prefix */
7273 len = (int)(p - newsub); /* not including ~ */
7274 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007275 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007276 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7277 /* copy postfix */
7278 if (!magic)
7279 ++p; /* back off \ */
7280 STRCPY(tmpsub + len + prevlen, p + 1);
7281
7282 if (newsub != source) /* already allocated newsub */
7283 vim_free(newsub);
7284 newsub = tmpsub;
7285 p = newsub + len + prevlen;
7286 }
7287 }
7288 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007289 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007290 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007291 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007292 --p;
7293 }
7294 else
7295 {
7296 if (*p == '\\' && p[1]) /* skip escaped characters */
7297 ++p;
7298#ifdef FEAT_MBYTE
7299 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007300 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007301#endif
7302 }
7303 }
7304
7305 vim_free(reg_prev_sub);
7306 if (newsub != source) /* newsub was allocated, just keep it */
7307 reg_prev_sub = newsub;
7308 else /* no ~ found, need to save newsub */
7309 reg_prev_sub = vim_strsave(newsub);
7310 return newsub;
7311}
7312
7313#ifdef FEAT_EVAL
7314static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7315
Bram Moolenaar6100d022016-10-02 16:51:57 +02007316/* These pointers are used for reg_submatch(). Needed for when the
7317 * substitution string is an expression that contains a call to substitute()
7318 * and submatch(). */
7319typedef struct {
7320 regmatch_T *sm_match;
7321 regmmatch_T *sm_mmatch;
7322 linenr_T sm_firstlnum;
7323 linenr_T sm_maxline;
7324 int sm_line_lbr;
7325} regsubmatch_T;
7326
7327static regsubmatch_T rsm; /* can only be used when can_f_submatch is TRUE */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007328#endif
7329
7330#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007331
7332/*
7333 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7334 * vim_regsub_both().
7335 */
7336 static int
7337fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7338{
7339 listitem_T *li;
7340 int i;
7341 char_u *s;
7342
7343 if (argcount == 0)
7344 /* called function doesn't take an argument */
7345 return 0;
7346
7347 /* Relies on sl_list to be the first item in staticList10_T. */
7348 init_static_list((staticList10_T *)(argv->vval.v_list));
7349
7350 /* There are always 10 list items in staticList10_T. */
7351 li = argv->vval.v_list->lv_first;
7352 for (i = 0; i < 10; ++i)
7353 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007354 s = rsm.sm_match->startp[i];
7355 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007356 s = NULL;
7357 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007358 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007359 li->li_tv.v_type = VAR_STRING;
7360 li->li_tv.vval.v_string = s;
7361 li = li->li_next;
7362 }
7363 return 1;
7364}
7365
7366 static void
7367clear_submatch_list(staticList10_T *sl)
7368{
7369 int i;
7370
7371 for (i = 0; i < 10; ++i)
7372 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7373}
7374
Bram Moolenaar071d4272004-06-13 20:20:40 +00007375/*
7376 * vim_regsub() - perform substitutions after a vim_regexec() or
7377 * vim_regexec_multi() match.
7378 *
7379 * If "copy" is TRUE really copy into "dest".
7380 * If "copy" is FALSE nothing is copied, this is just to find out the length
7381 * of the result.
7382 *
7383 * If "backslash" is TRUE, a backslash will be removed later, need to double
7384 * them to keep them, and insert a backslash before a CR to avoid it being
7385 * replaced with a line break later.
7386 *
7387 * Note: The matched text must not change between the call of
7388 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7389 * references invalid!
7390 *
7391 * Returns the size of the replacement, including terminating NUL.
7392 */
7393 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007394vim_regsub(
7395 regmatch_T *rmp,
7396 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007397 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007398 char_u *dest,
7399 int copy,
7400 int magic,
7401 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007402{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007403 int result;
7404 regexec_T rex_save;
7405 int rex_in_use_save = rex_in_use;
7406
7407 if (rex_in_use)
7408 /* Being called recursively, save the state. */
7409 rex_save = rex;
7410 rex_in_use = TRUE;
7411
7412 rex.reg_match = rmp;
7413 rex.reg_mmatch = NULL;
7414 rex.reg_maxline = 0;
7415 rex.reg_buf = curbuf;
7416 rex.reg_line_lbr = TRUE;
7417 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7418
7419 rex_in_use = rex_in_use_save;
7420 if (rex_in_use)
7421 rex = rex_save;
7422
7423 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007424}
7425#endif
7426
7427 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007428vim_regsub_multi(
7429 regmmatch_T *rmp,
7430 linenr_T lnum,
7431 char_u *source,
7432 char_u *dest,
7433 int copy,
7434 int magic,
7435 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007436{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007437 int result;
7438 regexec_T rex_save;
7439 int rex_in_use_save = rex_in_use;
7440
7441 if (rex_in_use)
7442 /* Being called recursively, save the state. */
7443 rex_save = rex;
7444 rex_in_use = TRUE;
7445
7446 rex.reg_match = NULL;
7447 rex.reg_mmatch = rmp;
7448 rex.reg_buf = curbuf; /* always works on the current buffer! */
7449 rex.reg_firstlnum = lnum;
7450 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7451 rex.reg_line_lbr = FALSE;
7452 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7453
7454 rex_in_use = rex_in_use_save;
7455 if (rex_in_use)
7456 rex = rex_save;
7457
7458 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007459}
7460
7461 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007462vim_regsub_both(
7463 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007464 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007465 char_u *dest,
7466 int copy,
7467 int magic,
7468 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007469{
7470 char_u *src;
7471 char_u *dst;
7472 char_u *s;
7473 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007474 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007475 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007476 fptr_T func_all = (fptr_T)NULL;
7477 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007478 linenr_T clnum = 0; /* init for GCC */
7479 int len = 0; /* init for GCC */
7480#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007481 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007482#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007483
7484 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007485 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007486 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007487 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007488 return 0;
7489 }
7490 if (prog_magic_wrong())
7491 return 0;
7492 src = source;
7493 dst = dest;
7494
7495 /*
7496 * When the substitute part starts with "\=" evaluate it as an expression.
7497 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007498 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007499 {
7500#ifdef FEAT_EVAL
7501 /* To make sure that the length doesn't change between checking the
7502 * length and copying the string, and to speed up things, the
7503 * resulting string is saved from the call with "copy" == FALSE to the
7504 * call with "copy" == TRUE. */
7505 if (copy)
7506 {
7507 if (eval_result != NULL)
7508 {
7509 STRCPY(dest, eval_result);
7510 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01007511 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007512 }
7513 }
7514 else
7515 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007516 int prev_can_f_submatch = can_f_submatch;
7517 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007518
7519 vim_free(eval_result);
7520
7521 /* The expression may contain substitute(), which calls us
7522 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02007523 * level. */
7524 if (can_f_submatch)
7525 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007526 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007527 rsm.sm_match = rex.reg_match;
7528 rsm.sm_mmatch = rex.reg_mmatch;
7529 rsm.sm_firstlnum = rex.reg_firstlnum;
7530 rsm.sm_maxline = rex.reg_maxline;
7531 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007532
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007533 if (expr != NULL)
7534 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007535 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007536 int dummy;
7537 char_u buf[NUMBUFLEN];
7538 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007539 staticList10_T matchList;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007540
7541 rettv.v_type = VAR_STRING;
7542 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007543 argv[0].v_type = VAR_LIST;
7544 argv[0].vval.v_list = &matchList.sl_list;
7545 matchList.sl_list.lv_len = 0;
7546 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007547 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007548 s = expr->vval.v_string;
7549 call_func(s, (int)STRLEN(s), &rettv,
7550 1, argv, fill_submatch_list,
7551 0L, 0L, &dummy, TRUE, NULL, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007552 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007553 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007554 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007555 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007556
Bram Moolenaar6100d022016-10-02 16:51:57 +02007557 s = partial_name(partial);
7558 call_func(s, (int)STRLEN(s), &rettv,
7559 1, argv, fill_submatch_list,
7560 0L, 0L, &dummy, TRUE, partial, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007561 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007562 if (matchList.sl_list.lv_len > 0)
7563 /* fill_submatch_list() was called */
7564 clear_submatch_list(&matchList);
7565
Bram Moolenaard155d7a2018-12-21 16:04:21 +01007566 eval_result = tv_get_string_buf_chk(&rettv, buf);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007567 if (eval_result != NULL)
7568 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007569 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007570 }
7571 else
7572 eval_result = eval_to_string(source + 2, NULL, TRUE);
7573
Bram Moolenaar071d4272004-06-13 20:20:40 +00007574 if (eval_result != NULL)
7575 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007576 int had_backslash = FALSE;
7577
Bram Moolenaar91acfff2017-03-12 19:22:36 +01007578 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007579 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007580 /* Change NL to CR, so that it becomes a line break,
7581 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007582 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007583 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007584 *s = CAR;
7585 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007586 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007587 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007588 /* Change NL to CR here too, so that this works:
7589 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7590 * abc\
7591 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007592 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007593 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007594 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007595 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007596 had_backslash = TRUE;
7597 }
7598 }
7599 if (had_backslash && backslash)
7600 {
7601 /* Backslashes will be consumed, need to double them. */
7602 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7603 if (s != NULL)
7604 {
7605 vim_free(eval_result);
7606 eval_result = s;
7607 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007608 }
7609
7610 dst += STRLEN(eval_result);
7611 }
7612
Bram Moolenaar6100d022016-10-02 16:51:57 +02007613 can_f_submatch = prev_can_f_submatch;
7614 if (can_f_submatch)
7615 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007616 }
7617#endif
7618 }
7619 else
7620 while ((c = *src++) != NUL)
7621 {
7622 if (c == '&' && magic)
7623 no = 0;
7624 else if (c == '\\' && *src != NUL)
7625 {
7626 if (*src == '&' && !magic)
7627 {
7628 ++src;
7629 no = 0;
7630 }
7631 else if ('0' <= *src && *src <= '9')
7632 {
7633 no = *src++ - '0';
7634 }
7635 else if (vim_strchr((char_u *)"uUlLeE", *src))
7636 {
7637 switch (*src++)
7638 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007639 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007640 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007641 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007642 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007643 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007644 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007645 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007646 continue;
7647 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007648 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007649 continue;
7650 }
7651 }
7652 }
7653 if (no < 0) /* Ordinary character. */
7654 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007655 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7656 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007657 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007658 if (copy)
7659 {
7660 *dst++ = c;
7661 *dst++ = *src++;
7662 *dst++ = *src++;
7663 }
7664 else
7665 {
7666 dst += 3;
7667 src += 2;
7668 }
7669 continue;
7670 }
7671
Bram Moolenaar071d4272004-06-13 20:20:40 +00007672 if (c == '\\' && *src != NUL)
7673 {
7674 /* Check for abbreviations -- webb */
7675 switch (*src)
7676 {
7677 case 'r': c = CAR; ++src; break;
7678 case 'n': c = NL; ++src; break;
7679 case 't': c = TAB; ++src; break;
7680 /* Oh no! \e already has meaning in subst pat :-( */
7681 /* case 'e': c = ESC; ++src; break; */
7682 case 'b': c = Ctrl_H; ++src; break;
7683
7684 /* If "backslash" is TRUE the backslash will be removed
7685 * later. Used to insert a literal CR. */
7686 default: if (backslash)
7687 {
7688 if (copy)
7689 *dst = '\\';
7690 ++dst;
7691 }
7692 c = *src++;
7693 }
7694 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007695#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007696 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007697 c = mb_ptr2char(src - 1);
7698#endif
7699
Bram Moolenaardb552d602006-03-23 22:59:57 +00007700 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007701 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007702 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007703 func_one = (fptr_T)(func_one(&cc, c));
7704 else if (func_all != (fptr_T)NULL)
7705 /* Turbo C complains without the typecast */
7706 func_all = (fptr_T)(func_all(&cc, c));
7707 else /* just copy */
7708 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007709
7710#ifdef FEAT_MBYTE
7711 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007712 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007713 int totlen = mb_ptr2len(src - 1);
7714
Bram Moolenaar071d4272004-06-13 20:20:40 +00007715 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007716 mb_char2bytes(cc, dst);
7717 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007718 if (enc_utf8)
7719 {
7720 int clen = utf_ptr2len(src - 1);
7721
7722 /* If the character length is shorter than "totlen", there
7723 * are composing characters; copy them as-is. */
7724 if (clen < totlen)
7725 {
7726 if (copy)
7727 mch_memmove(dst + 1, src - 1 + clen,
7728 (size_t)(totlen - clen));
7729 dst += totlen - clen;
7730 }
7731 }
7732 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007733 }
7734 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007735#endif
7736 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007737 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007738 dst++;
7739 }
7740 else
7741 {
7742 if (REG_MULTI)
7743 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007744 clnum = rex.reg_mmatch->startpos[no].lnum;
7745 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007746 s = NULL;
7747 else
7748 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007749 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
7750 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7751 len = rex.reg_mmatch->endpos[no].col
7752 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007753 else
7754 len = (int)STRLEN(s);
7755 }
7756 }
7757 else
7758 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007759 s = rex.reg_match->startp[no];
7760 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007761 s = NULL;
7762 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007763 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007764 }
7765 if (s != NULL)
7766 {
7767 for (;;)
7768 {
7769 if (len == 0)
7770 {
7771 if (REG_MULTI)
7772 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007773 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007774 break;
7775 if (copy)
7776 *dst = CAR;
7777 ++dst;
7778 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02007779 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7780 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007781 else
7782 len = (int)STRLEN(s);
7783 }
7784 else
7785 break;
7786 }
7787 else if (*s == NUL) /* we hit NUL. */
7788 {
7789 if (copy)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007790 emsg(_(e_re_damg));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007791 goto exit;
7792 }
7793 else
7794 {
7795 if (backslash && (*s == CAR || *s == '\\'))
7796 {
7797 /*
7798 * Insert a backslash in front of a CR, otherwise
7799 * it will be replaced by a line break.
7800 * Number of backslashes will be halved later,
7801 * double them here.
7802 */
7803 if (copy)
7804 {
7805 dst[0] = '\\';
7806 dst[1] = *s;
7807 }
7808 dst += 2;
7809 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007810 else
7811 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007812#ifdef FEAT_MBYTE
7813 if (has_mbyte)
7814 c = mb_ptr2char(s);
7815 else
7816#endif
7817 c = *s;
7818
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007819 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007820 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007821 func_one = (fptr_T)(func_one(&cc, c));
7822 else if (func_all != (fptr_T)NULL)
7823 /* Turbo C complains without the typecast */
7824 func_all = (fptr_T)(func_all(&cc, c));
7825 else /* just copy */
7826 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007827
7828#ifdef FEAT_MBYTE
7829 if (has_mbyte)
7830 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007831 int l;
7832
7833 /* Copy composing characters separately, one
7834 * at a time. */
7835 if (enc_utf8)
7836 l = utf_ptr2len(s) - 1;
7837 else
7838 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007839
7840 s += l;
7841 len -= l;
7842 if (copy)
7843 mb_char2bytes(cc, dst);
7844 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007845 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007846 else
7847#endif
7848 if (copy)
7849 *dst = cc;
7850 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007851 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007852
Bram Moolenaar071d4272004-06-13 20:20:40 +00007853 ++s;
7854 --len;
7855 }
7856 }
7857 }
7858 no = -1;
7859 }
7860 }
7861 if (copy)
7862 *dst = NUL;
7863
7864exit:
7865 return (int)((dst - dest) + 1);
7866}
7867
7868#ifdef FEAT_EVAL
7869/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007870 * Call reg_getline() with the line numbers from the submatch. If a
7871 * substitute() was used the reg_maxline and other values have been
7872 * overwritten.
7873 */
7874 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007875reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007876{
7877 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007878 linenr_T save_first = rex.reg_firstlnum;
7879 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007880
Bram Moolenaar6100d022016-10-02 16:51:57 +02007881 rex.reg_firstlnum = rsm.sm_firstlnum;
7882 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007883
7884 s = reg_getline(lnum);
7885
Bram Moolenaar6100d022016-10-02 16:51:57 +02007886 rex.reg_firstlnum = save_first;
7887 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007888 return s;
7889}
7890
7891/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007892 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007893 * allocated memory.
7894 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7895 */
7896 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007897reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007898{
7899 char_u *retval = NULL;
7900 char_u *s;
7901 int len;
7902 int round;
7903 linenr_T lnum;
7904
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007905 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007906 return NULL;
7907
Bram Moolenaar6100d022016-10-02 16:51:57 +02007908 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007909 {
7910 /*
7911 * First round: compute the length and allocate memory.
7912 * Second round: copy the text.
7913 */
7914 for (round = 1; round <= 2; ++round)
7915 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007916 lnum = rsm.sm_mmatch->startpos[no].lnum;
7917 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007918 return NULL;
7919
Bram Moolenaar6100d022016-10-02 16:51:57 +02007920 s = reg_getline_submatch(lnum) + rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007921 if (s == NULL) /* anti-crash check, cannot happen? */
7922 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007923 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007924 {
7925 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007926 len = rsm.sm_mmatch->endpos[no].col
7927 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007928 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007929 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007930 ++len;
7931 }
7932 else
7933 {
7934 /* Multiple lines: take start line from start col, middle
7935 * lines completely and end line up to end col. */
7936 len = (int)STRLEN(s);
7937 if (round == 2)
7938 {
7939 STRCPY(retval, s);
7940 retval[len] = '\n';
7941 }
7942 ++len;
7943 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007944 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007945 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007946 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007947 if (round == 2)
7948 STRCPY(retval + len, s);
7949 len += (int)STRLEN(s);
7950 if (round == 2)
7951 retval[len] = '\n';
7952 ++len;
7953 }
7954 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007955 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007956 rsm.sm_mmatch->endpos[no].col);
7957 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007958 if (round == 2)
7959 retval[len] = NUL;
7960 ++len;
7961 }
7962
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007963 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007964 {
7965 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007966 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007967 return NULL;
7968 }
7969 }
7970 }
7971 else
7972 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007973 s = rsm.sm_match->startp[no];
7974 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007975 retval = NULL;
7976 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007977 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007978 }
7979
7980 return retval;
7981}
Bram Moolenaar41571762014-04-02 19:00:58 +02007982
7983/*
7984 * Used for the submatch() function with the optional non-zero argument: get
7985 * the list of strings from the n'th submatch in allocated memory with NULs
7986 * represented in NLs.
7987 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7988 * command, for a non-existing submatch and for any error.
7989 */
7990 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007991reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007992{
7993 char_u *s;
7994 linenr_T slnum;
7995 linenr_T elnum;
7996 colnr_T scol;
7997 colnr_T ecol;
7998 int i;
7999 list_T *list;
8000 int error = FALSE;
8001
8002 if (!can_f_submatch || no < 0)
8003 return NULL;
8004
Bram Moolenaar6100d022016-10-02 16:51:57 +02008005 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008006 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008007 slnum = rsm.sm_mmatch->startpos[no].lnum;
8008 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02008009 if (slnum < 0 || elnum < 0)
8010 return NULL;
8011
Bram Moolenaar6100d022016-10-02 16:51:57 +02008012 scol = rsm.sm_mmatch->startpos[no].col;
8013 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02008014
8015 list = list_alloc();
8016 if (list == NULL)
8017 return NULL;
8018
8019 s = reg_getline_submatch(slnum) + scol;
8020 if (slnum == elnum)
8021 {
8022 if (list_append_string(list, s, ecol - scol) == FAIL)
8023 error = TRUE;
8024 }
8025 else
8026 {
8027 if (list_append_string(list, s, -1) == FAIL)
8028 error = TRUE;
8029 for (i = 1; i < elnum - slnum; i++)
8030 {
8031 s = reg_getline_submatch(slnum + i);
8032 if (list_append_string(list, s, -1) == FAIL)
8033 error = TRUE;
8034 }
8035 s = reg_getline_submatch(elnum);
8036 if (list_append_string(list, s, ecol) == FAIL)
8037 error = TRUE;
8038 }
8039 }
8040 else
8041 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008042 s = rsm.sm_match->startp[no];
8043 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008044 return NULL;
8045 list = list_alloc();
8046 if (list == NULL)
8047 return NULL;
8048 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02008049 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008050 error = TRUE;
8051 }
8052
8053 if (error)
8054 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02008055 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02008056 return NULL;
8057 }
8058 return list;
8059}
Bram Moolenaar071d4272004-06-13 20:20:40 +00008060#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008061
8062static regengine_T bt_regengine =
8063{
8064 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008065 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008066 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008067 bt_regexec_multi,
8068 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008069};
8070
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008071#include "regexp_nfa.c"
8072
8073static regengine_T nfa_regengine =
8074{
8075 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008076 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008077 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008078 nfa_regexec_multi,
8079 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008080};
8081
/*
 * Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
 * "p_re" or from a "\%#=" prefix in the pattern.
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008085
#ifdef DEBUG
/* Engine names for the debug message in vim_regcomp(), indexed by the engine
 * number given after "\%#=". */
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8093
8094/*
8095 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008096 * Returns the program in allocated memory.
8097 * Use vim_regfree() to free the memory.
8098 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008099 */
8100 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01008101vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008102{
8103 regprog_T *prog = NULL;
8104 char_u *expr = expr_arg;
8105
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008106 regexp_engine = p_re;
8107
8108 /* Check for prefix "\%#=", that sets the regexp engine */
8109 if (STRNCMP(expr, "\\%#=", 4) == 0)
8110 {
8111 int newengine = expr[4] - '0';
8112
8113 if (newengine == AUTOMATIC_ENGINE
8114 || newengine == BACKTRACKING_ENGINE
8115 || newengine == NFA_ENGINE)
8116 {
8117 regexp_engine = expr[4] - '0';
8118 expr += 5;
8119#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008120 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02008121 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008122#endif
8123 }
8124 else
8125 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008126 emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008127 regexp_engine = AUTOMATIC_ENGINE;
8128 }
8129 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02008130#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008131 bt_regengine.expr = expr;
8132 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008133#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008134
8135 /*
8136 * First try the NFA engine, unless backtracking was requested.
8137 */
8138 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01008139 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008140 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008141 else
8142 prog = bt_regengine.regcomp(expr, re_flags);
8143
Bram Moolenaarfda37292014-11-05 14:27:36 +01008144 /* Check for error compiling regexp with initial engine. */
8145 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008146 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008147#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008148 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8149 {
8150 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008151 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008152 if (f)
8153 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008154 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008155 fclose(f);
8156 }
8157 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008158 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01008159 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008160 }
8161#endif
8162 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008163 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008164 * The NFA engine also fails for patterns that it can't handle well
8165 * but are still valid patterns, thus a retry should work.
8166 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008167 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008168 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008169 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008170 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008171 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008172 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008173
Bram Moolenaarfda37292014-11-05 14:27:36 +01008174 if (prog != NULL)
8175 {
8176 /* Store the info needed to call regcomp() again when the engine turns
8177 * out to be very slow when executing it. */
8178 prog->re_engine = regexp_engine;
8179 prog->re_flags = re_flags;
8180 }
8181
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008182 return prog;
8183}
8184
8185/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008186 * Free a compiled regexp program, returned by vim_regcomp().
8187 */
8188 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008189vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008190{
8191 if (prog != NULL)
8192 prog->engine->regfree(prog);
8193}
8194
#ifdef FEAT_EVAL
/*
 * When "p_verbose" is above zero: give a message that the backtracking
 * engine is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
    MSG_PUTS(pat);
    verbose_leave();
}
#endif
8208
Bram Moolenaar473de612013-06-08 18:19:48 +02008209/*
Bram Moolenaara8bfa172018-12-29 22:28:46 +01008210 * Return whether "prog" is currently being executed.
8211 */
8212 int
8213regprog_in_use(regprog_T *prog)
8214{
8215 return prog->re_in_use;
8216}
8217
8218/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008219 * Match a regexp against a string.
8220 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008221 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008222 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008223 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008224 *
8225 * Return TRUE if there is a match, FALSE if not.
8226 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008227 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008228vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01008229 regmatch_T *rmp,
8230 char_u *line, /* string to match against */
8231 colnr_T col, /* column to start looking for match */
8232 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008233{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008234 int result;
8235 regexec_T rex_save;
8236 int rex_in_use_save = rex_in_use;
8237
Bram Moolenaar0270f382018-07-17 05:43:58 +02008238 // Cannot use the same prog recursively, it contains state.
8239 if (rmp->regprog->re_in_use)
8240 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008241 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008242 return FALSE;
8243 }
8244 rmp->regprog->re_in_use = TRUE;
8245
Bram Moolenaar6100d022016-10-02 16:51:57 +02008246 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02008247 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02008248 rex_save = rex;
8249 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008250
Bram Moolenaar6100d022016-10-02 16:51:57 +02008251 rex.reg_startp = NULL;
8252 rex.reg_endp = NULL;
8253 rex.reg_startpos = NULL;
8254 rex.reg_endpos = NULL;
8255
8256 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008257 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008258
8259 /* NFA engine aborted because it's very slow. */
8260 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8261 && result == NFA_TOO_EXPENSIVE)
8262 {
8263 int save_p_re = p_re;
8264 int re_flags = rmp->regprog->re_flags;
8265 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8266
8267 p_re = BACKTRACKING_ENGINE;
8268 vim_regfree(rmp->regprog);
8269 if (pat != NULL)
8270 {
8271#ifdef FEAT_EVAL
8272 report_re_switch(pat);
8273#endif
8274 rmp->regprog = vim_regcomp(pat, re_flags);
8275 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008276 {
8277 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008278 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008279 rmp->regprog->re_in_use = FALSE;
8280 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008281 vim_free(pat);
8282 }
8283
8284 p_re = save_p_re;
8285 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008286
8287 rex_in_use = rex_in_use_save;
8288 if (rex_in_use)
8289 rex = rex_save;
8290
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008291 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008292}
8293
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008294/*
8295 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008296 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008297 */
8298 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008299vim_regexec_prog(
8300 regprog_T **prog,
8301 int ignore_case,
8302 char_u *line,
8303 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008304{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008305 int r;
8306 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008307
8308 regmatch.regprog = *prog;
8309 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008310 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008311 *prog = regmatch.regprog;
8312 return r;
8313}
8314
8315/*
8316 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008317 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008318 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008319 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008320vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008321{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008322 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008323}
8324
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int matched = vim_regexec_string(rmp, line, col, TRUE);

    return matched;
}
#endif
8338
8339/*
8340 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008341 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
8342 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008343 * Uses curbuf for line count and 'iskeyword'.
8344 *
8345 * Return zero if there is no match. Return number of lines contained in the
8346 * match otherwise.
8347 */
8348 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008349vim_regexec_multi(
8350 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01008351 win_T *win, /* window in which to search or NULL */
8352 buf_T *buf, /* buffer in which to search */
8353 linenr_T lnum, /* nr of line to start looking for match */
8354 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008355 proftime_T *tm, /* timeout limit or NULL */
8356 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008357{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008358 int result;
8359 regexec_T rex_save;
8360 int rex_in_use_save = rex_in_use;
8361
Bram Moolenaar0270f382018-07-17 05:43:58 +02008362 // Cannot use the same prog recursively, it contains state.
8363 if (rmp->regprog->re_in_use)
8364 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008365 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008366 return FALSE;
8367 }
8368 rmp->regprog->re_in_use = TRUE;
8369
Bram Moolenaar6100d022016-10-02 16:51:57 +02008370 if (rex_in_use)
8371 /* Being called recursively, save the state. */
8372 rex_save = rex;
8373 rex_in_use = TRUE;
8374
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008375 result = rmp->regprog->engine->regexec_multi(
8376 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008377 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008378
8379 /* NFA engine aborted because it's very slow. */
8380 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8381 && result == NFA_TOO_EXPENSIVE)
8382 {
8383 int save_p_re = p_re;
8384 int re_flags = rmp->regprog->re_flags;
8385 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8386
8387 p_re = BACKTRACKING_ENGINE;
8388 vim_regfree(rmp->regprog);
8389 if (pat != NULL)
8390 {
8391#ifdef FEAT_EVAL
8392 report_re_switch(pat);
8393#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008394#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008395 // checking for \z misuse was already done when compiling for NFA,
8396 // allow all here
8397 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008398#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01008399 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008400#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008401 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008402#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008403
Bram Moolenaarfda37292014-11-05 14:27:36 +01008404 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008405 {
8406 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008407 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008408 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008409 rmp->regprog->re_in_use = FALSE;
8410 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008411 vim_free(pat);
8412 }
8413 p_re = save_p_re;
8414 }
8415
Bram Moolenaar6100d022016-10-02 16:51:57 +02008416 rex_in_use = rex_in_use_save;
8417 if (rex_in_use)
8418 rex = rex_save;
8419
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008420 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008421}