blob: 20894ea72d5d1fc1aab4320d43fb48c6f5cb8be4 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: 20-48 normal, 50-78 include a line-break.
 * Adding ADD_NL to a normal class opcode gives the variant that also matches
 * a line-break.
 */
#define ADD_NL		30
/* First opcode of the "with line-break" range.  The replacement list is
 * parenthesized so that it expands correctly inside any expression. */
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
/* Last opcode of the "with line-break" range, parenthesized like FIRST_NL. */
#define LAST_NL		(NUPPER + ADD_NL)
/* Non-zero when opcode "op" is a character class that includes a line-break.
 * Note that "op" is evaluated twice. */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249/*
250 * Magic characters have a special meaning, they don't match literally.
251 * Magic characters are negative. This separates them from literal characters
252 * (possibly multi-byte). Only ASCII characters can be Magic.
253 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)

/*
 * Return the literal character for "x": a Magic value is converted back to
 * the plain character, any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magic-ness of "x": a Magic value becomes the plain character, a
 * plain character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
273
274/*
275 * The first byte of the regexp internal "program" is actually this magic
276 * number; the start node begins in the second byte. It's used to catch the
277 * most severe mutilation of the program by the caller.
278 */
279
280#define REGMAGIC 0234
281
282/*
283 * Opcode notes:
284 *
285 * BRANCH The set of branches constituting a single choice are hooked
286 * together with their "next" pointers, since precedence prevents
287 * anything being concatenated to any individual branch. The
288 * "next" pointer of the last BRANCH in a choice points to the
289 * thing following the whole choice. This is also where the
290 * final "next" pointer of each individual branch points; each
291 * branch starts with the operand node of a BRANCH node.
292 *
293 * BACK Normal "next" pointers all implicitly point forward; BACK
294 * exists to make loop structures possible.
295 *
296 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
297 * BRANCH structures using BACK. Simple cases (one character
298 * per match) are implemented with STAR and PLUS for speed
299 * and to minimize recursive plunges.
300 *
301 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
302 * node, and defines the min and max limits to be used for that
303 * node.
304 *
305 * MOPEN,MCLOSE ...are numbered at compile time.
306 * ZOPEN,ZCLOSE ...ditto
307 */
308
309/*
310 * A node is one char of opcode followed by two chars of "next" pointer.
311 * "Next" pointers are stored as two 8-bit bytes, high order first. The
312 * value is a positive offset from the opcode of the node containing it.
313 * An operand, if any, simply follows the node. (Note that much of the
314 * code generation knows about this implicit relationship.)
315 *
316 * Using two bytes for the "next" pointer is vast overkill for most things,
317 * but allows patterns to get big without disasters.
318 */
/* Opcode of the node at "p": the first byte of the node. */
#define OP(p)		((int)*(p))
/* "Next" offset of the node at "p": built from the two bytes that follow the
 * opcode, high order byte first.  Each byte is masked with 0377 so that only
 * its low eight bits contribute. */
#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* Start of the operand of the node at "p": directly after the opcode byte and
 * the two "next" bytes. */
#define OPERAND(p)	((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
			+ ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes.  It is read from the four
 * bytes that follow the first operand, that is (p)[7] up to (p)[10]. */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)	(p)[7]
329
330/*
331 * Utility definitions.
332 */
333#define UCHARAT(p) ((int)*(char_u *)(p))
334
335/* Used for an error (down from) vim_regcomp(): give the error message, set
336 * rc_did_emsg and return NULL */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100337#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
338#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
339#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
340#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar1be45b22019-01-14 22:46:15 +0100341#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100342#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200343#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344
Bram Moolenaar95f09602016-11-10 20:01:45 +0100345
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346#define MAX_LIMIT (32767L << 16L)
347
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100348static int cstrncmp(char_u *s1, char_u *s2, int *n);
349static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000350
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200351#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100352static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100355static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#endif
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200361static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
362static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200363static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
364static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
365static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200366#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200367static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
Bram Moolenaarbcf94422018-06-23 14:21:42 +0200368static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200369#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200370static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200371static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar0270f382018-07-17 05:43:58 +0200372static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
373
Bram Moolenaar071d4272004-06-13 20:20:40 +0000374#define NOT_MULTI 0
375#define MULTI_ONE 1
376#define MULTI_MULT 2
377/*
378 * Return NOT_MULTI if c is not a "multi" operator.
379 * Return MULTI_ONE if c is a single "multi" operator.
380 * Return MULTI_MULT if c is a multi "multi" operator.
381 */
382 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100383re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000384{
385 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
386 return MULTI_ONE;
387 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
388 return MULTI_MULT;
389 return NOT_MULTI;
390}
391
392/*
393 * Flags to be passed up and down.
394 */
395#define HASWIDTH 0x1 /* Known never to match null string. */
396#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
397#define SPSTART 0x4 /* Starts with * or +. */
398#define HASNL 0x8 /* Contains some \n. */
399#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
400#define WORST 0 /* Worst case. */
401
402/*
403 * When regcode is set to this value, code is not emitted and size is computed
404 * instead.
405 */
406#define JUST_CALC_SIZE ((char_u *) -1)
407
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000408static char_u *reg_prev_sub = NULL;
409
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410/*
411 * REGEXP_INRANGE contains all characters which are always special in a []
412 * range after '\'.
413 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
414 * These are:
415 * \n - New line (NL).
416 * \r - Carriage Return (CR).
417 * \t - Tab (TAB).
418 * \e - Escape (ESC).
419 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000420 * \d - Character code in decimal, eg \d123
 *	\o	- Character code in octal, eg \o40
422 * \x - Character code in hex, eg \x4a
423 * \u - Multibyte character code, eg \u20ac
424 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425 */
426static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000427static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428
Bram Moolenaar071d4272004-06-13 20:20:40 +0000429/*
430 * Translate '\x' to its control character, except "\n", which is Magic.
431 */
432 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100433backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434{
435 switch (c)
436 {
437 case 'r': return CAR;
438 case 't': return TAB;
439 case 'e': return ESC;
440 case 'b': return BS;
441 }
442 return c;
443}
444
445/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000446 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000447 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
448 * recognized. Otherwise "pp" is advanced to after the item.
449 */
450 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100451get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000452{
453 static const char *(class_names[]) =
454 {
455 "alnum:]",
456#define CLASS_ALNUM 0
457 "alpha:]",
458#define CLASS_ALPHA 1
459 "blank:]",
460#define CLASS_BLANK 2
461 "cntrl:]",
462#define CLASS_CNTRL 3
463 "digit:]",
464#define CLASS_DIGIT 4
465 "graph:]",
466#define CLASS_GRAPH 5
467 "lower:]",
468#define CLASS_LOWER 6
469 "print:]",
470#define CLASS_PRINT 7
471 "punct:]",
472#define CLASS_PUNCT 8
473 "space:]",
474#define CLASS_SPACE 9
475 "upper:]",
476#define CLASS_UPPER 10
477 "xdigit:]",
478#define CLASS_XDIGIT 11
479 "tab:]",
480#define CLASS_TAB 12
481 "return:]",
482#define CLASS_RETURN 13
483 "backspace:]",
484#define CLASS_BACKSPACE 14
485 "escape:]",
486#define CLASS_ESCAPE 15
487 };
488#define CLASS_NONE 99
489 int i;
490
491 if ((*pp)[1] == ':')
492 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000493 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
495 {
496 *pp += STRLEN(class_names[i]) + 2;
497 return i;
498 }
499 }
500 return CLASS_NONE;
501}
502
/*
 * Specific version of character class functions.
 * Using a table to keep this fast: class_tab[c] holds the RI_ flags that
 * apply to the character with value "c" (0 - 255).
 */
static short	class_tab[256];

#define	    RI_DIGIT	0x01
#define	    RI_HEX	0x02
#define	    RI_OCTAL	0x04
#define	    RI_WORD	0x08
#define	    RI_HEAD	0x10
#define	    RI_ALPHA	0x20
#define	    RI_LOWER	0x40
#define	    RI_UPPER	0x80
#define	    RI_WHITE	0x100

/*
 * Fill class_tab[].  The work is only done on the first call, later calls
 * return right away.  Until the first call all entries are zero, thus every
 * ri_*() check below fails.
 */
    static void
init_class_tab(void)
{
    int		i;
    static int	done = FALSE;

    if (done)
	return;

    for (i = 0; i < 256; ++i)
    {
	if (i >= '0' && i <= '7')
	    class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
	else if (i >= '8' && i <= '9')
	    class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
	else if (i >= 'a' && i <= 'f')
	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
#ifdef EBCDIC
	else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
						    || (i >= 's' && i <= 'z'))
#else
	else if (i >= 'g' && i <= 'z')
#endif
	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
	else if (i >= 'A' && i <= 'F')
	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
#ifdef EBCDIC
	else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
						    || (i >= 'S' && i <= 'Z'))
#else
	else if (i >= 'G' && i <= 'Z')
#endif
	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
	else if (i == '_')
	    class_tab[i] = RI_WORD + RI_HEAD;
	else
	    class_tab[i] = 0;
    }
    /* Space and Tab are the only white space characters. */
    class_tab[' '] |= RI_WHITE;
    class_tab['\t'] |= RI_WHITE;
    done = TRUE;
}

/*
 * Character class checks for character "c", using class_tab[].
 * A character of 0x100 or above never matches.  "c" must not be negative,
 * there is no check for that.  "c" is evaluated twice, do not pass an
 * expression with side effects.
 * The parameter is parenthesized in the range check, so that an argument
 * such as "x ? a : b" is compared as a whole.
 */
#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000571
572/* flags for regflags */
573#define RF_ICASE 1 /* ignore case */
574#define RF_NOICASE 2 /* don't ignore case */
575#define RF_HASNL 4 /* can match a NL */
576#define RF_ICOMBINE 8 /* ignore combining characters */
577#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
578
579/*
580 * Global work variables for vim_regcomp().
581 */
582
583static char_u *regparse; /* Input-scan pointer. */
584static int prevchr_len; /* byte length of previous char */
585static int num_complex_braces; /* Complex \{...} count */
586static int regnpar; /* () count. */
587#ifdef FEAT_SYN_HL
588static int regnzpar; /* \z() count. */
589static int re_has_z; /* \z item detected */
590#endif
591static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
592static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000593static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000594static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
595static unsigned regflags; /* RF_ flags for prog */
596static long brace_min[10]; /* Minimums for complex brace repeats */
597static long brace_max[10]; /* Maximums for complex brace repeats */
598static int brace_count[10]; /* Current counts for complex brace repeats */
599#if defined(FEAT_SYN_HL) || defined(PROTO)
600static int had_eol; /* TRUE when EOL found by vim_regcomp() */
601#endif
602static int one_exactly = FALSE; /* only do one char for EXACTLY */
603
604static int reg_magic; /* magicness of the pattern: */
605#define MAGIC_NONE 1 /* "\V" very unmagic */
606#define MAGIC_OFF 2 /* "\M" or 'magic' off */
607#define MAGIC_ON 3 /* "\m" or 'magic' */
608#define MAGIC_ALL 4 /* "\v" very magic */
609
610static int reg_string; /* matching with a string instead of a buffer
611 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000612static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000613
/*
 * META contains all characters that may be magic, except '^' and '$'.
 */

#ifdef EBCDIC
static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
#else
/* META[] is used often enough to justify turning it into a table. */
/*
 * The table is indexed by the character value, a 1 marks a character that
 * may be magic.  Each row covers 16 characters; the comment above a row
 * names the characters that are flagged in it.
 * NOTE: the last row has only 15 entries, thus the table has 127 entries:
 * index 0 up to and including '~' (126).  An index above '~' is outside of
 * the table.
 * NOTE(review): 'Z' is flagged here but does not appear in the EBCDIC META
 * string above -- confirm whether that difference is intended.
 */
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*		   %  &     (  )  *  +	      .    */
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/*     1  2  3	4  5  6  7  8  9	<  =  >  ? */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
/*  @  A     C	D     F     H  I     K	L  M	 O */
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
/*  P	     S	   U  V  W  X	  Z  [		 _ */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
/*     a     c	d     f     h  i     k	l  m  n  o */
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
/*  p	     s	   u  v  w  x	  z  {	|     ~    */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};
#endif
639
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200640static int curchr; /* currently parsed character */
641/* Previous character. Note: prevchr is sometimes -1 when we are not at the
642 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
643 * because ^ was taken to be magic -- webb */
644static int prevchr;
645static int prevprevchr; /* previous-previous character */
646static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000647
648/* arguments for reg() */
649#define REG_NOPAREN 0 /* toplevel reg() */
650#define REG_PAREN 1 /* \(\) */
651#define REG_ZPAREN 2 /* \z(\) */
652#define REG_NPAREN 3 /* \%(\) */
653
/*
 * Holds a copy of the pattern parsing state.
 * NOTE(review): most members have the same name as a file-level parsing
 * variable (regparse, prevchr_len, curchr, prevchr, prevprevchr, nextchr,
 * regnpar), so this presumably is a snapshot of those that can be restored
 * later.  The code that saves and restores it is not in view -- confirm.
 */
typedef struct
{
    char_u	*regparse;	/* input-scan pointer */
    int		prevchr_len;	/* byte length of previous char */
    int		curchr;		/* currently parsed character */
    int		prevchr;	/* previous character */
    int		prevprevchr;	/* previous-previous character */
    int		nextchr;	/* used for ungetchr() */
    int		at_start;
    int		prev_at_start;
    int		regnpar;	/* () count */
} parse_state_T;
666
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667/*
668 * Forward declarations for vim_regcomp()'s friends.
669 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100670static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100671static int getchr(void);
672static void skipchr_keepstart(void);
673static int peekchr(void);
674static void skipchr(void);
675static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100676static long gethexchrs(int maxinputlen);
677static long getoctchrs(void);
678static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100679static int coll_get_char(void);
680static void regcomp_start(char_u *expr, int flags);
681static char_u *reg(int, int *);
682static char_u *regbranch(int *flagp);
683static char_u *regconcat(int *flagp);
684static char_u *regpiece(int *);
685static char_u *regatom(int *);
686static char_u *regnode(int);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100687static int use_multibytecode(int c);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100688static int prog_magic_wrong(void);
689static char_u *regnext(char_u *);
690static void regc(int b);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100691static void regmbc(int c);
Bram Moolenaara12a1612019-01-24 16:39:02 +0100692#define REGMBC(x) regmbc(x);
693#define CASEMBC(x) case x:
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100694static void reginsert(int, char_u *);
695static void reginsert_nr(int op, long val, char_u *opnd);
696static void reginsert_limits(int, long, long, char_u *);
697static char_u *re_put_long(char_u *pr, long_u val);
698static int read_limits(long *, long *);
699static void regtail(char_u *, char_u *);
700static void regoptail(char_u *, char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000701
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200702static regengine_T bt_regengine;
703static regengine_T nfa_regengine;
704
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705/*
706 * Return TRUE if compiled regular expression "prog" can match a line break.
707 */
708 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100709re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000710{
711 return (prog->regflags & RF_HASNL);
712}
713
714/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000715 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
716 * Returns a character representing the class. Zero means that no item was
717 * recognized. Otherwise "pp" is advanced to after the item.
718 */
719 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100720get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000721{
722 int c;
723 int l = 1;
724 char_u *p = *pp;
725
726 if (p[1] == '=')
727 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000728 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000729 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000730 if (p[l + 2] == '=' && p[l + 3] == ']')
731 {
Bram Moolenaardf177f62005-02-22 08:39:57 +0000732 if (has_mbyte)
733 c = mb_ptr2char(p + 2);
734 else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000735 c = p[2];
736 *pp += l + 4;
737 return c;
738 }
739 }
740 return 0;
741}
742
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 * Each entry is one NUL-terminated string: a plain letter followed by the
 * other byte values that reg_equi_class() emits together with it.
 * The plain letter is written as its own string literal so that it stands
 * apart from the hex escapes; the compiler concatenates adjacent literals,
 * so the resulting bytes are the same as with a single literal.
 */
char *EQUIVAL_CLASS_C[16] = {
    /* upper case */
    "A" "\x62\x63\x64\x65\x66\x67",
    "C" "\x68",
    "E" "\x71\x72\x73\x74",
    "I" "\x75\x76\x77\x78",
    "N" "\x69",
    "O" "\xEB\xEC\xED\xEE\xEF\x80",
    "U" "\xFB\xFC\xFD\xFE",
    "Y" "\xBA",
    /* lower case */
    "a" "\x42\x43\x44\x45\x46\x47",
    "c" "\x48",
    "e" "\x51\x52\x53\x54",
    "i" "\x55\x56\x57\x58",
    "n" "\x49",
    "o" "\xCB\xCC\xCD\xCE\xCF\x70",
    "u" "\xDB\xDC\xDD\xDE",
    "y" "\x8D\xDF",
};
#endif
766
Bram Moolenaardf177f62005-02-22 08:39:57 +0000767/*
768 * Produce the bytes for equivalence class "c".
769 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200770 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000771 */
772 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100773reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000774{
Bram Moolenaardf177f62005-02-22 08:39:57 +0000775 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000776 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000777 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200778#ifdef EBCDIC
779 int i;
780
781 /* This might be slower than switch/case below. */
782 for (i = 0; i < 16; i++)
783 {
784 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
785 {
786 char *p = EQUIVAL_CLASS_C[i];
787
788 while (*p != 0)
789 regmbc(*p++);
790 return;
791 }
792 }
793#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000794 switch (c)
795 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200796 /* Do not use '\300' style, it results in a negative number. */
797 case 'A': case 0xc0: case 0xc1: case 0xc2:
798 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200799 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
800 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200801 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
802 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
803 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200804 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
805 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
806 REGMBC(0x1ea2)
807 return;
808 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
809 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000810 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200811 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200812 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200813 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200814 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
815 REGMBC(0x10c)
816 return;
817 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
818 CASEMBC(0x1e0e) CASEMBC(0x1e10)
819 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
820 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000821 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200822 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200823 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
824 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200825 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
826 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200827 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
828 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
829 REGMBC(0x1ebc)
830 return;
831 case 'F': CASEMBC(0x1e1e)
832 regmbc('F'); REGMBC(0x1e1e)
833 return;
834 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
835 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
836 CASEMBC(0x1e20)
837 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
838 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
839 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
840 return;
841 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
842 CASEMBC(0x1e26) CASEMBC(0x1e28)
843 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
844 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000845 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200846 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200847 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
848 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200849 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
850 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200851 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
852 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
853 REGMBC(0x1ec8)
854 return;
855 case 'J': CASEMBC(0x134)
856 regmbc('J'); REGMBC(0x134)
857 return;
858 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
859 CASEMBC(0x1e34)
860 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
861 REGMBC(0x1e30) REGMBC(0x1e34)
862 return;
863 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
864 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
865 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
866 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
867 REGMBC(0x1e3a)
868 return;
869 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
870 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000871 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200872 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200873 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
874 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200875 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200876 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
877 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000878 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200879 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
880 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200881 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
882 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200883 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
884 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
885 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200886 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
887 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
888 REGMBC(0x1ec) REGMBC(0x1ece)
889 return;
890 case 'P': case 0x1e54: case 0x1e56:
891 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
892 return;
893 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
894 CASEMBC(0x1e58) CASEMBC(0x1e5e)
895 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
896 REGMBC(0x1e58) REGMBC(0x1e5e)
897 return;
898 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
899 CASEMBC(0x160) CASEMBC(0x1e60)
900 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
901 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
902 return;
903 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
904 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
905 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
906 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000907 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200908 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200909 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
910 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
911 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200912 regmbc('U'); regmbc(0xd9); regmbc(0xda);
913 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200914 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
915 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
916 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
917 return;
918 case 'V': CASEMBC(0x1e7c)
919 regmbc('V'); REGMBC(0x1e7c)
920 return;
921 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
922 CASEMBC(0x1e84) CASEMBC(0x1e86)
923 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
924 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
925 return;
926 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
927 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000928 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200929 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200930 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
931 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200932 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200933 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
934 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
935 return;
936 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
937 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
938 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
939 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
940 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000941 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200942 case 'a': case 0xe0: case 0xe1: case 0xe2:
943 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200944 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
945 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200946 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
947 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
948 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200949 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
950 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
951 REGMBC(0x1ea3)
952 return;
953 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
954 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000955 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200956 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200957 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200958 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200959 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
960 REGMBC(0x10d)
961 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +0200962 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
963 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200964 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +0200965 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000966 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200967 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200968 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
969 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200970 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
971 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200972 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
973 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
974 REGMBC(0x1ebd)
975 return;
976 case 'f': CASEMBC(0x1e1f)
977 regmbc('f'); REGMBC(0x1e1f)
978 return;
979 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
980 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
981 CASEMBC(0x1e21)
982 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
983 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
984 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
985 return;
986 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
987 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
988 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
989 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
990 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000991 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200992 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200993 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
994 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200995 regmbc('i'); regmbc(0xec); regmbc(0xed);
996 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200997 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
998 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
999 return;
1000 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1001 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1002 return;
1003 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1004 CASEMBC(0x1e35)
1005 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1006 REGMBC(0x1e31) REGMBC(0x1e35)
1007 return;
1008 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1009 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1010 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1011 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1012 REGMBC(0x1e3b)
1013 return;
1014 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1015 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001016 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001017 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001018 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1019 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001020 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001021 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1022 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001023 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001024 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1025 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001026 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1027 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001028 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1029 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1030 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001031 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1032 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1033 REGMBC(0x1ed) REGMBC(0x1ecf)
1034 return;
1035 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1036 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1037 return;
1038 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1039 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1040 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1041 REGMBC(0x1e59) REGMBC(0x1e5f)
1042 return;
1043 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1044 CASEMBC(0x161) CASEMBC(0x1e61)
1045 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1046 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1047 return;
1048 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1049 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1050 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1051 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001052 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001053 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001054 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1055 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1056 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001057 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1058 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001059 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1060 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1061 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1062 return;
1063 case 'v': CASEMBC(0x1e7d)
1064 regmbc('v'); REGMBC(0x1e7d)
1065 return;
1066 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1067 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1068 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1069 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1070 REGMBC(0x1e98)
1071 return;
1072 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1073 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001074 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001075 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001076 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1077 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001078 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001079 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1080 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1081 return;
1082 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1083 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1084 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1085 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1086 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001087 return;
1088 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001089#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001090 }
1091 regmbc(c);
1092}
1093
1094/*
1095 * Check for a collating element "[.a.]". "pp" points to the '['.
1096 * Returns a character. Zero means that no item was recognized. Otherwise
1097 * "pp" is advanced to after the item.
1098 * Currently only single characters are recognized!
1099 */
1100 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001101get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001102{
1103 int c;
1104 int l = 1;
1105 char_u *p = *pp;
1106
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001107 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001108 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00001109 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001110 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001111 if (p[l + 2] == '.' && p[l + 3] == ']')
1112 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00001113 if (has_mbyte)
1114 c = mb_ptr2char(p + 2);
1115 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00001116 c = p[2];
1117 *pp += l + 4;
1118 return c;
1119 }
1120 }
1121 return 0;
1122}
1123
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001124static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1125static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1126
1127 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001128get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001129{
1130 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1131 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1132}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001133
1134/*
1135 * Skip over a "[]" range.
1136 * "p" must point to the character after the '['.
1137 * The returned pointer is on the matching ']', or the terminating NUL.
1138 */
1139 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001140skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001141{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001142 int l;
Bram Moolenaardf177f62005-02-22 08:39:57 +00001143
Bram Moolenaardf177f62005-02-22 08:39:57 +00001144 if (*p == '^') /* Complement of range. */
1145 ++p;
1146 if (*p == ']' || *p == '-')
1147 ++p;
1148 while (*p != NUL && *p != ']')
1149 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001150 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001151 p += l;
1152 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00001153 if (*p == '-')
1154 {
1155 ++p;
1156 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001157 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001158 }
1159 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001160 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001161 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001162 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001163 p += 2;
1164 else if (*p == '[')
1165 {
1166 if (get_char_class(&p) == CLASS_NONE
1167 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001168 && get_coll_element(&p) == 0
1169 && *p != NUL)
1170 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001171 }
1172 else
1173 ++p;
1174 }
1175
1176 return p;
1177}
1178
1179/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001180 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001181 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001182 * Take care of characters with a backslash in front of it.
1183 * Skip strings inside [ and ].
1184 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1185 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1186 * is changed in-place.
1187 */
1188 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001189skip_regexp(
1190 char_u *startp,
1191 int dirc,
1192 int magic,
1193 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001194{
1195 int mymagic;
1196 char_u *p = startp;
1197
1198 if (magic)
1199 mymagic = MAGIC_ON;
1200 else
1201 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001202 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001203
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001204 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001205 {
1206 if (p[0] == dirc) /* found end of regexp */
1207 break;
1208 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1209 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1210 {
1211 p = skip_anyof(p + 1);
1212 if (p[0] == NUL)
1213 break;
1214 }
1215 else if (p[0] == '\\' && p[1] != NUL)
1216 {
1217 if (dirc == '?' && newp != NULL && p[1] == '?')
1218 {
1219 /* change "\?" to "?", make a copy first. */
1220 if (*newp == NULL)
1221 {
1222 *newp = vim_strsave(startp);
1223 if (*newp != NULL)
1224 p = *newp + (p - startp);
1225 }
1226 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001227 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001228 else
1229 ++p;
1230 }
1231 else
1232 ++p; /* skip next character */
1233 if (*p == 'v')
1234 mymagic = MAGIC_ALL;
1235 else if (*p == 'V')
1236 mymagic = MAGIC_NONE;
1237 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238 }
1239 return p;
1240}
1241
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001242/*
1243 * Return TRUE if the back reference is legal. We must have seen the close
1244 * brace.
1245 * TODO: Should also check that we don't refer to something that is repeated
1246 * (+*=): what instance of the repetition should we match?
1247 */
1248 static int
1249seen_endbrace(int refnum)
1250{
1251 if (!had_endbrace[refnum])
1252 {
1253 char_u *p;
1254
1255 /* Trick: check if "@<=" or "@<!" follows, in which case
1256 * the \1 can appear before the referenced match. */
1257 for (p = regparse; *p != NUL; ++p)
1258 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1259 break;
1260 if (*p == NUL)
1261 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01001262 emsg(_("E65: Illegal back reference"));
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001263 rc_did_emsg = TRUE;
1264 return FALSE;
1265 }
1266 }
1267 return TRUE;
1268}
1269
Bram Moolenaar071d4272004-06-13 20:20:40 +00001270/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001271 * bt_regcomp() - compile a regular expression into internal code for the
1272 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001273 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001274 *
1275 * We can't allocate space until we know how big the compiled form will be,
1276 * but we can't compile it (and thus know how big it is) until we've got a
1277 * place to put the code. So we cheat: we compile it twice, once with code
1278 * generation turned off and size counting turned on, and once "for real".
1279 * This also means that we don't allocate space until we are sure that the
1280 * thing really will compile successfully, and we never have to move the
1281 * code and thus invalidate pointers into it. (Note that it has to be in
1282 * one piece because vim_free() must be able to free it all.)
1283 *
1284 * Whether upper/lower case is to be ignored is decided when executing the
1285 * program, it does not matter here.
1286 *
1287 * Beware that the optimization-preparation code in here knows about some
1288 * of the structure of the compiled regexp.
1289 * "re_flags": RE_MAGIC and/or RE_STRING.
1290 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001291 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001292bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001293{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001294 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001295 char_u *scan;
1296 char_u *longest;
1297 int len;
1298 int flags;
1299
1300 if (expr == NULL)
1301 EMSG_RET_NULL(_(e_null));
1302
1303 init_class_tab();
1304
1305 /*
1306 * First pass: determine size, legality.
1307 */
1308 regcomp_start(expr, re_flags);
1309 regcode = JUST_CALC_SIZE;
1310 regc(REGMAGIC);
1311 if (reg(REG_NOPAREN, &flags) == NULL)
1312 return NULL;
1313
Bram Moolenaar071d4272004-06-13 20:20:40 +00001314 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001315 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001316 if (r == NULL)
1317 return NULL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02001318 r->re_in_use = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319
1320 /*
1321 * Second pass: emit code.
1322 */
1323 regcomp_start(expr, re_flags);
1324 regcode = r->program;
1325 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001326 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 {
1328 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001329 if (reg_toolong)
1330 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001331 return NULL;
1332 }
1333
1334 /* Dig out information for optimizations. */
1335 r->regstart = NUL; /* Worst-case defaults. */
1336 r->reganch = 0;
1337 r->regmust = NULL;
1338 r->regmlen = 0;
1339 r->regflags = regflags;
1340 if (flags & HASNL)
1341 r->regflags |= RF_HASNL;
1342 if (flags & HASLOOKBH)
1343 r->regflags |= RF_LOOKBH;
1344#ifdef FEAT_SYN_HL
1345 /* Remember whether this pattern has any \z specials in it. */
1346 r->reghasz = re_has_z;
1347#endif
1348 scan = r->program + 1; /* First BRANCH. */
1349 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1350 {
1351 scan = OPERAND(scan);
1352
1353 /* Starting-point info. */
1354 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1355 {
1356 r->reganch++;
1357 scan = regnext(scan);
1358 }
1359
1360 if (OP(scan) == EXACTLY)
1361 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001362 if (has_mbyte)
1363 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1364 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001365 r->regstart = *OPERAND(scan);
1366 }
1367 else if ((OP(scan) == BOW
1368 || OP(scan) == EOW
1369 || OP(scan) == NOTHING
1370 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1371 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1372 && OP(regnext(scan)) == EXACTLY)
1373 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00001374 if (has_mbyte)
1375 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1376 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00001377 r->regstart = *OPERAND(regnext(scan));
1378 }
1379
1380 /*
1381 * If there's something expensive in the r.e., find the longest
1382 * literal string that must appear and make it the regmust. Resolve
1383 * ties in favor of later strings, since the regstart check works
1384 * with the beginning of the r.e. and avoiding duplication
1385 * strengthens checking. Not a strong reason, but sufficient in the
1386 * absence of others.
1387 */
1388 /*
1389 * When the r.e. starts with BOW, it is faster to look for a regmust
1390 * first. Used a lot for "#" and "*" commands. (Added by mool).
1391 */
1392 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1393 && !(flags & HASNL))
1394 {
1395 longest = NULL;
1396 len = 0;
1397 for (; scan != NULL; scan = regnext(scan))
1398 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1399 {
1400 longest = OPERAND(scan);
1401 len = (int)STRLEN(OPERAND(scan));
1402 }
1403 r->regmust = longest;
1404 r->regmlen = len;
1405 }
1406 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001407#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001408 regdump(expr, r);
1409#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001410 r->engine = &bt_regengine;
1411 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001412}
1413
/*
 * Free a compiled regexp program, returned by bt_regcomp().
 * bt_regcomp() obtains the whole program with one lalloc() call, so one
 * vim_free() call releases all of it.
 */
    static void
bt_regfree(regprog_T *prog)
{
    vim_free(prog);
}
1422
1423/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001424 * Setup to parse the regexp. Used once to get the length and once to do it.
1425 */
1426 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001427regcomp_start(
1428 char_u *expr,
1429 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001430{
1431 initchr(expr);
1432 if (re_flags & RE_MAGIC)
1433 reg_magic = MAGIC_ON;
1434 else
1435 reg_magic = MAGIC_OFF;
1436 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001437 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001438 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001439
1440 num_complex_braces = 0;
1441 regnpar = 1;
1442 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1443#ifdef FEAT_SYN_HL
1444 regnzpar = 1;
1445 re_has_z = 0;
1446#endif
1447 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001448 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001449 regflags = 0;
1450#if defined(FEAT_SYN_HL) || defined(PROTO)
1451 had_eol = FALSE;
1452#endif
1453}
1454
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found.  This is messy, but it works fine.
 *
 * Returns the current value of the file-level flag "had_eol".
 * regcomp_start() resets that flag to FALSE at the start of every compile
 * pass; the code that sets it is not part of this function.
 */
    int
vim_regcomp_had_eol(void)
{
    return had_eol;
}
#endif
1466
// variables used for parsing
// NOTE(review): presumably maintained by the character-fetching helpers
// (initchr(), peekchr(), skipchr()) - confirm against their definitions.
static int	at_start;	// True when on the first character
static int	prev_at_start;  // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001470
Bram Moolenaar071d4272004-06-13 20:20:40 +00001471/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001472 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001473 *
1474 * Caller must absorb opening parenthesis.
1475 *
1476 * Combining parenthesis handling with the base level of regular expression
1477 * is a trifle forced, but the need to tie the tails of the branches to what
1478 * follows makes it hard to avoid.
1479 */
1480 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001481reg(
1482 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1483 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001484{
1485 char_u *ret;
1486 char_u *br;
1487 char_u *ender;
1488 int parno = 0;
1489 int flags;
1490
1491 *flagp = HASWIDTH; /* Tentatively. */
1492
1493#ifdef FEAT_SYN_HL
1494 if (paren == REG_ZPAREN)
1495 {
1496 /* Make a ZOPEN node. */
1497 if (regnzpar >= NSUBEXP)
1498 EMSG_RET_NULL(_("E50: Too many \\z("));
1499 parno = regnzpar;
1500 regnzpar++;
1501 ret = regnode(ZOPEN + parno);
1502 }
1503 else
1504#endif
1505 if (paren == REG_PAREN)
1506 {
1507 /* Make a MOPEN node. */
1508 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001509 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001510 parno = regnpar;
1511 ++regnpar;
1512 ret = regnode(MOPEN + parno);
1513 }
1514 else if (paren == REG_NPAREN)
1515 {
1516 /* Make a NOPEN node. */
1517 ret = regnode(NOPEN);
1518 }
1519 else
1520 ret = NULL;
1521
1522 /* Pick up the branches, linking them together. */
1523 br = regbranch(&flags);
1524 if (br == NULL)
1525 return NULL;
1526 if (ret != NULL)
1527 regtail(ret, br); /* [MZ]OPEN -> first. */
1528 else
1529 ret = br;
1530 /* If one of the branches can be zero-width, the whole thing can.
1531 * If one of the branches has * at start or matches a line-break, the
1532 * whole thing can. */
1533 if (!(flags & HASWIDTH))
1534 *flagp &= ~HASWIDTH;
1535 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1536 while (peekchr() == Magic('|'))
1537 {
1538 skipchr();
1539 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001540 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001541 return NULL;
1542 regtail(ret, br); /* BRANCH -> BRANCH. */
1543 if (!(flags & HASWIDTH))
1544 *flagp &= ~HASWIDTH;
1545 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1546 }
1547
1548 /* Make a closing node, and hook it on the end. */
1549 ender = regnode(
1550#ifdef FEAT_SYN_HL
1551 paren == REG_ZPAREN ? ZCLOSE + parno :
1552#endif
1553 paren == REG_PAREN ? MCLOSE + parno :
1554 paren == REG_NPAREN ? NCLOSE : END);
1555 regtail(ret, ender);
1556
1557 /* Hook the tails of the branches to the closing node. */
1558 for (br = ret; br != NULL; br = regnext(br))
1559 regoptail(br, ender);
1560
1561 /* Check for proper termination. */
1562 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1563 {
1564#ifdef FEAT_SYN_HL
1565 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001566 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001567 else
1568#endif
1569 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001570 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001571 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001572 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001573 }
1574 else if (paren == REG_NOPAREN && peekchr() != NUL)
1575 {
1576 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001577 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001579 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580 /* NOTREACHED */
1581 }
1582 /*
1583 * Here we set the flag allowing back references to this set of
1584 * parentheses.
1585 */
1586 if (paren == REG_PAREN)
1587 had_endbrace[parno] = TRUE; /* have seen the close paren */
1588 return ret;
1589}
1590
1591/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001592 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001593 * Implements the & operator.
1594 */
1595 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001596regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001597{
1598 char_u *ret;
1599 char_u *chain = NULL;
1600 char_u *latest;
1601 int flags;
1602
1603 *flagp = WORST | HASNL; /* Tentatively. */
1604
1605 ret = regnode(BRANCH);
1606 for (;;)
1607 {
1608 latest = regconcat(&flags);
1609 if (latest == NULL)
1610 return NULL;
1611 /* If one of the branches has width, the whole thing has. If one of
1612 * the branches anchors at start-of-line, the whole thing does.
1613 * If one of the branches uses look-behind, the whole thing does. */
1614 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1615 /* If one of the branches doesn't match a line-break, the whole thing
1616 * doesn't. */
1617 *flagp &= ~HASNL | (flags & HASNL);
1618 if (chain != NULL)
1619 regtail(chain, latest);
1620 if (peekchr() != Magic('&'))
1621 break;
1622 skipchr();
1623 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001624 if (reg_toolong)
1625 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001626 reginsert(MATCH, latest);
1627 chain = latest;
1628 }
1629
1630 return ret;
1631}
1632
1633/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001634 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635 * Implements the concatenation operator.
1636 */
1637 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001638regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001639{
1640 char_u *first = NULL;
1641 char_u *chain = NULL;
1642 char_u *latest;
1643 int flags;
1644 int cont = TRUE;
1645
1646 *flagp = WORST; /* Tentatively. */
1647
1648 while (cont)
1649 {
1650 switch (peekchr())
1651 {
1652 case NUL:
1653 case Magic('|'):
1654 case Magic('&'):
1655 case Magic(')'):
1656 cont = FALSE;
1657 break;
1658 case Magic('Z'):
Bram Moolenaar071d4272004-06-13 20:20:40 +00001659 regflags |= RF_ICOMBINE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001660 skipchr_keepstart();
1661 break;
1662 case Magic('c'):
1663 regflags |= RF_ICASE;
1664 skipchr_keepstart();
1665 break;
1666 case Magic('C'):
1667 regflags |= RF_NOICASE;
1668 skipchr_keepstart();
1669 break;
1670 case Magic('v'):
1671 reg_magic = MAGIC_ALL;
1672 skipchr_keepstart();
1673 curchr = -1;
1674 break;
1675 case Magic('m'):
1676 reg_magic = MAGIC_ON;
1677 skipchr_keepstart();
1678 curchr = -1;
1679 break;
1680 case Magic('M'):
1681 reg_magic = MAGIC_OFF;
1682 skipchr_keepstart();
1683 curchr = -1;
1684 break;
1685 case Magic('V'):
1686 reg_magic = MAGIC_NONE;
1687 skipchr_keepstart();
1688 curchr = -1;
1689 break;
1690 default:
1691 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001692 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001693 return NULL;
1694 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1695 if (chain == NULL) /* First piece. */
1696 *flagp |= flags & SPSTART;
1697 else
1698 regtail(chain, latest);
1699 chain = latest;
1700 if (first == NULL)
1701 first = latest;
1702 break;
1703 }
1704 }
1705 if (first == NULL) /* Loop ran zero times. */
1706 first = regnode(NOTHING);
1707 return first;
1708}
1709
1710/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001711 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001712 *
1713 * Note that the branching code sequences used for = and the general cases
1714 * of * and + are somewhat optimized: they use the same NOTHING node as
1715 * both the endmarker for their branch list and the body of the last branch.
1716 * It might seem that this node could be dispensed with entirely, but the
1717 * endmarker role is not redundant.
1718 */
1719 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001720regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001721{
1722 char_u *ret;
1723 int op;
1724 char_u *next;
1725 int flags;
1726 long minval;
1727 long maxval;
1728
1729 ret = regatom(&flags);
1730 if (ret == NULL)
1731 return NULL;
1732
1733 op = peekchr();
1734 if (re_multi_type(op) == NOT_MULTI)
1735 {
1736 *flagp = flags;
1737 return ret;
1738 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739 /* default flags */
1740 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1741
1742 skipchr();
1743 switch (op)
1744 {
1745 case Magic('*'):
1746 if (flags & SIMPLE)
1747 reginsert(STAR, ret);
1748 else
1749 {
1750 /* Emit x* as (x&|), where & means "self". */
1751 reginsert(BRANCH, ret); /* Either x */
1752 regoptail(ret, regnode(BACK)); /* and loop */
1753 regoptail(ret, ret); /* back */
1754 regtail(ret, regnode(BRANCH)); /* or */
1755 regtail(ret, regnode(NOTHING)); /* null. */
1756 }
1757 break;
1758
1759 case Magic('+'):
1760 if (flags & SIMPLE)
1761 reginsert(PLUS, ret);
1762 else
1763 {
1764 /* Emit x+ as x(&|), where & means "self". */
1765 next = regnode(BRANCH); /* Either */
1766 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001767 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001768 regtail(next, regnode(BRANCH)); /* or */
1769 regtail(ret, regnode(NOTHING)); /* null. */
1770 }
1771 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1772 break;
1773
1774 case Magic('@'):
1775 {
1776 int lop = END;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001777 long nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001778
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001779 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001780 switch (no_Magic(getchr()))
1781 {
1782 case '=': lop = MATCH; break; /* \@= */
1783 case '!': lop = NOMATCH; break; /* \@! */
1784 case '>': lop = SUBPAT; break; /* \@> */
1785 case '<': switch (no_Magic(getchr()))
1786 {
1787 case '=': lop = BEHIND; break; /* \@<= */
1788 case '!': lop = NOBEHIND; break; /* \@<! */
1789 }
1790 }
1791 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001792 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001793 reg_magic == MAGIC_ALL);
1794 /* Look behind must match with behind_pos. */
1795 if (lop == BEHIND || lop == NOBEHIND)
1796 {
1797 regtail(ret, regnode(BHPOS));
1798 *flagp |= HASLOOKBH;
1799 }
1800 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001801 if (lop == BEHIND || lop == NOBEHIND)
1802 {
1803 if (nr < 0)
1804 nr = 0; /* no limit is same as zero limit */
1805 reginsert_nr(lop, nr, ret);
1806 }
1807 else
1808 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001809 break;
1810 }
1811
1812 case Magic('?'):
1813 case Magic('='):
1814 /* Emit x= as (x|) */
1815 reginsert(BRANCH, ret); /* Either x */
1816 regtail(ret, regnode(BRANCH)); /* or */
1817 next = regnode(NOTHING); /* null. */
1818 regtail(ret, next);
1819 regoptail(ret, next);
1820 break;
1821
1822 case Magic('{'):
1823 if (!read_limits(&minval, &maxval))
1824 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001825 if (flags & SIMPLE)
1826 {
1827 reginsert(BRACE_SIMPLE, ret);
1828 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1829 }
1830 else
1831 {
1832 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001833 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001834 reg_magic == MAGIC_ALL);
1835 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1836 regoptail(ret, regnode(BACK));
1837 regoptail(ret, ret);
1838 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1839 ++num_complex_braces;
1840 }
1841 if (minval > 0 && maxval > 0)
1842 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1843 break;
1844 }
1845 if (re_multi_type(peekchr()) != NOT_MULTI)
1846 {
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001847 // Can't have a multi follow a multi.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001848 if (peekchr() == Magic('*'))
Bram Moolenaar1be45b22019-01-14 22:46:15 +01001849 EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
1850 EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
1851 no_Magic(peekchr()));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001852 }
1853
1854 return ret;
1855}
1856
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001857/* When making changes to classchars also change nfa_classcodes. */
1858static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1859static int classcodes[] = {
1860 ANY, IDENT, SIDENT, KWORD, SKWORD,
1861 FNAME, SFNAME, PRINT, SPRINT,
1862 WHITE, NWHITE, DIGIT, NDIGIT,
1863 HEX, NHEX, OCTAL, NOCTAL,
1864 WORD, NWORD, HEAD, NHEAD,
1865 ALPHA, NALPHA, LOWER, NLOWER,
1866 UPPER, NUPPER
1867};
1868
Bram Moolenaar071d4272004-06-13 20:20:40 +00001869/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001870 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001871 *
1872 * Optimization: gobbles an entire sequence of ordinary characters so that
1873 * it can turn them into a single node, which is smaller to store and
1874 * faster to run. Don't do this when one_exactly is set.
1875 */
1876 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001877regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001878{
1879 char_u *ret;
1880 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001882 char_u *p;
1883 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001884 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001885
1886 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001887
1888 c = getchr();
1889 switch (c)
1890 {
1891 case Magic('^'):
1892 ret = regnode(BOL);
1893 break;
1894
1895 case Magic('$'):
1896 ret = regnode(EOL);
1897#if defined(FEAT_SYN_HL) || defined(PROTO)
1898 had_eol = TRUE;
1899#endif
1900 break;
1901
1902 case Magic('<'):
1903 ret = regnode(BOW);
1904 break;
1905
1906 case Magic('>'):
1907 ret = regnode(EOW);
1908 break;
1909
1910 case Magic('_'):
1911 c = no_Magic(getchr());
1912 if (c == '^') /* "\_^" is start-of-line */
1913 {
1914 ret = regnode(BOL);
1915 break;
1916 }
1917 if (c == '$') /* "\_$" is end-of-line */
1918 {
1919 ret = regnode(EOL);
1920#if defined(FEAT_SYN_HL) || defined(PROTO)
1921 had_eol = TRUE;
1922#endif
1923 break;
1924 }
1925
1926 extra = ADD_NL;
1927 *flagp |= HASNL;
1928
1929 /* "\_[" is character range plus newline */
1930 if (c == '[')
1931 goto collection;
1932
1933 /* "\_x" is character class plus newline */
Bram Moolenaar2f40d122017-10-24 21:49:36 +02001934 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001935
1936 /*
1937 * Character classes.
1938 */
1939 case Magic('.'):
1940 case Magic('i'):
1941 case Magic('I'):
1942 case Magic('k'):
1943 case Magic('K'):
1944 case Magic('f'):
1945 case Magic('F'):
1946 case Magic('p'):
1947 case Magic('P'):
1948 case Magic('s'):
1949 case Magic('S'):
1950 case Magic('d'):
1951 case Magic('D'):
1952 case Magic('x'):
1953 case Magic('X'):
1954 case Magic('o'):
1955 case Magic('O'):
1956 case Magic('w'):
1957 case Magic('W'):
1958 case Magic('h'):
1959 case Magic('H'):
1960 case Magic('a'):
1961 case Magic('A'):
1962 case Magic('l'):
1963 case Magic('L'):
1964 case Magic('u'):
1965 case Magic('U'):
1966 p = vim_strchr(classchars, no_Magic(c));
1967 if (p == NULL)
1968 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaara12a1612019-01-24 16:39:02 +01001969
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001970 /* When '.' is followed by a composing char ignore the dot, so that
1971 * the composing char is matched here. */
1972 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1973 {
1974 c = getchr();
1975 goto do_multibyte;
1976 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001977 ret = regnode(classcodes[p - classchars] + extra);
1978 *flagp |= HASWIDTH | SIMPLE;
1979 break;
1980
1981 case Magic('n'):
1982 if (reg_string)
1983 {
1984 /* In a string "\n" matches a newline character. */
1985 ret = regnode(EXACTLY);
1986 regc(NL);
1987 regc(NUL);
1988 *flagp |= HASWIDTH | SIMPLE;
1989 }
1990 else
1991 {
1992 /* In buffer text "\n" matches the end of a line. */
1993 ret = regnode(NEWL);
1994 *flagp |= HASWIDTH | HASNL;
1995 }
1996 break;
1997
1998 case Magic('('):
1999 if (one_exactly)
2000 EMSG_ONE_RET_NULL;
2001 ret = reg(REG_PAREN, &flags);
2002 if (ret == NULL)
2003 return NULL;
2004 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2005 break;
2006
2007 case NUL:
2008 case Magic('|'):
2009 case Magic('&'):
2010 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002011 if (one_exactly)
2012 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002013 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002014 /* NOTREACHED */
2015
2016 case Magic('='):
2017 case Magic('?'):
2018 case Magic('+'):
2019 case Magic('@'):
2020 case Magic('{'):
2021 case Magic('*'):
2022 c = no_Magic(c);
Bram Moolenaar1be45b22019-01-14 22:46:15 +01002023 EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
2024 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002025 /* NOTREACHED */
2026
2027 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002028 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002029 {
2030 char_u *lp;
2031
2032 ret = regnode(EXACTLY);
2033 lp = reg_prev_sub;
2034 while (*lp != NUL)
2035 regc(*lp++);
2036 regc(NUL);
2037 if (*reg_prev_sub != NUL)
2038 {
2039 *flagp |= HASWIDTH;
2040 if ((lp - reg_prev_sub) == 1)
2041 *flagp |= SIMPLE;
2042 }
2043 }
2044 else
2045 EMSG_RET_NULL(_(e_nopresub));
2046 break;
2047
2048 case Magic('1'):
2049 case Magic('2'):
2050 case Magic('3'):
2051 case Magic('4'):
2052 case Magic('5'):
2053 case Magic('6'):
2054 case Magic('7'):
2055 case Magic('8'):
2056 case Magic('9'):
2057 {
2058 int refnum;
2059
2060 refnum = c - Magic('0');
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02002061 if (!seen_endbrace(refnum))
2062 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002063 ret = regnode(BACKREF + refnum);
2064 }
2065 break;
2066
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067 case Magic('z'):
2068 {
2069 c = no_Magic(getchr());
2070 switch (c)
2071 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002072#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002073 case '(': if ((reg_do_extmatch & REX_SET) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002074 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002075 if (one_exactly)
2076 EMSG_ONE_RET_NULL;
2077 ret = reg(REG_ZPAREN, &flags);
2078 if (ret == NULL)
2079 return NULL;
2080 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2081 re_has_z = REX_SET;
2082 break;
2083
2084 case '1':
2085 case '2':
2086 case '3':
2087 case '4':
2088 case '5':
2089 case '6':
2090 case '7':
2091 case '8':
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002092 case '9': if ((reg_do_extmatch & REX_USE) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002093 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002094 ret = regnode(ZREF + c - '0');
2095 re_has_z = REX_USE;
2096 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002097#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002098
2099 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002100 if (re_mult_next("\\zs") == FAIL)
2101 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002102 break;
2103
2104 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002105 if (re_mult_next("\\ze") == FAIL)
2106 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002107 break;
2108
2109 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2110 }
2111 }
2112 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002113
2114 case Magic('%'):
2115 {
2116 c = no_Magic(getchr());
2117 switch (c)
2118 {
2119 /* () without a back reference */
2120 case '(':
2121 if (one_exactly)
2122 EMSG_ONE_RET_NULL;
2123 ret = reg(REG_NPAREN, &flags);
2124 if (ret == NULL)
2125 return NULL;
2126 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2127 break;
2128
2129 /* Catch \%^ and \%$ regardless of where they appear in the
2130 * pattern -- regardless of whether or not it makes sense. */
2131 case '^':
2132 ret = regnode(RE_BOF);
2133 break;
2134
2135 case '$':
2136 ret = regnode(RE_EOF);
2137 break;
2138
2139 case '#':
2140 ret = regnode(CURSOR);
2141 break;
2142
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002143 case 'V':
2144 ret = regnode(RE_VISUAL);
2145 break;
2146
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002147 case 'C':
2148 ret = regnode(RE_COMPOSING);
2149 break;
2150
Bram Moolenaar071d4272004-06-13 20:20:40 +00002151 /* \%[abc]: Emit as a list of branches, all ending at the last
2152 * branch which matches nothing. */
2153 case '[':
2154 if (one_exactly) /* doesn't nest */
2155 EMSG_ONE_RET_NULL;
2156 {
2157 char_u *lastbranch;
2158 char_u *lastnode = NULL;
2159 char_u *br;
2160
2161 ret = NULL;
2162 while ((c = getchr()) != ']')
2163 {
2164 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002165 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166 reg_magic == MAGIC_ALL);
2167 br = regnode(BRANCH);
2168 if (ret == NULL)
2169 ret = br;
2170 else
2171 regtail(lastnode, br);
2172
2173 ungetchr();
2174 one_exactly = TRUE;
2175 lastnode = regatom(flagp);
2176 one_exactly = FALSE;
2177 if (lastnode == NULL)
2178 return NULL;
2179 }
2180 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002181 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002182 reg_magic == MAGIC_ALL);
2183 lastbranch = regnode(BRANCH);
2184 br = regnode(NOTHING);
2185 if (ret != JUST_CALC_SIZE)
2186 {
2187 regtail(lastnode, br);
2188 regtail(lastbranch, br);
2189 /* connect all branches to the NOTHING
2190 * branch at the end */
2191 for (br = ret; br != lastnode; )
2192 {
2193 if (OP(br) == BRANCH)
2194 {
2195 regtail(br, lastbranch);
2196 br = OPERAND(br);
2197 }
2198 else
2199 br = regnext(br);
2200 }
2201 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002202 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002203 break;
2204 }
2205
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002206 case 'd': /* %d123 decimal */
2207 case 'o': /* %o123 octal */
2208 case 'x': /* %xab hex 2 */
2209 case 'u': /* %uabcd hex 4 */
2210 case 'U': /* %U1234abcd hex 8 */
2211 {
Bram Moolenaar4c22a912017-11-02 22:29:38 +01002212 long i;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002213
2214 switch (c)
2215 {
2216 case 'd': i = getdecchrs(); break;
2217 case 'o': i = getoctchrs(); break;
2218 case 'x': i = gethexchrs(2); break;
2219 case 'u': i = gethexchrs(4); break;
2220 case 'U': i = gethexchrs(8); break;
2221 default: i = -1; break;
2222 }
2223
2224 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002225 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002226 _("E678: Invalid character after %s%%[dxouU]"),
2227 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002228 if (use_multibytecode(i))
2229 ret = regnode(MULTIBYTECODE);
2230 else
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002231 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002232 if (i == 0)
2233 regc(0x0a);
2234 else
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002235 regmbc(i);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002236 regc(NUL);
2237 *flagp |= HASWIDTH;
2238 break;
2239 }
2240
Bram Moolenaar071d4272004-06-13 20:20:40 +00002241 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002242 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2243 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002244 {
2245 long_u n = 0;
2246 int cmp;
2247
2248 cmp = c;
2249 if (cmp == '<' || cmp == '>')
2250 c = getchr();
2251 while (VIM_ISDIGIT(c))
2252 {
2253 n = n * 10 + (c - '0');
2254 c = getchr();
2255 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002256 if (c == '\'' && n == 0)
2257 {
2258 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2259 c = getchr();
2260 ret = regnode(RE_MARK);
2261 if (ret == JUST_CALC_SIZE)
2262 regsize += 2;
2263 else
2264 {
2265 *regcode++ = c;
2266 *regcode++ = cmp;
2267 }
2268 break;
2269 }
2270 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002271 {
2272 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002273 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002274 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002275 if (save_prev_at_start)
2276 at_start = TRUE;
2277 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002278 else if (c == 'c')
2279 ret = regnode(RE_COL);
2280 else
2281 ret = regnode(RE_VCOL);
2282 if (ret == JUST_CALC_SIZE)
2283 regsize += 5;
2284 else
2285 {
2286 /* put the number and the optional
2287 * comparator after the opcode */
2288 regcode = re_put_long(regcode, n);
2289 *regcode++ = cmp;
2290 }
2291 break;
2292 }
2293 }
2294
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002295 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002296 reg_magic == MAGIC_ALL);
2297 }
2298 }
2299 break;
2300
2301 case Magic('['):
2302collection:
2303 {
2304 char_u *lp;
2305
2306 /*
2307 * If there is no matching ']', we assume the '[' is a normal
2308 * character. This makes 'incsearch' and ":help [" work.
2309 */
2310 lp = skip_anyof(regparse);
2311 if (*lp == ']') /* there is a matching ']' */
2312 {
2313 int startc = -1; /* > 0 when next '-' is a range */
2314 int endc;
2315
2316 /*
2317 * In a character class, different parsing rules apply.
2318 * Not even \ is special anymore, nothing is.
2319 */
2320 if (*regparse == '^') /* Complement of range. */
2321 {
2322 ret = regnode(ANYBUT + extra);
2323 regparse++;
2324 }
2325 else
2326 ret = regnode(ANYOF + extra);
2327
2328 /* At the start ']' and '-' mean the literal character. */
2329 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002330 {
2331 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002332 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002333 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002334
2335 while (*regparse != NUL && *regparse != ']')
2336 {
2337 if (*regparse == '-')
2338 {
2339 ++regparse;
2340 /* The '-' is not used for a range at the end and
2341 * after or before a '\n'. */
2342 if (*regparse == ']' || *regparse == NUL
2343 || startc == -1
2344 || (regparse[0] == '\\' && regparse[1] == 'n'))
2345 {
2346 regc('-');
2347 startc = '-'; /* [--x] is a range */
2348 }
2349 else
2350 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002351 /* Also accept "a-[.z.]" */
2352 endc = 0;
2353 if (*regparse == '[')
2354 endc = get_coll_element(&regparse);
2355 if (endc == 0)
2356 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002357 if (has_mbyte)
2358 endc = mb_ptr2char_adv(&regparse);
2359 else
Bram Moolenaardf177f62005-02-22 08:39:57 +00002360 endc = *regparse++;
2361 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002362
2363 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002364 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002365 endc = coll_get_char();
2366
Bram Moolenaar071d4272004-06-13 20:20:40 +00002367 if (startc > endc)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002368 EMSG_RET_NULL(_(e_reverse_range));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002369 if (has_mbyte && ((*mb_char2len)(startc) > 1
2370 || (*mb_char2len)(endc) > 1))
2371 {
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002372 /* Limit to a range of 256 chars. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002373 if (endc > startc + 256)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002374 EMSG_RET_NULL(_(e_large_class));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002375 while (++startc <= endc)
2376 regmbc(startc);
2377 }
2378 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002379 {
2380#ifdef EBCDIC
2381 int alpha_only = FALSE;
2382
2383 /* for alphabetical range skip the gaps
2384 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2385 if (isalpha(startc) && isalpha(endc))
2386 alpha_only = TRUE;
2387#endif
2388 while (++startc <= endc)
2389#ifdef EBCDIC
2390 if (!alpha_only || isalpha(startc))
2391#endif
2392 regc(startc);
2393 }
2394 startc = -1;
2395 }
2396 }
2397 /*
2398 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2399 * accepts "\t", "\e", etc., but only when the 'l' flag in
2400 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002401 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002402 */
2403 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002404 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002405 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002406 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002407 && vim_strchr(REGEXP_ABBR,
2408 regparse[1]) != NULL)))
2409 {
2410 regparse++;
2411 if (*regparse == 'n')
2412 {
2413 /* '\n' in range: also match NL */
2414 if (ret != JUST_CALC_SIZE)
2415 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002416 /* Using \n inside [^] does not change what
2417 * matches. "[^\n]" is the same as ".". */
2418 if (*ret == ANYOF)
2419 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002421 *flagp |= HASNL;
2422 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002423 /* else: must have had a \n already */
2424 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002425 regparse++;
2426 startc = -1;
2427 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002428 else if (*regparse == 'd'
2429 || *regparse == 'o'
2430 || *regparse == 'x'
2431 || *regparse == 'u'
2432 || *regparse == 'U')
2433 {
2434 startc = coll_get_char();
2435 if (startc == 0)
2436 regc(0x0a);
2437 else
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002438 regmbc(startc);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002439 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002440 else
2441 {
2442 startc = backslash_trans(*regparse++);
2443 regc(startc);
2444 }
2445 }
2446 else if (*regparse == '[')
2447 {
2448 int c_class;
2449 int cu;
2450
Bram Moolenaardf177f62005-02-22 08:39:57 +00002451 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002452 startc = -1;
2453 /* Characters assumed to be 8 bits! */
2454 switch (c_class)
2455 {
2456 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002457 c_class = get_equi_class(&regparse);
2458 if (c_class != 0)
2459 {
2460 /* produce equivalence class */
2461 reg_equi_class(c_class);
2462 }
2463 else if ((c_class =
2464 get_coll_element(&regparse)) != 0)
2465 {
2466 /* produce a collating element */
2467 regmbc(c_class);
2468 }
2469 else
2470 {
2471 /* literal '[', allow [[-x] as a range */
2472 startc = *regparse++;
2473 regc(startc);
2474 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 break;
2476 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002477 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002479 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002480 break;
2481 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002482 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002483 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002484 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002485 break;
2486 case CLASS_BLANK:
2487 regc(' ');
2488 regc('\t');
2489 break;
2490 case CLASS_CNTRL:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002491 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002492 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002493 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002494 break;
2495 case CLASS_DIGIT:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002496 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002497 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002498 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 break;
2500 case CLASS_GRAPH:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002501 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002502 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002503 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 break;
2505 case CLASS_LOWER:
2506 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002507 if (MB_ISLOWER(cu) && cu != 170
2508 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002509 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002510 break;
2511 case CLASS_PRINT:
2512 for (cu = 1; cu <= 255; cu++)
2513 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002514 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 break;
2516 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002517 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002518 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002519 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002520 break;
2521 case CLASS_SPACE:
2522 for (cu = 9; cu <= 13; cu++)
2523 regc(cu);
2524 regc(' ');
2525 break;
2526 case CLASS_UPPER:
2527 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002528 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002529 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002530 break;
2531 case CLASS_XDIGIT:
2532 for (cu = 1; cu <= 255; cu++)
2533 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002534 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002535 break;
2536 case CLASS_TAB:
2537 regc('\t');
2538 break;
2539 case CLASS_RETURN:
2540 regc('\r');
2541 break;
2542 case CLASS_BACKSPACE:
2543 regc('\b');
2544 break;
2545 case CLASS_ESCAPE:
2546 regc('\033');
2547 break;
2548 }
2549 }
2550 else
2551 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002552 if (has_mbyte)
2553 {
2554 int len;
2555
2556 /* produce a multibyte character, including any
2557 * following composing characters */
2558 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002559 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002560 if (enc_utf8 && utf_char2len(startc) != len)
2561 startc = -1; /* composing chars */
2562 while (--len >= 0)
2563 regc(*regparse++);
2564 }
2565 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002566 {
2567 startc = *regparse++;
2568 regc(startc);
2569 }
2570 }
2571 }
2572 regc(NUL);
2573 prevchr_len = 1; /* last char was the ']' */
2574 if (*regparse != ']')
2575 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2576 skipchr(); /* let's be friends with the lexer again */
2577 *flagp |= HASWIDTH | SIMPLE;
2578 break;
2579 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002580 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002581 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002582 }
2583 /* FALLTHROUGH */
2584
2585 default:
2586 {
2587 int len;
2588
Bram Moolenaar071d4272004-06-13 20:20:40 +00002589 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002590 * before a multi and when it's a composing char. */
2591 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002592 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002593do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002594 ret = regnode(MULTIBYTECODE);
2595 regmbc(c);
2596 *flagp |= HASWIDTH | SIMPLE;
2597 break;
2598 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002599
2600 ret = regnode(EXACTLY);
2601
2602 /*
2603 * Append characters as long as:
2604 * - there is no following multi, we then need the character in
2605 * front of it as a single character operand
2606 * - not running into a Magic character
2607 * - "one_exactly" is not set
2608 * But always emit at least one character. Might be a Multi,
2609 * e.g., a "[" without matching "]".
2610 */
2611 for (len = 0; c != NUL && (len == 0
2612 || (re_multi_type(peekchr()) == NOT_MULTI
2613 && !one_exactly
2614 && !is_Magic(c))); ++len)
2615 {
2616 c = no_Magic(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002617 if (has_mbyte)
2618 {
2619 regmbc(c);
2620 if (enc_utf8)
2621 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002622 int l;
2623
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002624 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002625 for (;;)
2626 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002627 l = utf_ptr2len(regparse);
2628 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002629 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002630 regmbc(utf_ptr2char(regparse));
2631 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002632 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002633 }
2634 }
2635 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002636 regc(c);
2637 c = getchr();
2638 }
2639 ungetchr();
2640
2641 regc(NUL);
2642 *flagp |= HASWIDTH;
2643 if (len == 1)
2644 *flagp |= SIMPLE;
2645 }
2646 break;
2647 }
2648
2649 return ret;
2650}
2651
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002652/*
2653 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2654 * character "c".
2655 */
2656 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002657use_multibytecode(int c)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002658{
2659 return has_mbyte && (*mb_char2len)(c) > 1
2660 && (re_multi_type(peekchr()) != NOT_MULTI
2661 || (enc_utf8 && utf_iscomposing(c)));
2662}
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002663
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002665 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002666 * Return pointer to generated code.
2667 */
2668 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002669regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002670{
2671 char_u *ret;
2672
2673 ret = regcode;
2674 if (ret == JUST_CALC_SIZE)
2675 regsize += 3;
2676 else
2677 {
2678 *regcode++ = op;
2679 *regcode++ = NUL; /* Null "next" pointer. */
2680 *regcode++ = NUL;
2681 }
2682 return ret;
2683}
2684
2685/*
2686 * Emit (if appropriate) a byte of code
2687 */
2688 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002689regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002690{
2691 if (regcode == JUST_CALC_SIZE)
2692 regsize++;
2693 else
2694 *regcode++ = b;
2695}
2696
Bram Moolenaar071d4272004-06-13 20:20:40 +00002697/*
2698 * Emit (if appropriate) a multi-byte character of code
2699 */
2700 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002701regmbc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002702{
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002703 if (!has_mbyte && c > 0xff)
2704 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002705 if (regcode == JUST_CALC_SIZE)
2706 regsize += (*mb_char2len)(c);
2707 else
2708 regcode += (*mb_char2bytes)(c, regcode);
2709}
Bram Moolenaar071d4272004-06-13 20:20:40 +00002710
2711/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002712 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002713 *
2714 * Means relocating the operand.
2715 */
2716 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002717reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002718{
2719 char_u *src;
2720 char_u *dst;
2721 char_u *place;
2722
2723 if (regcode == JUST_CALC_SIZE)
2724 {
2725 regsize += 3;
2726 return;
2727 }
2728 src = regcode;
2729 regcode += 3;
2730 dst = regcode;
2731 while (src > opnd)
2732 *--dst = *--src;
2733
2734 place = opnd; /* Op node, where operand used to be. */
2735 *place++ = op;
2736 *place++ = NUL;
2737 *place = NUL;
2738}
2739
2740/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002741 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002742 * Add a number to the operator.
2743 */
2744 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002745reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002746{
2747 char_u *src;
2748 char_u *dst;
2749 char_u *place;
2750
2751 if (regcode == JUST_CALC_SIZE)
2752 {
2753 regsize += 7;
2754 return;
2755 }
2756 src = regcode;
2757 regcode += 7;
2758 dst = regcode;
2759 while (src > opnd)
2760 *--dst = *--src;
2761
2762 place = opnd; /* Op node, where operand used to be. */
2763 *place++ = op;
2764 *place++ = NUL;
2765 *place++ = NUL;
2766 place = re_put_long(place, (long_u)val);
2767}
2768
2769/*
2770 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002771 * The operator has the given limit values as operands. Also set next pointer.
2772 *
2773 * Means relocating the operand.
2774 */
2775 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002776reginsert_limits(
2777 int op,
2778 long minval,
2779 long maxval,
2780 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002781{
2782 char_u *src;
2783 char_u *dst;
2784 char_u *place;
2785
2786 if (regcode == JUST_CALC_SIZE)
2787 {
2788 regsize += 11;
2789 return;
2790 }
2791 src = regcode;
2792 regcode += 11;
2793 dst = regcode;
2794 while (src > opnd)
2795 *--dst = *--src;
2796
2797 place = opnd; /* Op node, where operand used to be. */
2798 *place++ = op;
2799 *place++ = NUL;
2800 *place++ = NUL;
2801 place = re_put_long(place, (long_u)minval);
2802 place = re_put_long(place, (long_u)maxval);
2803 regtail(opnd, place);
2804}
2805
2806/*
2807 * Write a long as four bytes at "p" and return pointer to the next char.
2808 */
2809 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002810re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002811{
2812 *p++ = (char_u) ((val >> 24) & 0377);
2813 *p++ = (char_u) ((val >> 16) & 0377);
2814 *p++ = (char_u) ((val >> 8) & 0377);
2815 *p++ = (char_u) (val & 0377);
2816 return p;
2817}
2818
2819/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002820 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002821 */
2822 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002823regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002824{
2825 char_u *scan;
2826 char_u *temp;
2827 int offset;
2828
2829 if (p == JUST_CALC_SIZE)
2830 return;
2831
2832 /* Find last node. */
2833 scan = p;
2834 for (;;)
2835 {
2836 temp = regnext(scan);
2837 if (temp == NULL)
2838 break;
2839 scan = temp;
2840 }
2841
Bram Moolenaar582fd852005-03-28 20:58:01 +00002842 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002843 offset = (int)(scan - val);
2844 else
2845 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002846 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002847 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002848 * values in too many places. */
2849 if (offset > 0xffff)
2850 reg_toolong = TRUE;
2851 else
2852 {
2853 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2854 *(scan + 2) = (char_u) (offset & 0377);
2855 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002856}
2857
2858/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002859 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002860 */
2861 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002862regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002863{
2864 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2865 if (p == NULL || p == JUST_CALC_SIZE
2866 || (OP(p) != BRANCH
2867 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2868 return;
2869 regtail(OPERAND(p), val);
2870}
2871
2872/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002873 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002874 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002875/*
2876 * Start parsing at "str".
2877 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002878 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002879initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002880{
2881 regparse = str;
2882 prevchr_len = 0;
2883 curchr = prevprevchr = prevchr = nextchr = -1;
2884 at_start = TRUE;
2885 prev_at_start = FALSE;
2886}
2887
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002888/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002889 * Save the current parse state, so that it can be restored and parsing
2890 * starts in the same state again.
2891 */
2892 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002893save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002894{
2895 ps->regparse = regparse;
2896 ps->prevchr_len = prevchr_len;
2897 ps->curchr = curchr;
2898 ps->prevchr = prevchr;
2899 ps->prevprevchr = prevprevchr;
2900 ps->nextchr = nextchr;
2901 ps->at_start = at_start;
2902 ps->prev_at_start = prev_at_start;
2903 ps->regnpar = regnpar;
2904}
2905
2906/*
2907 * Restore a previously saved parse state.
2908 */
2909 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002910restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002911{
2912 regparse = ps->regparse;
2913 prevchr_len = ps->prevchr_len;
2914 curchr = ps->curchr;
2915 prevchr = ps->prevchr;
2916 prevprevchr = ps->prevprevchr;
2917 nextchr = ps->nextchr;
2918 at_start = ps->at_start;
2919 prev_at_start = ps->prev_at_start;
2920 regnpar = ps->regnpar;
2921}
2922
2923
2924/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002925 * Get the next character without advancing.
2926 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002927 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002928peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002929{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002930 static int after_slash = FALSE;
2931
Bram Moolenaar071d4272004-06-13 20:20:40 +00002932 if (curchr == -1)
2933 {
2934 switch (curchr = regparse[0])
2935 {
2936 case '.':
2937 case '[':
2938 case '~':
2939 /* magic when 'magic' is on */
2940 if (reg_magic >= MAGIC_ON)
2941 curchr = Magic(curchr);
2942 break;
2943 case '(':
2944 case ')':
2945 case '{':
2946 case '%':
2947 case '+':
2948 case '=':
2949 case '?':
2950 case '@':
2951 case '!':
2952 case '&':
2953 case '|':
2954 case '<':
2955 case '>':
2956 case '#': /* future ext. */
2957 case '"': /* future ext. */
2958 case '\'': /* future ext. */
2959 case ',': /* future ext. */
2960 case '-': /* future ext. */
2961 case ':': /* future ext. */
2962 case ';': /* future ext. */
2963 case '`': /* future ext. */
2964 case '/': /* Can't be used in / command */
2965 /* magic only after "\v" */
2966 if (reg_magic == MAGIC_ALL)
2967 curchr = Magic(curchr);
2968 break;
2969 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002970 /* * is not magic as the very first character, eg "?*ptr", when
2971 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2972 * "\(\*" is not magic, thus must be magic if "after_slash" */
2973 if (reg_magic >= MAGIC_ON
2974 && !at_start
2975 && !(prev_at_start && prevchr == Magic('^'))
2976 && (after_slash
2977 || (prevchr != Magic('(')
2978 && prevchr != Magic('&')
2979 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002980 curchr = Magic('*');
2981 break;
2982 case '^':
2983 /* '^' is only magic as the very first character and if it's after
2984 * "\(", "\|", "\&' or "\n" */
2985 if (reg_magic >= MAGIC_OFF
2986 && (at_start
2987 || reg_magic == MAGIC_ALL
2988 || prevchr == Magic('(')
2989 || prevchr == Magic('|')
2990 || prevchr == Magic('&')
2991 || prevchr == Magic('n')
2992 || (no_Magic(prevchr) == '('
2993 && prevprevchr == Magic('%'))))
2994 {
2995 curchr = Magic('^');
2996 at_start = TRUE;
2997 prev_at_start = FALSE;
2998 }
2999 break;
3000 case '$':
3001 /* '$' is only magic as the very last char and if it's in front of
3002 * either "\|", "\)", "\&", or "\n" */
3003 if (reg_magic >= MAGIC_OFF)
3004 {
3005 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003006 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003007
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003008 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003009 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003010 || p[1] == 'm' || p[1] == 'M'
3011 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3012 {
3013 if (p[1] == 'v')
3014 is_magic_all = TRUE;
3015 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3016 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003017 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003018 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003019 if (p[0] == NUL
3020 || (p[0] == '\\'
3021 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3022 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003023 || (is_magic_all
3024 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003025 || reg_magic == MAGIC_ALL)
3026 curchr = Magic('$');
3027 }
3028 break;
3029 case '\\':
3030 {
3031 int c = regparse[1];
3032
3033 if (c == NUL)
3034 curchr = '\\'; /* trailing '\' */
3035 else if (
3036#ifdef EBCDIC
3037 vim_strchr(META, c)
3038#else
3039 c <= '~' && META_flags[c]
3040#endif
3041 )
3042 {
3043 /*
3044 * META contains everything that may be magic sometimes,
3045 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003046 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003047 * magicness. Therefore, \ is so meta-magic that it is
3048 * not in META.
3049 */
3050 curchr = -1;
3051 prev_at_start = at_start;
3052 at_start = FALSE; /* be able to say "/\*ptr" */
3053 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003054 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003055 peekchr();
3056 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003057 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003058 curchr = toggle_Magic(curchr);
3059 }
3060 else if (vim_strchr(REGEXP_ABBR, c))
3061 {
3062 /*
3063 * Handle abbreviations, like "\t" for TAB -- webb
3064 */
3065 curchr = backslash_trans(c);
3066 }
3067 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3068 curchr = toggle_Magic(c);
3069 else
3070 {
3071 /*
3072 * Next character can never be (made) magic?
3073 * Then backslashing it won't do anything.
3074 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003075 if (has_mbyte)
3076 curchr = (*mb_ptr2char)(regparse + 1);
3077 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003078 curchr = c;
3079 }
3080 break;
3081 }
3082
Bram Moolenaar071d4272004-06-13 20:20:40 +00003083 default:
3084 if (has_mbyte)
3085 curchr = (*mb_ptr2char)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003086 }
3087 }
3088
3089 return curchr;
3090}
3091
3092/*
3093 * Eat one lexed character. Do this in a way that we can undo it.
3094 */
3095 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003096skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003097{
3098 /* peekchr() eats a backslash, do the same here */
3099 if (*regparse == '\\')
3100 prevchr_len = 1;
3101 else
3102 prevchr_len = 0;
3103 if (regparse[prevchr_len] != NUL)
3104 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003105 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003106 /* exclude composing chars that mb_ptr2len does include */
3107 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003108 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003109 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003110 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003111 ++prevchr_len;
3112 }
3113 regparse += prevchr_len;
3114 prev_at_start = at_start;
3115 at_start = FALSE;
3116 prevprevchr = prevchr;
3117 prevchr = curchr;
3118 curchr = nextchr; /* use previously unget char, or -1 */
3119 nextchr = -1;
3120}
3121
3122/*
3123 * Skip a character while keeping the value of prev_at_start for at_start.
3124 * prevchr and prevprevchr are also kept.
3125 */
3126 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003127skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003128{
3129 int as = prev_at_start;
3130 int pr = prevchr;
3131 int prpr = prevprevchr;
3132
3133 skipchr();
3134 at_start = as;
3135 prevchr = pr;
3136 prevprevchr = prpr;
3137}
3138
/*
 * Lex the next character of the pattern with peekchr(), consume it with
 * skipchr() and return it.
 */
    static int
getchr(void)
{
    int	    lexed = peekchr();

    skipchr();
    return lexed;
}
3151
3152/*
3153 * put character back. Works only once!
3154 */
3155 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003156ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003157{
3158 nextchr = curchr;
3159 curchr = prevchr;
3160 prevchr = prevprevchr;
3161 at_start = prev_at_start;
3162 prev_at_start = FALSE;
3163
3164 /* Backup regparse, so that it's at the same position as before the
3165 * getchr(). */
3166 regparse -= prevchr_len;
3167}
3168
3169/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003170 * Get and return the value of the hex string at the current position.
3171 * Return -1 if there is no valid hex number.
3172 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003173 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003174 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003175 * The parameter controls the maximum number of input characters. This will be
3176 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3177 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003178 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003179gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003180{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003181 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003182 int c;
3183 int i;
3184
3185 for (i = 0; i < maxinputlen; ++i)
3186 {
3187 c = regparse[0];
3188 if (!vim_isxdigit(c))
3189 break;
3190 nr <<= 4;
3191 nr |= hex2nr(c);
3192 ++regparse;
3193 }
3194
3195 if (i == 0)
3196 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003197 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003198}
3199
3200/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003201 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003202 * current position. Return -1 for invalid. Consumes all digits.
3203 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003204 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003205getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003206{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003207 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003208 int c;
3209 int i;
3210
3211 for (i = 0; ; ++i)
3212 {
3213 c = regparse[0];
3214 if (c < '0' || c > '9')
3215 break;
3216 nr *= 10;
3217 nr += c - '0';
3218 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003219 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003220 }
3221
3222 if (i == 0)
3223 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003224 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003225}
3226
3227/*
3228 * get and return the value of the octal string immediately after the current
3229 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3230 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3231 * treat 8 or 9 as recognised characters. Position is updated:
3232 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003233 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003234 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003235 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003236getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003237{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003238 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003239 int c;
3240 int i;
3241
3242 for (i = 0; i < 3 && nr < 040; ++i)
3243 {
3244 c = regparse[0];
3245 if (c < '0' || c > '7')
3246 break;
3247 nr <<= 3;
3248 nr |= hex2nr(c);
3249 ++regparse;
3250 }
3251
3252 if (i == 0)
3253 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003254 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003255}
3256
3257/*
3258 * Get a number after a backslash that is inside [].
3259 * When nothing is recognized return a backslash.
3260 */
3261 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003262coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003263{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003264 long nr = -1;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003265
3266 switch (*regparse++)
3267 {
3268 case 'd': nr = getdecchrs(); break;
3269 case 'o': nr = getoctchrs(); break;
3270 case 'x': nr = gethexchrs(2); break;
3271 case 'u': nr = gethexchrs(4); break;
3272 case 'U': nr = gethexchrs(8); break;
3273 }
3274 if (nr < 0)
3275 {
3276 /* If getting the number fails be backwards compatible: the character
3277 * is a backslash. */
3278 --regparse;
3279 nr = '\\';
3280 }
3281 return nr;
3282}
3283
3284/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003285 * read_limits - Read two integers to be taken as a minimum and maximum.
3286 * If the first character is '-', then the range is reversed.
3287 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3288 * missing, a very big number is the default.
3289 */
3290 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003291read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003292{
3293 int reverse = FALSE;
3294 char_u *first_char;
3295 long tmp;
3296
3297 if (*regparse == '-')
3298 {
3299 /* Starts with '-', so reverse the range later */
3300 regparse++;
3301 reverse = TRUE;
3302 }
3303 first_char = regparse;
3304 *minval = getdigits(&regparse);
3305 if (*regparse == ',') /* There is a comma */
3306 {
3307 if (vim_isdigit(*++regparse))
3308 *maxval = getdigits(&regparse);
3309 else
3310 *maxval = MAX_LIMIT;
3311 }
3312 else if (VIM_ISDIGIT(*first_char))
3313 *maxval = *minval; /* It was \{n} or \{-n} */
3314 else
3315 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3316 if (*regparse == '\\')
3317 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003318 if (*regparse != '}')
Bram Moolenaar1be45b22019-01-14 22:46:15 +01003319 EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"),
3320 reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003321
3322 /*
3323 * Reverse the range if there was a '-', or make sure it is in the right
3324 * order otherwise.
3325 */
3326 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3327 {
3328 tmp = *minval;
3329 *minval = *maxval;
3330 *maxval = tmp;
3331 }
3332 skipchr(); /* let's be friends with the lexer again */
3333 return OK;
3334}
3335
3336/*
3337 * vim_regexec and friends
3338 */
3339
3340/*
3341 * Global work variables for vim_regexec().
3342 */
3343
Bram Moolenaar071d4272004-06-13 20:20:40 +00003344/*
3345 * Structure used to save the current input state, when it needs to be
3346 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003347 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003348 */
3349typedef struct
3350{
3351 union
3352 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003353 char_u *ptr; /* rex.input pointer, for single-line regexp */
3354 lpos_T pos; /* rex.input pos, for multi-line regexp */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003355 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003356 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003357} regsave_T;
3358
3359/* struct to save start/end pointer/position in for \(\) */
3360typedef struct
3361{
3362 union
3363 {
3364 char_u *ptr;
3365 lpos_T pos;
3366 } se_u;
3367} save_se_T;
3368
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003369/* used for BEHIND and NOBEHIND matching */
3370typedef struct regbehind_S
3371{
3372 regsave_T save_after;
3373 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003374 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003375 save_se_T save_start[NSUBEXP];
3376 save_se_T save_end[NSUBEXP];
3377} regbehind_T;
3378
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003379static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaar09463262017-06-17 20:55:06 +02003380static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003381static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003382#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003383static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003384#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003385static void save_subexpr(regbehind_T *bp);
3386static void restore_subexpr(regbehind_T *bp);
3387static void reg_nextline(void);
3388static void reg_save(regsave_T *save, garray_T *gap);
3389static void reg_restore(regsave_T *save, garray_T *gap);
3390static int reg_save_equal(regsave_T *save);
3391static void save_se_multi(save_se_T *savep, lpos_T *posp);
3392static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003393
3394/* Save the sub-expressions before attempting a match. */
3395#define save_se(savep, posp, pp) \
3396 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3397
3398/* After a failed match restore the sub-expressions. */
3399#define restore_se(savep, posp, pp) { \
3400 if (REG_MULTI) \
3401 *(posp) = (savep)->se_u.pos; \
3402 else \
3403 *(pp) = (savep)->se_u.ptr; }
3404
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003405static int re_num_cmp(long_u val, char_u *scan);
3406static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar09463262017-06-17 20:55:06 +02003407static int regmatch(char_u *prog, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003408static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003409
3410#ifdef DEBUG
3411int regnarrate = 0;
3412#endif
3413
3414/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003415 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3416 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003417 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003418 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003419static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003420static unsigned reg_tofreelen;
3421
/*
 * Structure used to store the execution state of the regex engine.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              curbuf                  buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 */
typedef struct {
    regmatch_T          *reg_match;
    regmmatch_T         *reg_mmatch;
    char_u              **reg_startp;
    char_u              **reg_endp;
    lpos_T              *reg_startpos;
    lpos_T              *reg_endpos;
    win_T               *reg_win;
    buf_T               *reg_buf;
    linenr_T            reg_firstlnum;
    linenr_T            reg_maxline;
    int                 reg_line_lbr;   /* "\n" in string is line break */

    // The current match-position is stored in these variables:
    linenr_T    lnum;           // line number, relative to first line
    char_u      *line;          // start of current line
    char_u      *input;         // current input, points into "line"

    int need_clear_subexpr;     // subexpressions still need to be cleared
#ifdef FEAT_SYN_HL
    int need_clear_zsubexpr;    // extmatch subexpressions still need to be
                                // cleared
#endif

    /* Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
     * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
     * contains '\c' or '\C' the value is overruled. */
    int                 reg_ic;

    /* Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
     * flag in the regexp.  Defaults to false, always. */
    int                 reg_icombine;

    /* Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
     * there is no maximum. */
    colnr_T             reg_maxcol;

    // State for the NFA engine regexec.
    int nfa_has_zend;       // NFA regexp \ze operator encountered.
    int nfa_has_backref;    // NFA regexp \1 .. \9 encountered.
    int nfa_nsubexpr;       // Number of sub expressions actually being used
                            // during execution. 1 if only the whole match
                            // (subexpr 0) is used.
    // listid is global, so that it increases on recursive calls to
    // nfa_regmatch(), which means we don't have to clear the lastlist field of
    // all the states.
    int nfa_listid;
    int nfa_alt_listid;

#ifdef FEAT_SYN_HL
    int nfa_has_zsubexpr;   // NFA regexp has \z( ), set zsubexpr.
#endif
} regexec_T;

// The execution state used by the matching functions in this file.
static regexec_T        rex;
// NOTE(review): presumably flags that "rex" is busy, so that a nested regexp
// call can save and restore it; no user is visible here - confirm.
static int              rex_in_use = FALSE;
3495
Bram Moolenaar071d4272004-06-13 20:20:40 +00003496
/*
 * Values for rs_state in regitem_T: what kind of node pushed the item onto
 * the regstack.  The numbering starts at zero; the two RS_Z* values only
 * exist when FEAT_SYN_HL is defined.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3517
/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;
        regsave_T  regsave;
    } rs_un;                    /* room for saving rex.input */
    short       rs_no;          /* submatch nr or BEHIND/NOBEHIND */
} regitem_T;

/* Push and pop a regitem_T on "regstack". */
static regitem_T *regstack_push(regstate_T state, char_u *scan);
static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003538
/*
 * Used for STAR, PLUS and BRACE_SIMPLE matching.
 * NOTE(review): "count", "minval" and "maxval" look like the current number
 * of repeats and the allowed range; their use is in regmatch() - confirm.
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3548
/* Used to store the input position when a BACK was encountered, so that we
 * know if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3556
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items for BACK.
 */
static garray_T regstack = {0, 0, 0, 0, NULL};
static garray_T backpos = {0, 0, 0, 0, NULL};

/*
 * Both for regstack and backpos tables we use the following strategy of
 * allocation (to reduce malloc/free calls):
 * - Initial size is fairly small.
 * - When needed, the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match we free the memory only if the array has grown.
 *   Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 *
 * REGSTACK_INITIAL is a number of bytes ("regstack" uses an item size of one
 * byte), BACKPOS_INITIAL is a number of backpos_T items.
 */
#define REGSTACK_INITIAL        2048
#define BACKPOS_INITIAL         64
3580
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between calls:
 * the "reg_tofree" line copy, the previous substitute string and the
 * "regstack" and "backpos" arrays.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003591
Bram Moolenaar071d4272004-06-13 20:20:40 +00003592/*
3593 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3594 */
3595 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003596reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003597{
3598 /* when looking behind for a match/no-match lnum is negative. But we
3599 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003600 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003601 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003602 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003603 /* Must have matched the "\n" in the last line. */
3604 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003605 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003606}
3607
/* NOTE(review): no user of "behind_pos" is visible in this part of the file;
 * presumably an input position saved for look-behind matching - confirm. */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
/* Workspace for the \z(...\) matches.  regtry() reads the pointer pair for a
 * single-line match and the position pair for a multi-line match. */
static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
#endif

/* TRUE if using multi-line regexp: bt_regexec_multi() sets rex.reg_match to
 * NULL, bt_regexec_nl() sets it to its "rmp" argument. */
#define REG_MULTI       (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003619
Bram Moolenaar071d4272004-06-13 20:20:40 +00003620/*
3621 * Match a regexp against a string.
3622 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3623 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003624 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003625 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003626 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003627 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003628 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003629bt_regexec_nl(
3630 regmatch_T *rmp,
3631 char_u *line, /* string to match against */
3632 colnr_T col, /* column to start looking for match */
3633 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003634{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003635 rex.reg_match = rmp;
3636 rex.reg_mmatch = NULL;
3637 rex.reg_maxline = 0;
3638 rex.reg_line_lbr = line_lbr;
3639 rex.reg_buf = curbuf;
3640 rex.reg_win = NULL;
3641 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003642 rex.reg_icombine = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003643 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003644
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003645 return bt_regexec_both(line, col, NULL, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003646}
3647
Bram Moolenaar071d4272004-06-13 20:20:40 +00003648/*
3649 * Match a regexp against multiple lines.
3650 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3651 * Uses curbuf for line count and 'iskeyword'.
3652 *
3653 * Return zero if there is no match. Return number of lines contained in the
3654 * match otherwise.
3655 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003656 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003657bt_regexec_multi(
3658 regmmatch_T *rmp,
3659 win_T *win, /* window in which to search or NULL */
3660 buf_T *buf, /* buffer in which to search */
3661 linenr_T lnum, /* nr of line to start looking for match */
3662 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003663 proftime_T *tm, /* timeout limit or NULL */
3664 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003665{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003666 rex.reg_match = NULL;
3667 rex.reg_mmatch = rmp;
3668 rex.reg_buf = buf;
3669 rex.reg_win = win;
3670 rex.reg_firstlnum = lnum;
3671 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3672 rex.reg_line_lbr = FALSE;
3673 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003674 rex.reg_icombine = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003675 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003676
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003677 return bt_regexec_both(NULL, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003678}
3679
/*
 * Match a regexp against a string ("line" points to the string) or multiple
 * lines ("line" is NULL, use reg_getline()).
 * Which of the two is done depends on REG_MULTI, thus on the fields of "rex"
 * that bt_regexec_nl() or bt_regexec_multi() filled in before calling this.
 *
 * Returns 0 for failure, number of lines contained in the match otherwise.
 * Zero is also returned, without trying a match, when the program or the line
 * is NULL, when prog_magic_wrong() reports a problem, when "col" is at or
 * past rex.reg_maxcol and when the "must appear" string is not in the line.
 * When the time limit "tm" has passed "*timed_out" is set to TRUE (if
 * "timed_out" is not NULL).
 */
    static long
bt_regexec_both(
    char_u      *line,
    colnr_T     col,            /* column to start looking for match */
    proftime_T  *tm,            /* timeout limit or NULL */
    int         *timed_out)     /* flag set on timeout or NULL */
{
    bt_regprog_T    *prog;
    char_u          *s;
    long            retval = 0L;

    /* Create "regstack" and "backpos" if they are not allocated yet.
     * We allocate the *_INITIAL amount first (bytes for "regstack", items for
     * "backpos") and then set the grow size to a much bigger value to avoid
     * many malloc calls in case of deep regular expressions. */
    if (regstack.ga_data == NULL)
    {
        /* Use an item size of 1 byte, since we push different things
         * onto the regstack. */
        ga_init2(&regstack, 1, REGSTACK_INITIAL);
        (void)ga_grow(&regstack, REGSTACK_INITIAL);
        regstack.ga_growsize = REGSTACK_INITIAL * 8;
    }

    if (backpos.ga_data == NULL)
    {
        ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
        (void)ga_grow(&backpos, BACKPOS_INITIAL);
        backpos.ga_growsize = BACKPOS_INITIAL * 8;
    }

    /* Get the program and the place to store the match positions; for a
     * multi-line match also get the first line. */
    if (REG_MULTI)
    {
        prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
        line = reg_getline((linenr_T)0);
        rex.reg_startpos = rex.reg_mmatch->startpos;
        rex.reg_endpos = rex.reg_mmatch->endpos;
    }
    else
    {
        prog = (bt_regprog_T *)rex.reg_match->regprog;
        rex.reg_startp = rex.reg_match->startp;
        rex.reg_endp = rex.reg_match->endp;
    }

    /* Be paranoid... */
    if (prog == NULL || line == NULL)
    {
        emsg(_(e_null));
        goto theend;
    }

    /* Check validity of program. */
    if (prog_magic_wrong())
        goto theend;

    /* If the start column is past the maximum column: no need to try. */
    if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
        goto theend;

    /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
    if (prog->regflags & RF_ICASE)
        rex.reg_ic = TRUE;
    else if (prog->regflags & RF_NOICASE)
        rex.reg_ic = FALSE;

    /* If pattern contains "\Z" overrule value of rex.reg_icombine */
    if (prog->regflags & RF_ICOMBINE)
        rex.reg_icombine = TRUE;

    /* If there is a "must appear" string, look for it. */
    if (prog->regmust != NULL)
    {
        int c;

        /* "c" is the first character of the "must appear" string. */
        if (has_mbyte)
            c = (*mb_ptr2char)(prog->regmust);
        else
            c = *prog->regmust;
        s = line + col;

        /*
         * This is used very often, esp. for ":global".  Use three versions of
         * the loop to avoid overhead of conditions.
         * Each loop finds the next occurrence of "c" and then compares the
         * whole string; it ends with "s" NULL when the string is not found.
         */
        if (!rex.reg_ic && !has_mbyte)
            /* Matching case, single-byte: search for the byte. */
            while ((s = vim_strbyte(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                ++s;
            }
        else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
            /* Search with vim_strchr(), advance by one character. */
            while ((s = vim_strchr(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                MB_PTR_ADV(s);
            }
        else
            /* Remaining cases: search with cstrchr(). */
            while ((s = cstrchr(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                MB_PTR_ADV(s);
            }
        if (s == NULL)          /* Not present. */
            goto theend;
    }

    /* Start matching in the first line. */
    rex.line = line;
    rex.lnum = 0;
    reg_toolong = FALSE;

    /* Simplest case: Anchored match need be tried only once. */
    if (prog->reganch)
    {
        int     c;

        if (has_mbyte)
            c = (*mb_ptr2char)(rex.line + col);
        else
            c = rex.line[col];
        /* Only try when there is no known start character or the character
         * at "col" equals it, ignoring case when rex.reg_ic is set. */
        if (prog->regstart == NUL
                || prog->regstart == c
                || (rex.reg_ic
                    && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
                        || (c < 255 && prog->regstart < 255 &&
                            MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
            retval = regtry(prog, col, tm, timed_out);
        else
            retval = 0;
    }
    else
    {
#ifdef FEAT_RELTIME
        int tm_count = 0;
#endif
        /* Messy cases: unanchored match.
         * Try at every column until there is a match, the end of the line is
         * reached, "got_int" is set, rex.reg_maxcol is reached or the time
         * limit has passed. */
        while (!got_int)
        {
            if (prog->regstart != NUL)
            {
                /* Skip until the char we know it must start with.
                 * Used often, do some work to avoid call overhead. */
                if (!rex.reg_ic && !has_mbyte)
                    s = vim_strbyte(rex.line + col, prog->regstart);
                else
                    s = cstrchr(rex.line + col, prog->regstart);
                if (s == NULL)
                {
                    retval = 0;
                    break;
                }
                col = (int)(s - rex.line);
            }

            /* Check for maximum column to try. */
            if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
            {
                retval = 0;
                break;
            }

            retval = regtry(prog, col, tm, timed_out);
            if (retval > 0)
                break;

            /* if not currently on the first line, get it again */
            if (rex.lnum != 0)
            {
                rex.lnum = 0;
                rex.line = reg_getline((linenr_T)0);
            }
            /* At the end of the line there is nothing more to try, otherwise
             * advance to the next character. */
            if (rex.line[col] == NUL)
                break;
            if (has_mbyte)
                col += (*mb_ptr2len)(rex.line + col);
            else
                ++col;
#ifdef FEAT_RELTIME
            /* Check for timeout once in twenty times to avoid overhead. */
            if (tm != NULL && ++tm_count == 20)
            {
                tm_count = 0;
                if (profile_passed_limit(tm))
                {
                    if (timed_out != NULL)
                        *timed_out = TRUE;
                    break;
                }
            }
#endif
        }
    }

theend:
    /* Free "reg_tofree" when it's a bit big.
     * Free regstack and backpos if they are bigger than their initial size. */
    if (reg_tofreelen > 400)
        VIM_CLEAR(reg_tofree);
    if (regstack.ga_maxlen > REGSTACK_INITIAL)
        ga_clear(&regstack);
    if (backpos.ga_maxlen > BACKPOS_INITIAL)
        ga_clear(&backpos);

    return retval;
}
3893
3894#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00003895/*
3896 * Create a new extmatch and mark it as referenced once.
3897 */
3898 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003899make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003900{
3901 reg_extmatch_T *em;
3902
3903 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3904 if (em != NULL)
3905 em->refcnt = 1;
3906 return em;
3907}
3908
3909/*
3910 * Add a reference to an extmatch.
3911 */
3912 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003913ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003914{
3915 if (em != NULL)
3916 em->refcnt++;
3917 return em;
3918}
3919
3920/*
3921 * Remove a reference to an extmatch. If there are no references left, free
3922 * the info.
3923 */
3924 void
Bram Moolenaar05540972016-01-30 20:31:25 +01003925unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003926{
3927 int i;
3928
3929 if (em != NULL && --em->refcnt <= 0)
3930 {
3931 for (i = 0; i < NSUBEXP; ++i)
3932 vim_free(em->matches[i]);
3933 vim_free(em);
3934 }
3935}
3936#endif
3937
3938/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02003939 * regtry - try match of "prog" with at rex.line["col"].
Bram Moolenaar071d4272004-06-13 20:20:40 +00003940 * Returns 0 for failure, number of lines contained in the match otherwise.
3941 */
3942 static long
Bram Moolenaar09463262017-06-17 20:55:06 +02003943regtry(
3944 bt_regprog_T *prog,
3945 colnr_T col,
3946 proftime_T *tm, /* timeout limit or NULL */
3947 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003948{
Bram Moolenaar0270f382018-07-17 05:43:58 +02003949 rex.input = rex.line + col;
3950 rex.need_clear_subexpr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003951#ifdef FEAT_SYN_HL
Bram Moolenaar0270f382018-07-17 05:43:58 +02003952 // Clear the external match subpointers if necessary.
3953 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003954#endif
3955
Bram Moolenaar09463262017-06-17 20:55:06 +02003956 if (regmatch(prog->program + 1, tm, timed_out) == 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003957 return 0;
3958
3959 cleanup_subexpr();
3960 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003961 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003962 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003963 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003964 rex.reg_startpos[0].lnum = 0;
3965 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003966 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02003967 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003968 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003969 rex.reg_endpos[0].lnum = rex.lnum;
3970 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003971 }
3972 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003973 /* Use line number of "\ze". */
Bram Moolenaar0270f382018-07-17 05:43:58 +02003974 rex.lnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003975 }
3976 else
3977 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003978 if (rex.reg_startp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003979 rex.reg_startp[0] = rex.line + col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003980 if (rex.reg_endp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003981 rex.reg_endp[0] = rex.input;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003982 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003983#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003984 /* Package any found \z(...\) matches for export. Default is none. */
3985 unref_extmatch(re_extmatch_out);
3986 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003987
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003988 if (prog->reghasz == REX_SET)
3989 {
3990 int i;
3991
3992 cleanup_zsubexpr();
3993 re_extmatch_out = make_extmatch();
3994 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003995 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003996 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003998 /* Only accept single line matches. */
3999 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004000 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4001 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004002 re_extmatch_out->matches[i] =
4003 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004004 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004005 reg_endzpos[i].col - reg_startzpos[i].col);
4006 }
4007 else
4008 {
4009 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4010 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004011 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004012 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004013 }
4014 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004015 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004016#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004017 return 1 + rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004018}
4019
Bram Moolenaar071d4272004-06-13 20:20:40 +00004020/*
4021 * Get class of previous character.
4022 */
4023 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004024reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004025{
Bram Moolenaar0270f382018-07-17 05:43:58 +02004026 if (rex.input > rex.line)
4027 return mb_get_class_buf(rex.input - 1
Bram Moolenaara12a1612019-01-24 16:39:02 +01004028 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004029 return -1;
4030}
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004031
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004032/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004033 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004034 */
4035 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004036reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004037{
4038 pos_T top, bot;
4039 linenr_T lnum;
4040 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004041 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004042 int mode;
4043 colnr_T start, end;
4044 colnr_T start2, end2;
4045 colnr_T cols;
4046
4047 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004048 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004049 return FALSE;
4050
4051 if (VIsual_active)
4052 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004053 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004054 {
4055 top = VIsual;
4056 bot = wp->w_cursor;
4057 }
4058 else
4059 {
4060 top = wp->w_cursor;
4061 bot = VIsual;
4062 }
4063 mode = VIsual_mode;
4064 }
4065 else
4066 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004067 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004068 {
4069 top = curbuf->b_visual.vi_start;
4070 bot = curbuf->b_visual.vi_end;
4071 }
4072 else
4073 {
4074 top = curbuf->b_visual.vi_end;
4075 bot = curbuf->b_visual.vi_start;
4076 }
4077 mode = curbuf->b_visual.vi_mode;
4078 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004079 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004080 if (lnum < top.lnum || lnum > bot.lnum)
4081 return FALSE;
4082
4083 if (mode == 'v')
4084 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004085 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004086 if ((lnum == top.lnum && col < top.col)
4087 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4088 return FALSE;
4089 }
4090 else if (mode == Ctrl_V)
4091 {
4092 getvvcol(wp, &top, &start, NULL, &end);
4093 getvvcol(wp, &bot, &start2, NULL, &end2);
4094 if (start2 < start)
4095 start = start2;
4096 if (end2 > end)
4097 end = end2;
4098 if (top.col == MAXCOL || bot.col == MAXCOL)
4099 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004100 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004101 if (cols < start || cols > end - (*p_sel == 'e'))
4102 return FALSE;
4103 }
4104 return TRUE;
4105}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004106
/* Advance rex.input with MB_PTR_ADV(). */
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4116
4117/*
4118 * regmatch - main matching routine
4119 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004120 * Conceptually the strategy is simple: Check to see whether the current node
4121 * matches, push an item onto the regstack and loop to see whether the rest
4122 * matches, and then act accordingly. In practice we make some effort to
4123 * avoid using the regstack, in particular by going through "ordinary" nodes
4124 * (that don't need to know whether the rest of the match failed) by a nested
4125 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004126 *
Bram Moolenaar0270f382018-07-17 05:43:58 +02004127 * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after
Bram Moolenaar071d4272004-06-13 20:20:40 +00004128 * the last matched character.
Bram Moolenaar0270f382018-07-17 05:43:58 +02004129 * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an
Bram Moolenaar071d4272004-06-13 20:20:40 +00004130 * undefined state!
4131 */
4132 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004133regmatch(
Bram Moolenaar09463262017-06-17 20:55:06 +02004134 char_u *scan, /* Current node. */
4135 proftime_T *tm UNUSED, /* timeout limit or NULL */
4136 int *timed_out UNUSED) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004137{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004138 char_u *next; /* Next node. */
4139 int op;
4140 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004141 regitem_T *rp;
4142 int no;
4143 int status; /* one of the RA_ values: */
4144#define RA_FAIL 1 /* something failed, abort */
4145#define RA_CONT 2 /* continue in inner loop */
4146#define RA_BREAK 3 /* break inner loop */
4147#define RA_MATCH 4 /* successful match */
4148#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar09463262017-06-17 20:55:06 +02004149#ifdef FEAT_RELTIME
4150 int tm_count = 0;
4151#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004152
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004153 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004154 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004155 regstack.ga_len = 0;
4156 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004157
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004158 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004159 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004160 */
4161 for (;;)
4162 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004163 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4164 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004165 fast_breakcheck();
4166
4167#ifdef DEBUG
4168 if (scan != NULL && regnarrate)
4169 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004170 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004171 mch_errmsg("(\n");
4172 }
4173#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004174
4175 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004176 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004177 * regstack.
4178 */
4179 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004180 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004181 if (got_int || scan == NULL)
4182 {
4183 status = RA_FAIL;
4184 break;
4185 }
Bram Moolenaar09463262017-06-17 20:55:06 +02004186#ifdef FEAT_RELTIME
4187 /* Check for timeout once in a 100 times to avoid overhead. */
4188 if (tm != NULL && ++tm_count == 100)
4189 {
4190 tm_count = 0;
4191 if (profile_passed_limit(tm))
4192 {
4193 if (timed_out != NULL)
4194 *timed_out = TRUE;
4195 status = RA_FAIL;
4196 break;
4197 }
4198 }
4199#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004200 status = RA_CONT;
4201
Bram Moolenaar071d4272004-06-13 20:20:40 +00004202#ifdef DEBUG
4203 if (regnarrate)
4204 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004205 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004206 mch_errmsg("...\n");
4207# ifdef FEAT_SYN_HL
4208 if (re_extmatch_in != NULL)
4209 {
4210 int i;
4211
4212 mch_errmsg(_("External submatches:\n"));
4213 for (i = 0; i < NSUBEXP; i++)
4214 {
4215 mch_errmsg(" \"");
4216 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004217 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004218 mch_errmsg("\"\n");
4219 }
4220 }
4221# endif
4222 }
4223#endif
4224 next = regnext(scan);
4225
4226 op = OP(scan);
4227 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004228 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
Bram Moolenaar0270f382018-07-17 05:43:58 +02004229 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004230 {
4231 reg_nextline();
4232 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004233 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004234 {
4235 ADVANCE_REGINPUT();
4236 }
4237 else
4238 {
4239 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004240 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004241 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004242 c = (*mb_ptr2char)(rex.input);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004243 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02004244 c = *rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004245 switch (op)
4246 {
4247 case BOL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004248 if (rex.input != rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004249 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004250 break;
4251
4252 case EOL:
4253 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004254 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004255 break;
4256
4257 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004258 /* We're not at the beginning of the file when below the first
4259 * line where we started, not at the start of the line or we
4260 * didn't start at the first line of the buffer. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004261 if (rex.lnum != 0 || rex.input != rex.line
Bram Moolenaar6100d022016-10-02 16:51:57 +02004262 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004263 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004264 break;
4265
4266 case RE_EOF:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004267 if (rex.lnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004268 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004269 break;
4270
4271 case CURSOR:
4272 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004273 * rex.reg_win->w_cursor position to the match position. */
4274 if (rex.reg_win == NULL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004275 || (rex.lnum + rex.reg_firstlnum
Bram Moolenaar6100d022016-10-02 16:51:57 +02004276 != rex.reg_win->w_cursor.lnum)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004277 || ((colnr_T)(rex.input - rex.line)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004278 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004279 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004280 break;
4281
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004282 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004283 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004284 {
4285 int mark = OPERAND(scan)[0];
4286 int cmp = OPERAND(scan)[1];
4287 pos_T *pos;
4288
Bram Moolenaar6100d022016-10-02 16:51:57 +02004289 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004290 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004291 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004292 || (pos->lnum == rex.lnum + rex.reg_firstlnum
4293 ? (pos->col == (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004294 ? (cmp == '<' || cmp == '>')
Bram Moolenaar0270f382018-07-17 05:43:58 +02004295 : (pos->col < (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004296 ? cmp != '>'
4297 : cmp != '<'))
Bram Moolenaar0270f382018-07-17 05:43:58 +02004298 : (pos->lnum < rex.lnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004299 ? cmp != '>'
4300 : cmp != '<')))
4301 status = RA_NOMATCH;
4302 }
4303 break;
4304
4305 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004306 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004307 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004308 break;
4309
Bram Moolenaar071d4272004-06-13 20:20:40 +00004310 case RE_LNUM:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004311 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004312 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004313 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004314 break;
4315
4316 case RE_COL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004317 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004318 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004319 break;
4320
4321 case RE_VCOL:
4322 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004323 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar0270f382018-07-17 05:43:58 +02004324 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004325 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004326 break;
4327
Bram Moolenaar0270f382018-07-17 05:43:58 +02004328 case BOW: /* \<word; rex.input points to w */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004329 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004330 status = RA_NOMATCH;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004331 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004332 {
4333 int this_class;
4334
4335 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004336 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004337 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004338 status = RA_NOMATCH; /* not on a word at all */
4339 else if (reg_prev_class() == this_class)
4340 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004341 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004342 else
4343 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004344 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
4345 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004346 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004347 }
4348 break;
4349
Bram Moolenaar0270f382018-07-17 05:43:58 +02004350 case EOW: /* word\>; rex.input points after d */
4351 if (rex.input == rex.line) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004352 status = RA_NOMATCH;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004353 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004354 {
4355 int this_class, prev_class;
4356
4357 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004358 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004359 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004360 if (this_class == prev_class
4361 || prev_class == 0 || prev_class == 1)
4362 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004363 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004364 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004365 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004366 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
4367 || (rex.input[0] != NUL
Bram Moolenaar6100d022016-10-02 16:51:57 +02004368 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004369 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004370 }
4371 break; /* Matched with EOW */
4372
4373 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004374 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004375 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH;
4377 else
4378 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004379 break;
4380
4381 case IDENT:
4382 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004383 status = RA_NOMATCH;
4384 else
4385 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004386 break;
4387
4388 case SIDENT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004389 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004390 status = RA_NOMATCH;
4391 else
4392 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004393 break;
4394
4395 case KWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004396 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004397 status = RA_NOMATCH;
4398 else
4399 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004400 break;
4401
4402 case SKWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004403 if (VIM_ISDIGIT(*rex.input)
4404 || !vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004405 status = RA_NOMATCH;
4406 else
4407 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004408 break;
4409
4410 case FNAME:
4411 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004412 status = RA_NOMATCH;
4413 else
4414 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004415 break;
4416
4417 case SFNAME:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004418 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004419 status = RA_NOMATCH;
4420 else
4421 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004422 break;
4423
4424 case PRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004425 if (!vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004426 status = RA_NOMATCH;
4427 else
4428 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004429 break;
4430
4431 case SPRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004432 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004433 status = RA_NOMATCH;
4434 else
4435 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004436 break;
4437
4438 case WHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004439 if (!VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004440 status = RA_NOMATCH;
4441 else
4442 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004443 break;
4444
4445 case NWHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004446 if (c == NUL || VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004447 status = RA_NOMATCH;
4448 else
4449 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004450 break;
4451
4452 case DIGIT:
4453 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004454 status = RA_NOMATCH;
4455 else
4456 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004457 break;
4458
4459 case NDIGIT:
4460 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004461 status = RA_NOMATCH;
4462 else
4463 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004464 break;
4465
4466 case HEX:
4467 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004468 status = RA_NOMATCH;
4469 else
4470 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004471 break;
4472
4473 case NHEX:
4474 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004475 status = RA_NOMATCH;
4476 else
4477 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004478 break;
4479
4480 case OCTAL:
4481 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004482 status = RA_NOMATCH;
4483 else
4484 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004485 break;
4486
4487 case NOCTAL:
4488 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004489 status = RA_NOMATCH;
4490 else
4491 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004492 break;
4493
4494 case WORD:
4495 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004496 status = RA_NOMATCH;
4497 else
4498 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004499 break;
4500
4501 case NWORD:
4502 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004503 status = RA_NOMATCH;
4504 else
4505 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004506 break;
4507
4508 case HEAD:
4509 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004510 status = RA_NOMATCH;
4511 else
4512 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004513 break;
4514
4515 case NHEAD:
4516 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004517 status = RA_NOMATCH;
4518 else
4519 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004520 break;
4521
4522 case ALPHA:
4523 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004524 status = RA_NOMATCH;
4525 else
4526 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004527 break;
4528
4529 case NALPHA:
4530 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004531 status = RA_NOMATCH;
4532 else
4533 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004534 break;
4535
4536 case LOWER:
4537 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004538 status = RA_NOMATCH;
4539 else
4540 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004541 break;
4542
4543 case NLOWER:
4544 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004545 status = RA_NOMATCH;
4546 else
4547 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004548 break;
4549
4550 case UPPER:
4551 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004552 status = RA_NOMATCH;
4553 else
4554 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004555 break;
4556
4557 case NUPPER:
4558 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004559 status = RA_NOMATCH;
4560 else
4561 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004562 break;
4563
4564 case EXACTLY:
4565 {
4566 int len;
4567 char_u *opnd;
4568
4569 opnd = OPERAND(scan);
4570 /* Inline the first byte, for speed. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004571 if (*opnd != *rex.input
Bram Moolenaara12a1612019-01-24 16:39:02 +01004572 && (!rex.reg_ic
4573 || (!enc_utf8
4574 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004575 status = RA_NOMATCH;
4576 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004577 {
4578 /* match empty string always works; happens when "~" is
4579 * empty. */
4580 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004581 else
4582 {
Bram Moolenaara12a1612019-01-24 16:39:02 +01004583 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004584 {
4585 len = 1; /* matched a single byte above */
4586 }
4587 else
4588 {
4589 /* Need to match first byte again for multi-byte. */
4590 len = (int)STRLEN(opnd);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004591 if (cstrncmp(opnd, rex.input, &len) != 0)
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004592 status = RA_NOMATCH;
4593 }
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004594 /* Check for following composing character, unless %C
4595 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004596 if (status != RA_NOMATCH
4597 && enc_utf8
Bram Moolenaar0270f382018-07-17 05:43:58 +02004598 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004599 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004600 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004601 {
4602 /* raaron: This code makes a composing character get
4603 * ignored, which is the correct behavior (sometimes)
4604 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004605 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004606 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004607 if (status != RA_NOMATCH)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004608 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 }
4610 }
4611 break;
4612
4613 case ANYOF:
4614 case ANYBUT:
4615 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004616 status = RA_NOMATCH;
4617 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4618 status = RA_NOMATCH;
4619 else
4620 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004621 break;
4622
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 case MULTIBYTECODE:
4624 if (has_mbyte)
4625 {
4626 int i, len;
4627 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004628 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004629
4630 opnd = OPERAND(scan);
4631 /* Safety check (just in case 'encoding' was changed since
4632 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004633 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004634 {
4635 status = RA_NOMATCH;
4636 break;
4637 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004638 if (enc_utf8)
Bram Moolenaarace95982017-03-29 17:30:27 +02004639 opndc = utf_ptr2char(opnd);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004640 if (enc_utf8 && utf_iscomposing(opndc))
4641 {
4642 /* When only a composing char is given match at any
4643 * position where that composing char appears. */
4644 status = RA_NOMATCH;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004645 for (i = 0; rex.input[i] != NUL;
4646 i += utf_ptr2len(rex.input + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004647 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004648 inpc = utf_ptr2char(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004649 if (!utf_iscomposing(inpc))
4650 {
4651 if (i > 0)
4652 break;
4653 }
4654 else if (opndc == inpc)
4655 {
4656 /* Include all following composing chars. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004657 len = i + utfc_ptr2len(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004658 status = RA_MATCH;
4659 break;
4660 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004661 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004662 }
4663 else
4664 for (i = 0; i < len; ++i)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004665 if (opnd[i] != rex.input[i])
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004666 {
4667 status = RA_NOMATCH;
4668 break;
4669 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004670 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004671 }
4672 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004673 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004674 break;
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004675 case RE_COMPOSING:
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004676 if (enc_utf8)
4677 {
4678 /* Skip composing characters. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004679 while (utf_iscomposing(utf_ptr2char(rex.input)))
4680 MB_CPTR_ADV(rex.input);
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004681 }
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004682 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004683
4684 case NOTHING:
4685 break;
4686
4687 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004688 {
4689 int i;
4690 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004691
Bram Moolenaar582fd852005-03-28 20:58:01 +00004692 /*
4693 * When we run into BACK we need to check if we don't keep
4694 * looping without matching any input. The second and later
4695 * times a BACK is encountered it fails if the input is still
4696 * at the same position as the previous time.
4697 * The positions are stored in "backpos" and found by the
4698 * current value of "scan", the position in the RE program.
4699 */
4700 bp = (backpos_T *)backpos.ga_data;
4701 for (i = 0; i < backpos.ga_len; ++i)
4702 if (bp[i].bp_scan == scan)
4703 break;
4704 if (i == backpos.ga_len)
4705 {
4706 /* First time at this BACK, make room to store the pos. */
4707 if (ga_grow(&backpos, 1) == FAIL)
4708 status = RA_FAIL;
4709 else
4710 {
4711 /* get "ga_data" again, it may have changed */
4712 bp = (backpos_T *)backpos.ga_data;
4713 bp[i].bp_scan = scan;
4714 ++backpos.ga_len;
4715 }
4716 }
4717 else if (reg_save_equal(&bp[i].bp_pos))
4718 /* Still at same position as last time, fail. */
4719 status = RA_NOMATCH;
4720
4721 if (status != RA_FAIL && status != RA_NOMATCH)
4722 reg_save(&bp[i].bp_pos, &backpos);
4723 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004724 break;
4725
Bram Moolenaar071d4272004-06-13 20:20:40 +00004726 case MOPEN + 0: /* Match start: \zs */
4727 case MOPEN + 1: /* \( */
4728 case MOPEN + 2:
4729 case MOPEN + 3:
4730 case MOPEN + 4:
4731 case MOPEN + 5:
4732 case MOPEN + 6:
4733 case MOPEN + 7:
4734 case MOPEN + 8:
4735 case MOPEN + 9:
4736 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004737 no = op - MOPEN;
4738 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004739 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004740 if (rp == NULL)
4741 status = RA_FAIL;
4742 else
4743 {
4744 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004745 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4746 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004747 /* We simply continue and handle the result when done. */
4748 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004749 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004750 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004751
4752 case NOPEN: /* \%( */
4753 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004754 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004755 status = RA_FAIL;
4756 /* We simply continue and handle the result when done. */
4757 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004758
4759#ifdef FEAT_SYN_HL
4760 case ZOPEN + 1:
4761 case ZOPEN + 2:
4762 case ZOPEN + 3:
4763 case ZOPEN + 4:
4764 case ZOPEN + 5:
4765 case ZOPEN + 6:
4766 case ZOPEN + 7:
4767 case ZOPEN + 8:
4768 case ZOPEN + 9:
4769 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004770 no = op - ZOPEN;
4771 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004772 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004773 if (rp == NULL)
4774 status = RA_FAIL;
4775 else
4776 {
4777 rp->rs_no = no;
4778 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4779 &reg_startzp[no]);
4780 /* We simply continue and handle the result when done. */
4781 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004782 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004783 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004784#endif
4785
4786 case MCLOSE + 0: /* Match end: \ze */
4787 case MCLOSE + 1: /* \) */
4788 case MCLOSE + 2:
4789 case MCLOSE + 3:
4790 case MCLOSE + 4:
4791 case MCLOSE + 5:
4792 case MCLOSE + 6:
4793 case MCLOSE + 7:
4794 case MCLOSE + 8:
4795 case MCLOSE + 9:
4796 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004797 no = op - MCLOSE;
4798 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004799 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004800 if (rp == NULL)
4801 status = RA_FAIL;
4802 else
4803 {
4804 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004805 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4806 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004807 /* We simply continue and handle the result when done. */
4808 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004809 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004810 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004811
4812#ifdef FEAT_SYN_HL
4813 case ZCLOSE + 1: /* \) after \z( */
4814 case ZCLOSE + 2:
4815 case ZCLOSE + 3:
4816 case ZCLOSE + 4:
4817 case ZCLOSE + 5:
4818 case ZCLOSE + 6:
4819 case ZCLOSE + 7:
4820 case ZCLOSE + 8:
4821 case ZCLOSE + 9:
4822 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004823 no = op - ZCLOSE;
4824 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004825 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004826 if (rp == NULL)
4827 status = RA_FAIL;
4828 else
4829 {
4830 rp->rs_no = no;
4831 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4832 &reg_endzp[no]);
4833 /* We simply continue and handle the result when done. */
4834 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004835 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004836 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004837#endif
4838
4839 case BACKREF + 1:
4840 case BACKREF + 2:
4841 case BACKREF + 3:
4842 case BACKREF + 4:
4843 case BACKREF + 5:
4844 case BACKREF + 6:
4845 case BACKREF + 7:
4846 case BACKREF + 8:
4847 case BACKREF + 9:
4848 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004849 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004850
4851 no = op - BACKREF;
4852 cleanup_subexpr();
4853 if (!REG_MULTI) /* Single-line regexp */
4854 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004855 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004856 {
4857 /* Backref was not set: Match an empty string. */
4858 len = 0;
4859 }
4860 else
4861 {
4862 /* Compare current input with back-ref in the same
4863 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004864 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004865 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004866 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004867 }
4868 }
4869 else /* Multi-line regexp */
4870 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004871 if (rex.reg_startpos[no].lnum < 0
4872 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004873 {
4874 /* Backref was not set: Match an empty string. */
4875 len = 0;
4876 }
4877 else
4878 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004879 if (rex.reg_startpos[no].lnum == rex.lnum
4880 && rex.reg_endpos[no].lnum == rex.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004881 {
4882 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004883 len = rex.reg_endpos[no].col
4884 - rex.reg_startpos[no].col;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004885 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
4886 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004887 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004888 }
4889 else
4890 {
4891 /* Messy situation: Need to compare between two
4892 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004893 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004894 rex.reg_startpos[no].lnum,
4895 rex.reg_startpos[no].col,
4896 rex.reg_endpos[no].lnum,
4897 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02004898 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004899
4900 if (r != RA_MATCH)
4901 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004902 }
4903 }
4904 }
4905
4906 /* Matched the backref, skip over it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004907 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004908 }
4909 break;
4910
4911#ifdef FEAT_SYN_HL
4912 case ZREF + 1:
4913 case ZREF + 2:
4914 case ZREF + 3:
4915 case ZREF + 4:
4916 case ZREF + 5:
4917 case ZREF + 6:
4918 case ZREF + 7:
4919 case ZREF + 8:
4920 case ZREF + 9:
4921 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004922 int len;
4923
4924 cleanup_zsubexpr();
4925 no = op - ZREF;
4926 if (re_extmatch_in != NULL
4927 && re_extmatch_in->matches[no] != NULL)
4928 {
4929 len = (int)STRLEN(re_extmatch_in->matches[no]);
4930 if (cstrncmp(re_extmatch_in->matches[no],
Bram Moolenaar0270f382018-07-17 05:43:58 +02004931 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004932 status = RA_NOMATCH;
4933 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02004934 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004935 }
4936 else
4937 {
4938 /* Backref was not set: Match an empty string. */
4939 }
4940 }
4941 break;
4942#endif
4943
4944 case BRANCH:
4945 {
4946 if (OP(next) != BRANCH) /* No choice. */
4947 next = OPERAND(scan); /* Avoid recursion. */
4948 else
4949 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004950 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004951 if (rp == NULL)
4952 status = RA_FAIL;
4953 else
4954 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004955 }
4956 }
4957 break;
4958
4959 case BRACE_LIMITS:
4960 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004961 if (OP(next) == BRACE_SIMPLE)
4962 {
4963 bl_minval = OPERAND_MIN(scan);
4964 bl_maxval = OPERAND_MAX(scan);
4965 }
4966 else if (OP(next) >= BRACE_COMPLEX
4967 && OP(next) < BRACE_COMPLEX + 10)
4968 {
4969 no = OP(next) - BRACE_COMPLEX;
4970 brace_min[no] = OPERAND_MIN(scan);
4971 brace_max[no] = OPERAND_MAX(scan);
4972 brace_count[no] = 0;
4973 }
4974 else
4975 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01004976 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004977 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004978 }
4979 }
4980 break;
4981
4982 case BRACE_COMPLEX + 0:
4983 case BRACE_COMPLEX + 1:
4984 case BRACE_COMPLEX + 2:
4985 case BRACE_COMPLEX + 3:
4986 case BRACE_COMPLEX + 4:
4987 case BRACE_COMPLEX + 5:
4988 case BRACE_COMPLEX + 6:
4989 case BRACE_COMPLEX + 7:
4990 case BRACE_COMPLEX + 8:
4991 case BRACE_COMPLEX + 9:
4992 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004993 no = op - BRACE_COMPLEX;
4994 ++brace_count[no];
4995
4996 /* If not matched enough times yet, try one more */
4997 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004998 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004999 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005000 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005001 if (rp == NULL)
5002 status = RA_FAIL;
5003 else
5004 {
5005 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005006 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005007 next = OPERAND(scan);
5008 /* We continue and handle the result when done. */
5009 }
5010 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005011 }
5012
5013 /* If matched enough times, may try matching some more */
5014 if (brace_min[no] <= brace_max[no])
5015 {
5016 /* Range is the normal way around, use longest match */
5017 if (brace_count[no] <= brace_max[no])
5018 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005019 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005020 if (rp == NULL)
5021 status = RA_FAIL;
5022 else
5023 {
5024 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005025 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005026 next = OPERAND(scan);
5027 /* We continue and handle the result when done. */
5028 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005029 }
5030 }
5031 else
5032 {
5033 /* Range is backwards, use shortest match first */
5034 if (brace_count[no] <= brace_min[no])
5035 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005036 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005037 if (rp == NULL)
5038 status = RA_FAIL;
5039 else
5040 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005041 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005042 /* We continue and handle the result when done. */
5043 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005044 }
5045 }
5046 }
5047 break;
5048
5049 case BRACE_SIMPLE:
5050 case STAR:
5051 case PLUS:
5052 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005053 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005054
5055 /*
5056 * Lookahead to avoid useless match attempts when we know
5057 * what character comes next.
5058 */
5059 if (OP(next) == EXACTLY)
5060 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005061 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005062 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005063 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005064 if (MB_ISUPPER(rst.nextb))
5065 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005066 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005067 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005068 }
5069 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005070 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005071 }
5072 else
5073 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005074 rst.nextb = NUL;
5075 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005076 }
5077 if (op != BRACE_SIMPLE)
5078 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005079 rst.minval = (op == STAR) ? 0 : 1;
5080 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005081 }
5082 else
5083 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005084 rst.minval = bl_minval;
5085 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005086 }
5087
5088 /*
5089 * When maxval > minval, try matching as much as possible, up
5090 * to maxval. When maxval < minval, try matching at least the
5091 * minimal number (since the range is backwards, that's also
5092 * maxval!).
5093 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005094 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005095 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005096 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005097 status = RA_FAIL;
5098 break;
5099 }
5100 if (rst.minval <= rst.maxval
5101 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5102 {
5103 /* It could match. Prepare for trying to match what
5104 * follows. The code is below. Parameters are stored in
5105 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005106 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005107 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005108 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005109 status = RA_FAIL;
5110 }
5111 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005112 status = RA_FAIL;
5113 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005114 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005115 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005116 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005117 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005118 if (rp == NULL)
5119 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005120 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005121 {
5122 *(((regstar_T *)rp) - 1) = rst;
5123 status = RA_BREAK; /* skip the restore bits */
5124 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005125 }
5126 }
5127 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005128 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005129
Bram Moolenaar071d4272004-06-13 20:20:40 +00005130 }
5131 break;
5132
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005133 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005134 case MATCH:
5135 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005136 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005137 if (rp == NULL)
5138 status = RA_FAIL;
5139 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005140 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005141 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005142 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005143 next = OPERAND(scan);
5144 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005145 }
5146 break;
5147
5148 case BEHIND:
5149 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005150 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005151 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005152 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005153 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005154 status = RA_FAIL;
5155 }
5156 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005157 status = RA_FAIL;
5158 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005159 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005160 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005161 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005162 if (rp == NULL)
5163 status = RA_FAIL;
5164 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005165 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005166 /* Need to save the subexpr to be able to restore them
5167 * when there is a match but we don't use it. */
5168 save_subexpr(((regbehind_T *)rp) - 1);
5169
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005170 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005171 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005172 /* First try if what follows matches. If it does then we
5173 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005174 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005175 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005176 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005177
5178 case BHPOS:
5179 if (REG_MULTI)
5180 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005181 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
5182 || behind_pos.rs_u.pos.lnum != rex.lnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005183 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005184 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005185 else if (behind_pos.rs_u.ptr != rex.input)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005186 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005187 break;
5188
5189 case NEWL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02005190 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005191 || rex.reg_line_lbr)
5192 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005193 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005194 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195 ADVANCE_REGINPUT();
5196 else
5197 reg_nextline();
5198 break;
5199
5200 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005201 status = RA_MATCH; /* Success! */
5202 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203
5204 default:
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005205 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00005206#ifdef DEBUG
5207 printf("Illegal op code %d\n", op);
5208#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005209 status = RA_FAIL;
5210 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005211 }
5212 }
5213
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005214 /* If we can't continue sequentially, break the inner loop. */
5215 if (status != RA_CONT)
5216 break;
5217
5218 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005219 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005220
5221 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005222
5223 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005224 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005225 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005226 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005227 while (regstack.ga_len > 0 && status != RA_FAIL)
5228 {
5229 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5230 switch (rp->rs_state)
5231 {
5232 case RS_NOPEN:
5233 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005234 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005235 break;
5236
5237 case RS_MOPEN:
5238 /* Pop the state. Restore pointers when there is no match. */
5239 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005240 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5241 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005242 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005243 break;
5244
5245#ifdef FEAT_SYN_HL
5246 case RS_ZOPEN:
5247 /* Pop the state. Restore pointers when there is no match. */
5248 if (status == RA_NOMATCH)
5249 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5250 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005251 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005252 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005253#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005254
5255 case RS_MCLOSE:
5256 /* Pop the state. Restore pointers when there is no match. */
5257 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005258 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5259 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005260 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005261 break;
5262
5263#ifdef FEAT_SYN_HL
5264 case RS_ZCLOSE:
5265 /* Pop the state. Restore pointers when there is no match. */
5266 if (status == RA_NOMATCH)
5267 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5268 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005269 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005270 break;
5271#endif
5272
5273 case RS_BRANCH:
5274 if (status == RA_MATCH)
5275 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005276 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005277 else
5278 {
5279 if (status != RA_BREAK)
5280 {
5281 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005282 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005283 scan = rp->rs_scan;
5284 }
5285 if (scan == NULL || OP(scan) != BRANCH)
5286 {
5287 /* no more branches, didn't find a match */
5288 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005289 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005290 }
5291 else
5292 {
5293 /* Prepare to try a branch. */
5294 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005295 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005296 scan = OPERAND(scan);
5297 }
5298 }
5299 break;
5300
5301 case RS_BRCPLX_MORE:
5302 /* Pop the state. Restore pointers when there is no match. */
5303 if (status == RA_NOMATCH)
5304 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005305 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005306 --brace_count[rp->rs_no]; /* decrement match count */
5307 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005308 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005309 break;
5310
5311 case RS_BRCPLX_LONG:
5312 /* Pop the state. Restore pointers when there is no match. */
5313 if (status == RA_NOMATCH)
5314 {
5315 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005316 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005317 --brace_count[rp->rs_no];
5318 /* continue with the items after "\{}" */
5319 status = RA_CONT;
5320 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005321 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005322 if (status == RA_CONT)
5323 scan = regnext(scan);
5324 break;
5325
5326 case RS_BRCPLX_SHORT:
5327 /* Pop the state. Restore pointers when there is no match. */
5328 if (status == RA_NOMATCH)
5329 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005330 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005331 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005332 if (status == RA_NOMATCH)
5333 {
5334 scan = OPERAND(scan);
5335 status = RA_CONT;
5336 }
5337 break;
5338
5339 case RS_NOMATCH:
5340 /* Pop the state. If the operand matches for NOMATCH or
5341 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5342 * except for SUBPAT, and continue with the next item. */
5343 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5344 status = RA_NOMATCH;
5345 else
5346 {
5347 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005348 if (rp->rs_no != SUBPAT) /* zero-width */
5349 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005350 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005351 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005352 if (status == RA_CONT)
5353 scan = regnext(scan);
5354 break;
5355
5356 case RS_BEHIND1:
5357 if (status == RA_NOMATCH)
5358 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005359 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005360 regstack.ga_len -= sizeof(regbehind_T);
5361 }
5362 else
5363 {
5364 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5365 * the behind part does (not) match before the current
5366 * position in the input. This must be done at every
5367 * position in the input and checking if the match ends at
5368 * the current position. */
5369
5370 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005371 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005372
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005373 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005374 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005375 * result, hitting the start of the line or the previous
5376 * line (for multi-line matching).
5377 * Set behind_pos to where the match should end, BHPOS
5378 * will match it. Save the current value. */
5379 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5380 behind_pos = rp->rs_un.regsave;
5381
5382 rp->rs_state = RS_BEHIND2;
5383
Bram Moolenaar582fd852005-03-28 20:58:01 +00005384 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005385 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005386 }
5387 break;
5388
5389 case RS_BEHIND2:
5390 /*
5391 * Looping for BEHIND / NOBEHIND match.
5392 */
5393 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5394 {
5395 /* found a match that ends where "next" started */
5396 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5397 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005398 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5399 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005400 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005401 {
5402 /* But we didn't want a match. Need to restore the
5403 * subexpr, because what follows matched, so they have
5404 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005405 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005406 restore_subexpr(((regbehind_T *)rp) - 1);
5407 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005408 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005409 regstack.ga_len -= sizeof(regbehind_T);
5410 }
5411 else
5412 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005413 long limit;
5414
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005415 /* No match or a match that doesn't end where we want it: Go
5416 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005417 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005418 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005419 if (REG_MULTI)
5420 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005421 if (limit > 0
5422 && ((rp->rs_un.regsave.rs_u.pos.lnum
5423 < behind_pos.rs_u.pos.lnum
Bram Moolenaar0270f382018-07-17 05:43:58 +02005424 ? (colnr_T)STRLEN(rex.line)
Bram Moolenaar61602c52013-06-01 19:54:43 +02005425 : behind_pos.rs_u.pos.col)
5426 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5427 no = FAIL;
5428 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005429 {
5430 if (rp->rs_un.regsave.rs_u.pos.lnum
5431 < behind_pos.rs_u.pos.lnum
5432 || reg_getline(
5433 --rp->rs_un.regsave.rs_u.pos.lnum)
5434 == NULL)
5435 no = FAIL;
5436 else
5437 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005438 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005439 rp->rs_un.regsave.rs_u.pos.col =
Bram Moolenaar0270f382018-07-17 05:43:58 +02005440 (colnr_T)STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005441 }
5442 }
5443 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005444 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005445 if (has_mbyte)
Bram Moolenaarbc197192018-02-13 16:35:06 +01005446 {
5447 char_u *line =
Bram Moolenaar866f3552019-01-01 22:19:08 +01005448 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
Bram Moolenaarbc197192018-02-13 16:35:06 +01005449
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005450 rp->rs_un.regsave.rs_u.pos.col -=
Bram Moolenaarbc197192018-02-13 16:35:06 +01005451 (*mb_head_off)(line, line
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005452 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaarbc197192018-02-13 16:35:06 +01005453 }
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005454 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005455 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005456 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005457 }
5458 else
5459 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005460 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005461 no = FAIL;
5462 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005463 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005464 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005465 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5466 - rp->rs_un.regsave.rs_u.ptr) > limit)
5467 no = FAIL;
5468 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005469 }
5470 if (no == OK)
5471 {
5472 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005473 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005474 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005475 if (status == RA_MATCH)
5476 {
5477 /* We did match, so subexpr may have been changed,
5478 * need to restore them for the next try. */
5479 status = RA_NOMATCH;
5480 restore_subexpr(((regbehind_T *)rp) - 1);
5481 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005482 }
5483 else
5484 {
5485 /* Can't advance. For NOBEHIND that's a match. */
5486 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5487 if (rp->rs_no == NOBEHIND)
5488 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005489 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5490 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005491 status = RA_MATCH;
5492 }
5493 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005494 {
5495 /* We do want a proper match. Need to restore the
5496 * subexpr if we had a match, because they may have
5497 * been set. */
5498 if (status == RA_MATCH)
5499 {
5500 status = RA_NOMATCH;
5501 restore_subexpr(((regbehind_T *)rp) - 1);
5502 }
5503 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005504 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005505 regstack.ga_len -= sizeof(regbehind_T);
5506 }
5507 }
5508 break;
5509
5510 case RS_STAR_LONG:
5511 case RS_STAR_SHORT:
5512 {
5513 regstar_T *rst = ((regstar_T *)rp) - 1;
5514
5515 if (status == RA_MATCH)
5516 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005517 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005518 regstack.ga_len -= sizeof(regstar_T);
5519 break;
5520 }
5521
5522 /* Tried once already, restore input pointers. */
5523 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005524 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005525
5526 /* Repeat until we found a position where it could match. */
5527 for (;;)
5528 {
5529 if (status != RA_BREAK)
5530 {
5531 /* Tried first position already, advance. */
5532 if (rp->rs_state == RS_STAR_LONG)
5533 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005534 /* Trying for longest match, but couldn't or
5535 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005536 if (--rst->count < rst->minval)
5537 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005538 if (rex.input == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005539 {
5540 /* backup to last char of previous line */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005541 --rex.lnum;
5542 rex.line = reg_getline(rex.lnum);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005543 /* Just in case regrepeat() didn't count
5544 * right. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005545 if (rex.line == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005546 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005547 rex.input = rex.line + STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005548 fast_breakcheck();
5549 }
5550 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005551 MB_PTR_BACK(rex.line, rex.input);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005552 }
5553 else
5554 {
5555 /* Range is backwards, use shortest match first.
5556 * Careful: maxval and minval are exchanged!
5557 * Couldn't or didn't match: try advancing one
5558 * char. */
5559 if (rst->count == rst->minval
5560 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5561 break;
5562 ++rst->count;
5563 }
5564 if (got_int)
5565 break;
5566 }
5567 else
5568 status = RA_NOMATCH;
5569
5570 /* If it could match, try it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005571 if (rst->nextb == NUL || *rex.input == rst->nextb
5572 || *rex.input == rst->nextb_ic)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005573 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005574 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005575 scan = regnext(rp->rs_scan);
5576 status = RA_CONT;
5577 break;
5578 }
5579 }
5580 if (status != RA_CONT)
5581 {
5582 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005583 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005584 regstack.ga_len -= sizeof(regstar_T);
5585 status = RA_NOMATCH;
5586 }
5587 }
5588 break;
5589 }
5590
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005591 /* If we want to continue the inner loop or didn't pop a state
5592 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005593 if (status == RA_CONT || rp == (regitem_T *)
5594 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5595 break;
5596 }
5597
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005598 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005599 if (status == RA_CONT)
5600 continue;
5601
5602 /*
5603 * If the regstack is empty or something failed we are done.
5604 */
5605 if (regstack.ga_len == 0 || status == RA_FAIL)
5606 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005607 if (scan == NULL)
5608 {
5609 /*
5610 * We get here only if there's trouble -- normally "case END" is
5611 * the terminating point.
5612 */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005613 emsg(_(e_re_corr));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005614#ifdef DEBUG
5615 printf("Premature EOL\n");
5616#endif
5617 }
5618 return (status == RA_MATCH);
5619 }
5620
5621 } /* End of loop until the regstack is empty. */
5622
5623 /* NOTREACHED */
5624}
5625
5626/*
5627 * Push an item onto the regstack.
5628 * Returns pointer to new item. Returns NULL when out of memory.
5629 */
5630 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005631regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005632{
5633 regitem_T *rp;
5634
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005635 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005636 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005637 emsg(_(e_maxmempat));
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005638 return NULL;
5639 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005640 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005641 return NULL;
5642
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005643 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005644 rp->rs_state = state;
5645 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005646
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005647 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005648 return rp;
5649}
5650
5651/*
5652 * Pop an item from the regstack.
5653 */
5654 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005655regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005656{
5657 regitem_T *rp;
5658
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005659 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005660 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005661
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005662 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005663}
5664
Bram Moolenaar071d4272004-06-13 20:20:40 +00005665/*
5666 * regrepeat - repeatedly match something simple, return how many.
Bram Moolenaar0270f382018-07-17 05:43:58 +02005667 * Advances rex.input (and rex.lnum) to just after the matched chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005668 */
5669 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005670regrepeat(
5671 char_u *p,
5672 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005673{
5674 long count = 0;
5675 char_u *scan;
5676 char_u *opnd;
5677 int mask;
5678 int testval = 0;
5679
Bram Moolenaar0270f382018-07-17 05:43:58 +02005680 scan = rex.input; /* Make local copy of rex.input for speed. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005681 opnd = OPERAND(p);
5682 switch (OP(p))
5683 {
5684 case ANY:
5685 case ANY + ADD_NL:
5686 while (count < maxcount)
5687 {
5688 /* Matching anything means we continue until end-of-line (or
5689 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5690 while (*scan != NUL && count < maxcount)
5691 {
5692 ++count;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005693 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005694 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005695 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005696 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005697 break;
5698 ++count; /* count the line-break */
5699 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005700 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005701 if (got_int)
5702 break;
5703 }
5704 break;
5705
5706 case IDENT:
5707 case IDENT + ADD_NL:
5708 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005709 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005710 case SIDENT:
5711 case SIDENT + ADD_NL:
5712 while (count < maxcount)
5713 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005714 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005715 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005716 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005717 }
5718 else if (*scan == NUL)
5719 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005720 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005721 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005722 break;
5723 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005724 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005725 if (got_int)
5726 break;
5727 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005728 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005729 ++scan;
5730 else
5731 break;
5732 ++count;
5733 }
5734 break;
5735
5736 case KWORD:
5737 case KWORD + ADD_NL:
5738 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005739 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005740 case SKWORD:
5741 case SKWORD + ADD_NL:
5742 while (count < maxcount)
5743 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005744 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005745 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005746 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005747 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005748 }
5749 else if (*scan == NUL)
5750 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005751 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005752 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005753 break;
5754 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005755 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005756 if (got_int)
5757 break;
5758 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005759 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005760 ++scan;
5761 else
5762 break;
5763 ++count;
5764 }
5765 break;
5766
5767 case FNAME:
5768 case FNAME + ADD_NL:
5769 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005770 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005771 case SFNAME:
5772 case SFNAME + ADD_NL:
5773 while (count < maxcount)
5774 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005775 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005776 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005777 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005778 }
5779 else if (*scan == NUL)
5780 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005781 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005782 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005783 break;
5784 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005785 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005786 if (got_int)
5787 break;
5788 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005789 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005790 ++scan;
5791 else
5792 break;
5793 ++count;
5794 }
5795 break;
5796
5797 case PRINT:
5798 case PRINT + ADD_NL:
5799 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005800 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005801 case SPRINT:
5802 case SPRINT + ADD_NL:
5803 while (count < maxcount)
5804 {
5805 if (*scan == NUL)
5806 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005807 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005808 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005809 break;
5810 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005811 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005812 if (got_int)
5813 break;
5814 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005815 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5816 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005817 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005818 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005819 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005820 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005821 ++scan;
5822 else
5823 break;
5824 ++count;
5825 }
5826 break;
5827
5828 case WHITE:
5829 case WHITE + ADD_NL:
5830 testval = mask = RI_WHITE;
5831do_class:
5832 while (count < maxcount)
5833 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005834 int l;
Bram Moolenaara12a1612019-01-24 16:39:02 +01005835
Bram Moolenaar071d4272004-06-13 20:20:40 +00005836 if (*scan == NUL)
5837 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005838 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005839 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005840 break;
5841 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005842 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005843 if (got_int)
5844 break;
5845 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005846 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005847 {
5848 if (testval != 0)
5849 break;
5850 scan += l;
5851 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005852 else if ((class_tab[*scan] & mask) == testval)
5853 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005854 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005855 ++scan;
5856 else
5857 break;
5858 ++count;
5859 }
5860 break;
5861
5862 case NWHITE:
5863 case NWHITE + ADD_NL:
5864 mask = RI_WHITE;
5865 goto do_class;
5866 case DIGIT:
5867 case DIGIT + ADD_NL:
5868 testval = mask = RI_DIGIT;
5869 goto do_class;
5870 case NDIGIT:
5871 case NDIGIT + ADD_NL:
5872 mask = RI_DIGIT;
5873 goto do_class;
5874 case HEX:
5875 case HEX + ADD_NL:
5876 testval = mask = RI_HEX;
5877 goto do_class;
5878 case NHEX:
5879 case NHEX + ADD_NL:
5880 mask = RI_HEX;
5881 goto do_class;
5882 case OCTAL:
5883 case OCTAL + ADD_NL:
5884 testval = mask = RI_OCTAL;
5885 goto do_class;
5886 case NOCTAL:
5887 case NOCTAL + ADD_NL:
5888 mask = RI_OCTAL;
5889 goto do_class;
5890 case WORD:
5891 case WORD + ADD_NL:
5892 testval = mask = RI_WORD;
5893 goto do_class;
5894 case NWORD:
5895 case NWORD + ADD_NL:
5896 mask = RI_WORD;
5897 goto do_class;
5898 case HEAD:
5899 case HEAD + ADD_NL:
5900 testval = mask = RI_HEAD;
5901 goto do_class;
5902 case NHEAD:
5903 case NHEAD + ADD_NL:
5904 mask = RI_HEAD;
5905 goto do_class;
5906 case ALPHA:
5907 case ALPHA + ADD_NL:
5908 testval = mask = RI_ALPHA;
5909 goto do_class;
5910 case NALPHA:
5911 case NALPHA + ADD_NL:
5912 mask = RI_ALPHA;
5913 goto do_class;
5914 case LOWER:
5915 case LOWER + ADD_NL:
5916 testval = mask = RI_LOWER;
5917 goto do_class;
5918 case NLOWER:
5919 case NLOWER + ADD_NL:
5920 mask = RI_LOWER;
5921 goto do_class;
5922 case UPPER:
5923 case UPPER + ADD_NL:
5924 testval = mask = RI_UPPER;
5925 goto do_class;
5926 case NUPPER:
5927 case NUPPER + ADD_NL:
5928 mask = RI_UPPER;
5929 goto do_class;
5930
5931 case EXACTLY:
5932 {
5933 int cu, cl;
5934
5935 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005936 * would have been used for it. It does handle single-byte
5937 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02005938 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005939 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005940 cu = MB_TOUPPER(*opnd);
5941 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005942 while (count < maxcount && (*scan == cu || *scan == cl))
5943 {
5944 count++;
5945 scan++;
5946 }
5947 }
5948 else
5949 {
5950 cu = *opnd;
5951 while (count < maxcount && *scan == cu)
5952 {
5953 count++;
5954 scan++;
5955 }
5956 }
5957 break;
5958 }
5959
Bram Moolenaar071d4272004-06-13 20:20:40 +00005960 case MULTIBYTECODE:
5961 {
5962 int i, len, cf = 0;
5963
5964 /* Safety check (just in case 'encoding' was changed since
5965 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005966 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005967 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005968 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005969 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02005970 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005971 {
5972 for (i = 0; i < len; ++i)
5973 if (opnd[i] != scan[i])
5974 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005975 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00005976 || utf_fold(utf_ptr2char(scan)) != cf))
5977 break;
5978 scan += len;
5979 ++count;
5980 }
5981 }
5982 }
5983 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005984
5985 case ANYOF:
5986 case ANYOF + ADD_NL:
5987 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005988 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005989
5990 case ANYBUT:
5991 case ANYBUT + ADD_NL:
5992 while (count < maxcount)
5993 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005994 int len;
Bram Moolenaara12a1612019-01-24 16:39:02 +01005995
Bram Moolenaar071d4272004-06-13 20:20:40 +00005996 if (*scan == NUL)
5997 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005998 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005999 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006000 break;
6001 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006002 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006003 if (got_int)
6004 break;
6005 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006006 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006007 ++scan;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006008 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006009 {
6010 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6011 break;
6012 scan += len;
6013 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006014 else
6015 {
6016 if ((cstrchr(opnd, *scan) == NULL) == testval)
6017 break;
6018 ++scan;
6019 }
6020 ++count;
6021 }
6022 break;
6023
6024 case NEWL:
6025 while (count < maxcount
Bram Moolenaar0270f382018-07-17 05:43:58 +02006026 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006027 && !rex.reg_line_lbr && REG_MULTI)
6028 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006029 {
6030 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006031 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006032 ADVANCE_REGINPUT();
6033 else
6034 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006035 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006036 if (got_int)
6037 break;
6038 }
6039 break;
6040
6041 default: /* Oh dear. Called inappropriately. */
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006042 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006043#ifdef DEBUG
6044 printf("Called regrepeat with op code %d\n", OP(p));
6045#endif
6046 break;
6047 }
6048
Bram Moolenaar0270f382018-07-17 05:43:58 +02006049 rex.input = scan;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006050
6051 return (int)count;
6052}
6053
6054/*
6055 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006056 * Returns NULL when calculating size, when there is no next item and when
6057 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006058 */
6059 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006060regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006061{
6062 int offset;
6063
Bram Moolenaard3005802009-11-25 17:21:32 +00006064 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006065 return NULL;
6066
6067 offset = NEXT(p);
6068 if (offset == 0)
6069 return NULL;
6070
Bram Moolenaar582fd852005-03-28 20:58:01 +00006071 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006072 return p - offset;
6073 else
6074 return p + offset;
6075}
6076
6077/*
6078 * Check the regexp program for its magic number.
6079 * Return TRUE if it's wrong.
6080 */
6081 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006082prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006083{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006084 regprog_T *prog;
6085
Bram Moolenaar6100d022016-10-02 16:51:57 +02006086 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006087 if (prog->engine == &nfa_regengine)
6088 /* For NFA matcher we don't check the magic */
6089 return FALSE;
6090
6091 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006092 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006093 emsg(_(e_re_corr));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006094 return TRUE;
6095 }
6096 return FALSE;
6097}
6098
6099/*
6100 * Cleanup the subexpressions, if this wasn't done yet.
6101 * This construction is used to clear the subexpressions only when they are
6102 * used (to increase speed).
6103 */
6104 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006105cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006106{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006107 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006108 {
6109 if (REG_MULTI)
6110 {
6111 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006112 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6113 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006114 }
6115 else
6116 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006117 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6118 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006119 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006120 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006121 }
6122}
6123
#ifdef FEAT_SYN_HL
/*
 * Like cleanup_subexpr(), but for the "z" subexpression info: clear
 * reg_startzpos/reg_endzpos or reg_startzp/reg_endzp when
 * "rex.need_clear_zsubexpr" is set.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
	return;

    if (REG_MULTI)
    {
	/* Filling with 0xff bytes makes every lnum -1. */
	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
	vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    rex.need_clear_zsubexpr = FALSE;
}
#endif
6145
6146/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006147 * Save the current subexpr to "bp", so that they can be restored
6148 * later by restore_subexpr().
6149 */
6150 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006151save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006152{
6153 int i;
6154
Bram Moolenaar0270f382018-07-17 05:43:58 +02006155 /* When "rex.need_clear_subexpr" is set we don't need to save the values, only
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006156 * remember that this flag needs to be set again when restoring. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006157 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
6158 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006159 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006160 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006161 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006162 if (REG_MULTI)
6163 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006164 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6165 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006166 }
6167 else
6168 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006169 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6170 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006171 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006172 }
6173 }
6174}
6175
6176/*
6177 * Restore the subexpr from "bp".
6178 */
6179 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006180restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006181{
6182 int i;
6183
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006184 /* Only need to restore saved values when they are not to be cleared. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006185 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
6186 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006187 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006188 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006189 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006190 if (REG_MULTI)
6191 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006192 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6193 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006194 }
6195 else
6196 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006197 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6198 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006199 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006200 }
6201 }
6202}
6203
6204/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02006205 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006206 */
6207 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006208reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006209{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006210 rex.line = reg_getline(++rex.lnum);
6211 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006212 fast_breakcheck();
6213}
6214
6215/*
6216 * Save the input line and position in a regsave_T.
6217 */
6218 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006219reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006220{
6221 if (REG_MULTI)
6222 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006223 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
6224 save->rs_u.pos.lnum = rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006225 }
6226 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006227 save->rs_u.ptr = rex.input;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006228 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006229}
6230
6231/*
6232 * Restore the input line and position from a regsave_T.
6233 */
6234 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006235reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006236{
6237 if (REG_MULTI)
6238 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006239 if (rex.lnum != save->rs_u.pos.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006240 {
6241 /* only call reg_getline() when the line number changed to save
6242 * a bit of time */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006243 rex.lnum = save->rs_u.pos.lnum;
6244 rex.line = reg_getline(rex.lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006245 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006246 rex.input = rex.line + save->rs_u.pos.col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006247 }
6248 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006249 rex.input = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006250 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006251}
6252
6253/*
6254 * Return TRUE if current position is equal to saved position.
6255 */
6256 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006257reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006258{
6259 if (REG_MULTI)
Bram Moolenaar0270f382018-07-17 05:43:58 +02006260 return rex.lnum == save->rs_u.pos.lnum
6261 && rex.input == rex.line + save->rs_u.pos.col;
6262 return rex.input == save->rs_u.ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006263}
6264
6265/*
6266 * Tentatively set the sub-expression start to the current position (after
6267 * calling regmatch() they will have changed). Need to save the existing
6268 * values for when there is no match.
6269 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6270 * depending on REG_MULTI.
6271 */
6272 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006273save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006274{
6275 savep->se_u.pos = *posp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006276 posp->lnum = rex.lnum;
6277 posp->col = (colnr_T)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006278}
6279
6280 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006281save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006282{
6283 savep->se_u.ptr = *pp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006284 *pp = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006285}
6286
6287/*
6288 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6289 */
6290 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006291re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006292{
6293 long_u n = OPERAND_MIN(scan);
6294
6295 if (OPERAND_CMP(scan) == '>')
6296 return val > n;
6297 if (OPERAND_CMP(scan) == '<')
6298 return val < n;
6299 return val == n;
6300}
6301
Bram Moolenaar580abea2013-06-14 20:31:28 +02006302/*
6303 * Check whether a backreference matches.
6304 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006305 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6306 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006307 */
6308 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006309match_with_backref(
6310 linenr_T start_lnum,
6311 colnr_T start_col,
6312 linenr_T end_lnum,
6313 colnr_T end_col,
6314 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006315{
6316 linenr_T clnum = start_lnum;
6317 colnr_T ccol = start_col;
6318 int len;
6319 char_u *p;
6320
6321 if (bytelen != NULL)
6322 *bytelen = 0;
6323 for (;;)
6324 {
6325 /* Since getting one line may invalidate the other, need to make copy.
6326 * Slow! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006327 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006328 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006329 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02006330 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6331 {
6332 len += 50; /* get some extra */
6333 vim_free(reg_tofree);
6334 reg_tofree = alloc(len);
6335 if (reg_tofree == NULL)
6336 return RA_FAIL; /* out of memory!*/
6337 reg_tofreelen = len;
6338 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006339 STRCPY(reg_tofree, rex.line);
6340 rex.input = reg_tofree + (rex.input - rex.line);
6341 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006342 }
6343
6344 /* Get the line to compare with. */
6345 p = reg_getline(clnum);
6346 if (clnum == end_lnum)
6347 len = end_col - ccol;
6348 else
6349 len = (int)STRLEN(p + ccol);
6350
Bram Moolenaar0270f382018-07-17 05:43:58 +02006351 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006352 return RA_NOMATCH; /* doesn't match */
6353 if (bytelen != NULL)
6354 *bytelen += len;
6355 if (clnum == end_lnum)
6356 break; /* match and at end! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006357 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006358 return RA_NOMATCH; /* text too short */
6359
6360 /* Advance to next line. */
6361 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006362 if (bytelen != NULL)
6363 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006364 ++clnum;
6365 ccol = 0;
6366 if (got_int)
6367 return RA_FAIL;
6368 }
6369
Bram Moolenaar0270f382018-07-17 05:43:58 +02006370 /* found a match! Note that rex.line may now point to a copy of the line,
Bram Moolenaar580abea2013-06-14 20:31:28 +02006371 * that should not matter. */
6372 return RA_MATCH;
6373}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006374
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write the compiled program "r", made from "pattern", in vaguely
 * comprehensible form.  The output goes to stdout, or is appended to the
 * file "bt_regexp_log.log" when BT_REGEXP_LOG is defined.
 */
    static void
regdump(char_u *pattern, bt_regprog_T *r)
{
    FILE	*f;
    char_u	*s = r->program + 1;
    char_u	*next;
    char_u	*end = NULL;
    int		op;

#ifdef BT_REGEXP_LOG
    f = fopen("bt_regexp_log.log", "a");
#else
    f = stdout;
#endif
    if (f == NULL)
	return;
    fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Loop until we find the END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    do
    {
	op = OP(s);
	fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */

	next = regnext(s);				    /* Next ptr. */
	if (next != NULL)
	    fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
	else
	    fprintf(f, "(0)");
	if (end < next)
	    end = next;

	/* Print the numbers some items have in front of their operand and
	 * skip over them. */
	switch (op)
	{
	    case BRACE_LIMITS:
		/* two ints */
		fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
		s += 8;
		break;

	    case BEHIND:
	    case NOBEHIND:
		/* one int */
		fprintf(f, " count %ld", OPERAND_MIN(s));
		s += 4;
		break;

	    case RE_LNUM:
	    case RE_COL:
	    case RE_VCOL:
		/* one int plus comparator */
		fprintf(f, " count %ld", OPERAND_MIN(s));
		s += 5;
		break;

	    default:
		break;
	}
	s += 3;

	if (op == ANYOF || op == ANYOF + ADD_NL
		|| op == ANYBUT || op == ANYBUT + ADD_NL
		|| op == EXACTLY)
	{
	    /* Literal string, where present. */
	    fprintf(f, "\nxxxxxxxxx\n");
	    for ( ; *s != NUL; ++s)
		fprintf(f, "%c", *s);
	    fprintf(f, "\nxxxxxxxxx\n");
	    s++;	/* skip over the NUL */
	}
	fprintf(f, "\r\n");
    } while (op != END || s <= end);

    /* Header fields of interest. */
    if (r->regstart != NUL)
	fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
		? (char *)transchar(r->regstart)
		: "multibyte", r->regstart);
    if (r->reganch)
	fprintf(f, "anchored; ");
    if (r->regmust != NULL)
	fprintf(f, "must have \"%s\"", r->regmust);
    fprintf(f, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(f);
#endif
}
#endif	/* BT_REGEXP_DUMP */
6463
6464#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006465/*
6466 * regprop - printable representation of opcode
6467 */
6468 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006469regprop(char_u *op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006470{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006471 char *p;
6472 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006473
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006474 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006475
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006476 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006477 {
6478 case BOL:
6479 p = "BOL";
6480 break;
6481 case EOL:
6482 p = "EOL";
6483 break;
6484 case RE_BOF:
6485 p = "BOF";
6486 break;
6487 case RE_EOF:
6488 p = "EOF";
6489 break;
6490 case CURSOR:
6491 p = "CURSOR";
6492 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006493 case RE_VISUAL:
6494 p = "RE_VISUAL";
6495 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006496 case RE_LNUM:
6497 p = "RE_LNUM";
6498 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006499 case RE_MARK:
6500 p = "RE_MARK";
6501 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006502 case RE_COL:
6503 p = "RE_COL";
6504 break;
6505 case RE_VCOL:
6506 p = "RE_VCOL";
6507 break;
6508 case BOW:
6509 p = "BOW";
6510 break;
6511 case EOW:
6512 p = "EOW";
6513 break;
6514 case ANY:
6515 p = "ANY";
6516 break;
6517 case ANY + ADD_NL:
6518 p = "ANY+NL";
6519 break;
6520 case ANYOF:
6521 p = "ANYOF";
6522 break;
6523 case ANYOF + ADD_NL:
6524 p = "ANYOF+NL";
6525 break;
6526 case ANYBUT:
6527 p = "ANYBUT";
6528 break;
6529 case ANYBUT + ADD_NL:
6530 p = "ANYBUT+NL";
6531 break;
6532 case IDENT:
6533 p = "IDENT";
6534 break;
6535 case IDENT + ADD_NL:
6536 p = "IDENT+NL";
6537 break;
6538 case SIDENT:
6539 p = "SIDENT";
6540 break;
6541 case SIDENT + ADD_NL:
6542 p = "SIDENT+NL";
6543 break;
6544 case KWORD:
6545 p = "KWORD";
6546 break;
6547 case KWORD + ADD_NL:
6548 p = "KWORD+NL";
6549 break;
6550 case SKWORD:
6551 p = "SKWORD";
6552 break;
6553 case SKWORD + ADD_NL:
6554 p = "SKWORD+NL";
6555 break;
6556 case FNAME:
6557 p = "FNAME";
6558 break;
6559 case FNAME + ADD_NL:
6560 p = "FNAME+NL";
6561 break;
6562 case SFNAME:
6563 p = "SFNAME";
6564 break;
6565 case SFNAME + ADD_NL:
6566 p = "SFNAME+NL";
6567 break;
6568 case PRINT:
6569 p = "PRINT";
6570 break;
6571 case PRINT + ADD_NL:
6572 p = "PRINT+NL";
6573 break;
6574 case SPRINT:
6575 p = "SPRINT";
6576 break;
6577 case SPRINT + ADD_NL:
6578 p = "SPRINT+NL";
6579 break;
6580 case WHITE:
6581 p = "WHITE";
6582 break;
6583 case WHITE + ADD_NL:
6584 p = "WHITE+NL";
6585 break;
6586 case NWHITE:
6587 p = "NWHITE";
6588 break;
6589 case NWHITE + ADD_NL:
6590 p = "NWHITE+NL";
6591 break;
6592 case DIGIT:
6593 p = "DIGIT";
6594 break;
6595 case DIGIT + ADD_NL:
6596 p = "DIGIT+NL";
6597 break;
6598 case NDIGIT:
6599 p = "NDIGIT";
6600 break;
6601 case NDIGIT + ADD_NL:
6602 p = "NDIGIT+NL";
6603 break;
6604 case HEX:
6605 p = "HEX";
6606 break;
6607 case HEX + ADD_NL:
6608 p = "HEX+NL";
6609 break;
6610 case NHEX:
6611 p = "NHEX";
6612 break;
6613 case NHEX + ADD_NL:
6614 p = "NHEX+NL";
6615 break;
6616 case OCTAL:
6617 p = "OCTAL";
6618 break;
6619 case OCTAL + ADD_NL:
6620 p = "OCTAL+NL";
6621 break;
6622 case NOCTAL:
6623 p = "NOCTAL";
6624 break;
6625 case NOCTAL + ADD_NL:
6626 p = "NOCTAL+NL";
6627 break;
6628 case WORD:
6629 p = "WORD";
6630 break;
6631 case WORD + ADD_NL:
6632 p = "WORD+NL";
6633 break;
6634 case NWORD:
6635 p = "NWORD";
6636 break;
6637 case NWORD + ADD_NL:
6638 p = "NWORD+NL";
6639 break;
6640 case HEAD:
6641 p = "HEAD";
6642 break;
6643 case HEAD + ADD_NL:
6644 p = "HEAD+NL";
6645 break;
6646 case NHEAD:
6647 p = "NHEAD";
6648 break;
6649 case NHEAD + ADD_NL:
6650 p = "NHEAD+NL";
6651 break;
6652 case ALPHA:
6653 p = "ALPHA";
6654 break;
6655 case ALPHA + ADD_NL:
6656 p = "ALPHA+NL";
6657 break;
6658 case NALPHA:
6659 p = "NALPHA";
6660 break;
6661 case NALPHA + ADD_NL:
6662 p = "NALPHA+NL";
6663 break;
6664 case LOWER:
6665 p = "LOWER";
6666 break;
6667 case LOWER + ADD_NL:
6668 p = "LOWER+NL";
6669 break;
6670 case NLOWER:
6671 p = "NLOWER";
6672 break;
6673 case NLOWER + ADD_NL:
6674 p = "NLOWER+NL";
6675 break;
6676 case UPPER:
6677 p = "UPPER";
6678 break;
6679 case UPPER + ADD_NL:
6680 p = "UPPER+NL";
6681 break;
6682 case NUPPER:
6683 p = "NUPPER";
6684 break;
6685 case NUPPER + ADD_NL:
6686 p = "NUPPER+NL";
6687 break;
6688 case BRANCH:
6689 p = "BRANCH";
6690 break;
6691 case EXACTLY:
6692 p = "EXACTLY";
6693 break;
6694 case NOTHING:
6695 p = "NOTHING";
6696 break;
6697 case BACK:
6698 p = "BACK";
6699 break;
6700 case END:
6701 p = "END";
6702 break;
6703 case MOPEN + 0:
6704 p = "MATCH START";
6705 break;
6706 case MOPEN + 1:
6707 case MOPEN + 2:
6708 case MOPEN + 3:
6709 case MOPEN + 4:
6710 case MOPEN + 5:
6711 case MOPEN + 6:
6712 case MOPEN + 7:
6713 case MOPEN + 8:
6714 case MOPEN + 9:
6715 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6716 p = NULL;
6717 break;
6718 case MCLOSE + 0:
6719 p = "MATCH END";
6720 break;
6721 case MCLOSE + 1:
6722 case MCLOSE + 2:
6723 case MCLOSE + 3:
6724 case MCLOSE + 4:
6725 case MCLOSE + 5:
6726 case MCLOSE + 6:
6727 case MCLOSE + 7:
6728 case MCLOSE + 8:
6729 case MCLOSE + 9:
6730 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6731 p = NULL;
6732 break;
6733 case BACKREF + 1:
6734 case BACKREF + 2:
6735 case BACKREF + 3:
6736 case BACKREF + 4:
6737 case BACKREF + 5:
6738 case BACKREF + 6:
6739 case BACKREF + 7:
6740 case BACKREF + 8:
6741 case BACKREF + 9:
6742 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6743 p = NULL;
6744 break;
6745 case NOPEN:
6746 p = "NOPEN";
6747 break;
6748 case NCLOSE:
6749 p = "NCLOSE";
6750 break;
6751#ifdef FEAT_SYN_HL
6752 case ZOPEN + 1:
6753 case ZOPEN + 2:
6754 case ZOPEN + 3:
6755 case ZOPEN + 4:
6756 case ZOPEN + 5:
6757 case ZOPEN + 6:
6758 case ZOPEN + 7:
6759 case ZOPEN + 8:
6760 case ZOPEN + 9:
6761 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6762 p = NULL;
6763 break;
6764 case ZCLOSE + 1:
6765 case ZCLOSE + 2:
6766 case ZCLOSE + 3:
6767 case ZCLOSE + 4:
6768 case ZCLOSE + 5:
6769 case ZCLOSE + 6:
6770 case ZCLOSE + 7:
6771 case ZCLOSE + 8:
6772 case ZCLOSE + 9:
6773 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6774 p = NULL;
6775 break;
6776 case ZREF + 1:
6777 case ZREF + 2:
6778 case ZREF + 3:
6779 case ZREF + 4:
6780 case ZREF + 5:
6781 case ZREF + 6:
6782 case ZREF + 7:
6783 case ZREF + 8:
6784 case ZREF + 9:
6785 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6786 p = NULL;
6787 break;
6788#endif
6789 case STAR:
6790 p = "STAR";
6791 break;
6792 case PLUS:
6793 p = "PLUS";
6794 break;
6795 case NOMATCH:
6796 p = "NOMATCH";
6797 break;
6798 case MATCH:
6799 p = "MATCH";
6800 break;
6801 case BEHIND:
6802 p = "BEHIND";
6803 break;
6804 case NOBEHIND:
6805 p = "NOBEHIND";
6806 break;
6807 case SUBPAT:
6808 p = "SUBPAT";
6809 break;
6810 case BRACE_LIMITS:
6811 p = "BRACE_LIMITS";
6812 break;
6813 case BRACE_SIMPLE:
6814 p = "BRACE_SIMPLE";
6815 break;
6816 case BRACE_COMPLEX + 0:
6817 case BRACE_COMPLEX + 1:
6818 case BRACE_COMPLEX + 2:
6819 case BRACE_COMPLEX + 3:
6820 case BRACE_COMPLEX + 4:
6821 case BRACE_COMPLEX + 5:
6822 case BRACE_COMPLEX + 6:
6823 case BRACE_COMPLEX + 7:
6824 case BRACE_COMPLEX + 8:
6825 case BRACE_COMPLEX + 9:
6826 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6827 p = NULL;
6828 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006829 case MULTIBYTECODE:
6830 p = "MULTIBYTECODE";
6831 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006832 case NEWL:
6833 p = "NEWL";
6834 break;
6835 default:
6836 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6837 p = NULL;
6838 break;
6839 }
6840 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006841 STRCAT(buf, p);
6842 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006843}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006844#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006845
Bram Moolenaarfb031402014-09-09 17:18:49 +02006846/*
6847 * Used in a place where no * or \+ can follow.
6848 */
6849 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006850re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006851{
6852 if (re_multi_type(peekchr()) == MULTI_MULT)
Bram Moolenaar1be45b22019-01-14 22:46:15 +01006853 {
6854 semsg(_("E888: (NFA regexp) cannot repeat %s"), what);
6855 rc_did_emsg = TRUE;
6856 return FAIL;
6857 }
Bram Moolenaarfb031402014-09-09 17:18:49 +02006858 return OK;
6859}
6860
/*
 * Decomposition of one precomposed character: the base character "a",
 * followed by up to two more characters "b" and "c" (0 when not used).
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the Hebrew presentation forms 0xfb20 - 0xfb4f, indexed
 * by "character - 0xfb20".  Code points marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5bc, 0},		/* 0xfb30	alef+mapiq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};

/*
 * Decompose character "c" into its base character "*c1" and up to two
 * following characters "*c2" and "*c3".
 * Only 0xfb20 - 0xfb4f are decomposed; for any other character "*c1" is set
 * to "c" and "*c2" and "*c3" are set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
	decomp_T    *entry = &decomp_table[c - 0xfb20];

	*c1 = entry->a;
	*c2 = entry->b;
	*c3 = entry->c;
    }
    else
    {
	*c1 = c;
	*c2 = *c3 = 0;
    }
}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006938
6939/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02006940 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006941 * Return 0 if strings match, non-zero otherwise.
6942 * Correct the length "*n" when composing characters are ignored.
6943 */
6944 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006945cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006946{
6947 int result;
6948
Bram Moolenaar6100d022016-10-02 16:51:57 +02006949 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006950 result = STRNCMP(s1, s2, *n);
6951 else
6952 result = MB_STRNICMP(s1, s2, *n);
6953
Bram Moolenaar071d4272004-06-13 20:20:40 +00006954 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006955 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006956 {
6957 char_u *str1, *str2;
6958 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006959 int junk;
6960
6961 /* we have to handle the strcmp ourselves, since it is necessary to
6962 * deal with the composing characters by ignoring them: */
6963 str1 = s1;
6964 str2 = s2;
6965 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00006966 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006967 {
6968 c1 = mb_ptr2char_adv(&str1);
6969 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006970
6971 /* decompose the character if necessary, into 'base' characters
6972 * because I don't care about Arabic, I will hard-code the Hebrew
6973 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006974 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006975 {
6976 /* decomposition necessary? */
6977 mb_decompose(c1, &c11, &junk, &junk);
6978 mb_decompose(c2, &c12, &junk, &junk);
6979 c1 = c11;
6980 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006981 if (c11 != c12
6982 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006983 break;
6984 }
6985 }
6986 result = c2 - c1;
6987 if (result == 0)
6988 *n = (int)(str2 - s2);
6989 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006990
6991 return result;
6992}
6993
6994/*
6995 * cstrchr: This function is used a lot for simple searches, keep it fast!
6996 */
6997 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006998cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006999{
7000 char_u *p;
7001 int cc;
7002
Bram Moolenaara12a1612019-01-24 16:39:02 +01007003 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007004 return vim_strchr(s, c);
7005
7006 /* tolower() and toupper() can be slow, comparing twice should be a lot
7007 * faster (esp. when using MS Visual C++!).
7008 * For UTF-8 need to use folded case. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007009 if (enc_utf8 && c > 0x80)
7010 cc = utf_fold(c);
7011 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007012 if (MB_ISUPPER(c))
7013 cc = MB_TOLOWER(c);
7014 else if (MB_ISLOWER(c))
7015 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007016 else
7017 return vim_strchr(s, c);
7018
Bram Moolenaar071d4272004-06-13 20:20:40 +00007019 if (has_mbyte)
7020 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007021 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007022 {
7023 if (enc_utf8 && c > 0x80)
7024 {
7025 if (utf_fold(utf_ptr2char(p)) == cc)
7026 return p;
7027 }
7028 else if (*p == c || *p == cc)
7029 return p;
7030 }
7031 }
7032 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007033 /* Faster version for when there are no multi-byte characters. */
7034 for (p = s; *p != NUL; ++p)
7035 if (*p == c || *p == cc)
7036 return p;
7037
7038 return NULL;
7039}
7040
7041/***************************************************************
7042 * regsub stuff *
7043 ***************************************************************/
7044
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007051typedef void (*(*fptr_T)(int *, int))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007052
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007053static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007054
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007055 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007056do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007057{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007058 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007059
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007060 return (fptr_T)NULL;
7061}
7062
7063 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007064do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007065{
7066 *d = MB_TOUPPER(c);
7067
7068 return (fptr_T)do_Upper;
7069}
7070
7071 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007072do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007073{
7074 *d = MB_TOLOWER(c);
7075
7076 return (fptr_T)NULL;
7077}
7078
7079 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007080do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007081{
7082 *d = MB_TOLOWER(c);
7083
7084 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007085}
7086
7087/*
7088 * regtilde(): Replace tildes in the pattern by the old pattern.
7089 *
7090 * Short explanation of the tilde: It stands for the previous replacement
7091 * pattern. If that previous pattern also contains a ~ we should go back a
7092 * step further... But we insert the previous pattern into the current one
7093 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007094 * This still does not handle the case where "magic" changes. So require the
7095 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007096 *
7097 * The tildes are parsed once before the first call to vim_regsub().
7098 */
7099 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007100regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007101{
7102 char_u *newsub = source;
7103 char_u *tmpsub;
7104 char_u *p;
7105 int len;
7106 int prevlen;
7107
7108 for (p = newsub; *p; ++p)
7109 {
7110 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7111 {
7112 if (reg_prev_sub != NULL)
7113 {
7114 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7115 prevlen = (int)STRLEN(reg_prev_sub);
7116 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7117 if (tmpsub != NULL)
7118 {
7119 /* copy prefix */
7120 len = (int)(p - newsub); /* not including ~ */
7121 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007122 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007123 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7124 /* copy postfix */
7125 if (!magic)
7126 ++p; /* back off \ */
7127 STRCPY(tmpsub + len + prevlen, p + 1);
7128
7129 if (newsub != source) /* already allocated newsub */
7130 vim_free(newsub);
7131 newsub = tmpsub;
7132 p = newsub + len + prevlen;
7133 }
7134 }
7135 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007136 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007137 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007138 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007139 --p;
7140 }
7141 else
7142 {
7143 if (*p == '\\' && p[1]) /* skip escaped characters */
7144 ++p;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007145 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007146 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007147 }
7148 }
7149
7150 vim_free(reg_prev_sub);
7151 if (newsub != source) /* newsub was allocated, just keep it */
7152 reg_prev_sub = newsub;
7153 else /* no ~ found, need to save newsub */
7154 reg_prev_sub = vim_strsave(newsub);
7155 return newsub;
7156}
7157
7158#ifdef FEAT_EVAL
7159static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7160
Bram Moolenaar6100d022016-10-02 16:51:57 +02007161/* These pointers are used for reg_submatch(). Needed for when the
7162 * substitution string is an expression that contains a call to substitute()
7163 * and submatch(). */
typedef struct {
    regmatch_T	*sm_match;	/* set from rex.reg_match */
    regmmatch_T	*sm_mmatch;	/* set from rex.reg_mmatch */
    linenr_T	sm_firstlnum;	/* set from rex.reg_firstlnum */
    linenr_T	sm_maxline;	/* set from rex.reg_maxline */
    int		sm_line_lbr;	/* set from rex.reg_line_lbr; when TRUE a NL in
				 * the expression result is not changed to
				 * CR */
} regsubmatch_T;
7171
7172static regsubmatch_T rsm; /* can only be used when can_f_submatch is TRUE */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007173#endif
7174
7175#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007176
7177/*
7178 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7179 * vim_regsub_both().
7180 */
7181 static int
7182fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7183{
7184 listitem_T *li;
7185 int i;
7186 char_u *s;
7187
7188 if (argcount == 0)
7189 /* called function doesn't take an argument */
7190 return 0;
7191
7192 /* Relies on sl_list to be the first item in staticList10_T. */
7193 init_static_list((staticList10_T *)(argv->vval.v_list));
7194
7195 /* There are always 10 list items in staticList10_T. */
7196 li = argv->vval.v_list->lv_first;
7197 for (i = 0; i < 10; ++i)
7198 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007199 s = rsm.sm_match->startp[i];
7200 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007201 s = NULL;
7202 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007203 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007204 li->li_tv.v_type = VAR_STRING;
7205 li->li_tv.vval.v_string = s;
7206 li = li->li_next;
7207 }
7208 return 1;
7209}
7210
7211 static void
7212clear_submatch_list(staticList10_T *sl)
7213{
7214 int i;
7215
7216 for (i = 0; i < 10; ++i)
7217 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7218}
7219
Bram Moolenaar071d4272004-06-13 20:20:40 +00007220/*
7221 * vim_regsub() - perform substitutions after a vim_regexec() or
7222 * vim_regexec_multi() match.
7223 *
7224 * If "copy" is TRUE really copy into "dest".
7225 * If "copy" is FALSE nothing is copied, this is just to find out the length
7226 * of the result.
7227 *
7228 * If "backslash" is TRUE, a backslash will be removed later, need to double
7229 * them to keep them, and insert a backslash before a CR to avoid it being
7230 * replaced with a line break later.
7231 *
7232 * Note: The matched text must not change between the call of
7233 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7234 * references invalid!
7235 *
7236 * Returns the size of the replacement, including terminating NUL.
7237 */
7238 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007239vim_regsub(
7240 regmatch_T *rmp,
7241 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007242 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007243 char_u *dest,
7244 int copy,
7245 int magic,
7246 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007247{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007248 int result;
7249 regexec_T rex_save;
7250 int rex_in_use_save = rex_in_use;
7251
7252 if (rex_in_use)
7253 /* Being called recursively, save the state. */
7254 rex_save = rex;
7255 rex_in_use = TRUE;
7256
7257 rex.reg_match = rmp;
7258 rex.reg_mmatch = NULL;
7259 rex.reg_maxline = 0;
7260 rex.reg_buf = curbuf;
7261 rex.reg_line_lbr = TRUE;
7262 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7263
7264 rex_in_use = rex_in_use_save;
7265 if (rex_in_use)
7266 rex = rex_save;
7267
7268 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007269}
7270#endif
7271
7272 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007273vim_regsub_multi(
7274 regmmatch_T *rmp,
7275 linenr_T lnum,
7276 char_u *source,
7277 char_u *dest,
7278 int copy,
7279 int magic,
7280 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007281{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007282 int result;
7283 regexec_T rex_save;
7284 int rex_in_use_save = rex_in_use;
7285
7286 if (rex_in_use)
7287 /* Being called recursively, save the state. */
7288 rex_save = rex;
7289 rex_in_use = TRUE;
7290
7291 rex.reg_match = NULL;
7292 rex.reg_mmatch = rmp;
7293 rex.reg_buf = curbuf; /* always works on the current buffer! */
7294 rex.reg_firstlnum = lnum;
7295 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7296 rex.reg_line_lbr = FALSE;
7297 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7298
7299 rex_in_use = rex_in_use_save;
7300 if (rex_in_use)
7301 rex = rex_save;
7302
7303 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007304}
7305
7306 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007307vim_regsub_both(
7308 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007309 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007310 char_u *dest,
7311 int copy,
7312 int magic,
7313 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007314{
7315 char_u *src;
7316 char_u *dst;
7317 char_u *s;
7318 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007319 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007320 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007321 fptr_T func_all = (fptr_T)NULL;
7322 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007323 linenr_T clnum = 0; /* init for GCC */
7324 int len = 0; /* init for GCC */
7325#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007326 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007327#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007328
7329 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007330 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007331 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007332 emsg(_(e_null));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007333 return 0;
7334 }
7335 if (prog_magic_wrong())
7336 return 0;
7337 src = source;
7338 dst = dest;
7339
7340 /*
7341 * When the substitute part starts with "\=" evaluate it as an expression.
7342 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007343 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007344 {
7345#ifdef FEAT_EVAL
7346 /* To make sure that the length doesn't change between checking the
7347 * length and copying the string, and to speed up things, the
7348 * resulting string is saved from the call with "copy" == FALSE to the
7349 * call with "copy" == TRUE. */
7350 if (copy)
7351 {
7352 if (eval_result != NULL)
7353 {
7354 STRCPY(dest, eval_result);
7355 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01007356 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007357 }
7358 }
7359 else
7360 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007361 int prev_can_f_submatch = can_f_submatch;
7362 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007363
7364 vim_free(eval_result);
7365
7366 /* The expression may contain substitute(), which calls us
7367 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02007368 * level. */
7369 if (can_f_submatch)
7370 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007371 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007372 rsm.sm_match = rex.reg_match;
7373 rsm.sm_mmatch = rex.reg_mmatch;
7374 rsm.sm_firstlnum = rex.reg_firstlnum;
7375 rsm.sm_maxline = rex.reg_maxline;
7376 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007377
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007378 if (expr != NULL)
7379 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007380 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007381 int dummy;
7382 char_u buf[NUMBUFLEN];
7383 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007384 staticList10_T matchList;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007385
7386 rettv.v_type = VAR_STRING;
7387 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007388 argv[0].v_type = VAR_LIST;
7389 argv[0].vval.v_list = &matchList.sl_list;
7390 matchList.sl_list.lv_len = 0;
7391 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007392 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007393 s = expr->vval.v_string;
7394 call_func(s, (int)STRLEN(s), &rettv,
7395 1, argv, fill_submatch_list,
7396 0L, 0L, &dummy, TRUE, NULL, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007397 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007398 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007399 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007400 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007401
Bram Moolenaar6100d022016-10-02 16:51:57 +02007402 s = partial_name(partial);
7403 call_func(s, (int)STRLEN(s), &rettv,
7404 1, argv, fill_submatch_list,
7405 0L, 0L, &dummy, TRUE, partial, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007406 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007407 if (matchList.sl_list.lv_len > 0)
7408 /* fill_submatch_list() was called */
7409 clear_submatch_list(&matchList);
7410
Bram Moolenaard155d7a2018-12-21 16:04:21 +01007411 eval_result = tv_get_string_buf_chk(&rettv, buf);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007412 if (eval_result != NULL)
7413 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007414 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007415 }
7416 else
7417 eval_result = eval_to_string(source + 2, NULL, TRUE);
7418
Bram Moolenaar071d4272004-06-13 20:20:40 +00007419 if (eval_result != NULL)
7420 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007421 int had_backslash = FALSE;
7422
Bram Moolenaar91acfff2017-03-12 19:22:36 +01007423 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007424 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007425 /* Change NL to CR, so that it becomes a line break,
7426 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007427 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007428 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007429 *s = CAR;
7430 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007431 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007432 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007433 /* Change NL to CR here too, so that this works:
7434 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7435 * abc\
7436 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007437 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007438 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007439 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007440 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007441 had_backslash = TRUE;
7442 }
7443 }
7444 if (had_backslash && backslash)
7445 {
7446 /* Backslashes will be consumed, need to double them. */
7447 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7448 if (s != NULL)
7449 {
7450 vim_free(eval_result);
7451 eval_result = s;
7452 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007453 }
7454
7455 dst += STRLEN(eval_result);
7456 }
7457
Bram Moolenaar6100d022016-10-02 16:51:57 +02007458 can_f_submatch = prev_can_f_submatch;
7459 if (can_f_submatch)
7460 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007461 }
7462#endif
7463 }
7464 else
7465 while ((c = *src++) != NUL)
7466 {
7467 if (c == '&' && magic)
7468 no = 0;
7469 else if (c == '\\' && *src != NUL)
7470 {
7471 if (*src == '&' && !magic)
7472 {
7473 ++src;
7474 no = 0;
7475 }
7476 else if ('0' <= *src && *src <= '9')
7477 {
7478 no = *src++ - '0';
7479 }
7480 else if (vim_strchr((char_u *)"uUlLeE", *src))
7481 {
7482 switch (*src++)
7483 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007484 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007485 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007486 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007487 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007488 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007489 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007490 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007491 continue;
7492 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007493 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007494 continue;
7495 }
7496 }
7497 }
7498 if (no < 0) /* Ordinary character. */
7499 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007500 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7501 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007502 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007503 if (copy)
7504 {
7505 *dst++ = c;
7506 *dst++ = *src++;
7507 *dst++ = *src++;
7508 }
7509 else
7510 {
7511 dst += 3;
7512 src += 2;
7513 }
7514 continue;
7515 }
7516
Bram Moolenaar071d4272004-06-13 20:20:40 +00007517 if (c == '\\' && *src != NUL)
7518 {
7519 /* Check for abbreviations -- webb */
7520 switch (*src)
7521 {
7522 case 'r': c = CAR; ++src; break;
7523 case 'n': c = NL; ++src; break;
7524 case 't': c = TAB; ++src; break;
7525 /* Oh no! \e already has meaning in subst pat :-( */
7526 /* case 'e': c = ESC; ++src; break; */
7527 case 'b': c = Ctrl_H; ++src; break;
7528
7529 /* If "backslash" is TRUE the backslash will be removed
7530 * later. Used to insert a literal CR. */
7531 default: if (backslash)
7532 {
7533 if (copy)
7534 *dst = '\\';
7535 ++dst;
7536 }
7537 c = *src++;
7538 }
7539 }
Bram Moolenaardb552d602006-03-23 22:59:57 +00007540 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007541 c = mb_ptr2char(src - 1);
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007542
Bram Moolenaardb552d602006-03-23 22:59:57 +00007543 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007544 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007545 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007546 func_one = (fptr_T)(func_one(&cc, c));
7547 else if (func_all != (fptr_T)NULL)
7548 /* Turbo C complains without the typecast */
7549 func_all = (fptr_T)(func_all(&cc, c));
7550 else /* just copy */
7551 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007552
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007553 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007554 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007555 int totlen = mb_ptr2len(src - 1);
7556
Bram Moolenaar071d4272004-06-13 20:20:40 +00007557 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007558 mb_char2bytes(cc, dst);
7559 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007560 if (enc_utf8)
7561 {
7562 int clen = utf_ptr2len(src - 1);
7563
7564 /* If the character length is shorter than "totlen", there
7565 * are composing characters; copy them as-is. */
7566 if (clen < totlen)
7567 {
7568 if (copy)
7569 mch_memmove(dst + 1, src - 1 + clen,
7570 (size_t)(totlen - clen));
7571 dst += totlen - clen;
7572 }
7573 }
7574 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007575 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01007576 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007577 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007578 dst++;
7579 }
7580 else
7581 {
7582 if (REG_MULTI)
7583 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007584 clnum = rex.reg_mmatch->startpos[no].lnum;
7585 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007586 s = NULL;
7587 else
7588 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007589 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
7590 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7591 len = rex.reg_mmatch->endpos[no].col
7592 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007593 else
7594 len = (int)STRLEN(s);
7595 }
7596 }
7597 else
7598 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007599 s = rex.reg_match->startp[no];
7600 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007601 s = NULL;
7602 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007603 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007604 }
7605 if (s != NULL)
7606 {
7607 for (;;)
7608 {
7609 if (len == 0)
7610 {
7611 if (REG_MULTI)
7612 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007613 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007614 break;
7615 if (copy)
7616 *dst = CAR;
7617 ++dst;
7618 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02007619 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7620 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007621 else
7622 len = (int)STRLEN(s);
7623 }
7624 else
7625 break;
7626 }
7627 else if (*s == NUL) /* we hit NUL. */
7628 {
7629 if (copy)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007630 emsg(_(e_re_damg));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007631 goto exit;
7632 }
7633 else
7634 {
7635 if (backslash && (*s == CAR || *s == '\\'))
7636 {
7637 /*
7638 * Insert a backslash in front of a CR, otherwise
7639 * it will be replaced by a line break.
7640 * Number of backslashes will be halved later,
7641 * double them here.
7642 */
7643 if (copy)
7644 {
7645 dst[0] = '\\';
7646 dst[1] = *s;
7647 }
7648 dst += 2;
7649 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007650 else
7651 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007652 if (has_mbyte)
7653 c = mb_ptr2char(s);
7654 else
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007655 c = *s;
7656
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007657 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007658 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007659 func_one = (fptr_T)(func_one(&cc, c));
7660 else if (func_all != (fptr_T)NULL)
7661 /* Turbo C complains without the typecast */
7662 func_all = (fptr_T)(func_all(&cc, c));
7663 else /* just copy */
7664 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007665
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007666 if (has_mbyte)
7667 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007668 int l;
7669
7670 /* Copy composing characters separately, one
7671 * at a time. */
7672 if (enc_utf8)
7673 l = utf_ptr2len(s) - 1;
7674 else
7675 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007676
7677 s += l;
7678 len -= l;
7679 if (copy)
7680 mb_char2bytes(cc, dst);
7681 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007682 }
Bram Moolenaara12a1612019-01-24 16:39:02 +01007683 else if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007684 *dst = cc;
7685 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007686 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007687
Bram Moolenaar071d4272004-06-13 20:20:40 +00007688 ++s;
7689 --len;
7690 }
7691 }
7692 }
7693 no = -1;
7694 }
7695 }
7696 if (copy)
7697 *dst = NUL;
7698
7699exit:
7700 return (int)((dst - dest) + 1);
7701}
7702
7703#ifdef FEAT_EVAL
7704/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007705 * Call reg_getline() with the line numbers from the submatch. If a
7706 * substitute() was used the reg_maxline and other values have been
7707 * overwritten.
7708 */
7709 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007710reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007711{
7712 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007713 linenr_T save_first = rex.reg_firstlnum;
7714 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007715
Bram Moolenaar6100d022016-10-02 16:51:57 +02007716 rex.reg_firstlnum = rsm.sm_firstlnum;
7717 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007718
7719 s = reg_getline(lnum);
7720
Bram Moolenaar6100d022016-10-02 16:51:57 +02007721 rex.reg_firstlnum = save_first;
7722 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007723 return s;
7724}
7725
7726/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007727 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007728 * allocated memory.
7729 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7730 */
7731 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007732reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007733{
7734 char_u *retval = NULL;
7735 char_u *s;
7736 int len;
7737 int round;
7738 linenr_T lnum;
7739
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007740 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007741 return NULL;
7742
Bram Moolenaar6100d022016-10-02 16:51:57 +02007743 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007744 {
7745 /*
7746 * First round: compute the length and allocate memory.
7747 * Second round: copy the text.
7748 */
7749 for (round = 1; round <= 2; ++round)
7750 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007751 lnum = rsm.sm_mmatch->startpos[no].lnum;
7752 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007753 return NULL;
7754
Bram Moolenaar6100d022016-10-02 16:51:57 +02007755 s = reg_getline_submatch(lnum) + rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007756 if (s == NULL) /* anti-crash check, cannot happen? */
7757 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007758 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007759 {
7760 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007761 len = rsm.sm_mmatch->endpos[no].col
7762 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007763 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007764 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007765 ++len;
7766 }
7767 else
7768 {
7769 /* Multiple lines: take start line from start col, middle
7770 * lines completely and end line up to end col. */
7771 len = (int)STRLEN(s);
7772 if (round == 2)
7773 {
7774 STRCPY(retval, s);
7775 retval[len] = '\n';
7776 }
7777 ++len;
7778 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007779 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007780 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007781 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007782 if (round == 2)
7783 STRCPY(retval + len, s);
7784 len += (int)STRLEN(s);
7785 if (round == 2)
7786 retval[len] = '\n';
7787 ++len;
7788 }
7789 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007790 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007791 rsm.sm_mmatch->endpos[no].col);
7792 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007793 if (round == 2)
7794 retval[len] = NUL;
7795 ++len;
7796 }
7797
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007798 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007799 {
7800 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007801 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007802 return NULL;
7803 }
7804 }
7805 }
7806 else
7807 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007808 s = rsm.sm_match->startp[no];
7809 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007810 retval = NULL;
7811 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007812 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007813 }
7814
7815 return retval;
7816}
Bram Moolenaar41571762014-04-02 19:00:58 +02007817
7818/*
7819 * Used for the submatch() function with the optional non-zero argument: get
7820 * the list of strings from the n'th submatch in allocated memory with NULs
7821 * represented in NLs.
7822 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7823 * command, for a non-existing submatch and for any error.
7824 */
7825 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007826reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007827{
7828 char_u *s;
7829 linenr_T slnum;
7830 linenr_T elnum;
7831 colnr_T scol;
7832 colnr_T ecol;
7833 int i;
7834 list_T *list;
7835 int error = FALSE;
7836
7837 if (!can_f_submatch || no < 0)
7838 return NULL;
7839
Bram Moolenaar6100d022016-10-02 16:51:57 +02007840 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007841 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007842 slnum = rsm.sm_mmatch->startpos[no].lnum;
7843 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02007844 if (slnum < 0 || elnum < 0)
7845 return NULL;
7846
Bram Moolenaar6100d022016-10-02 16:51:57 +02007847 scol = rsm.sm_mmatch->startpos[no].col;
7848 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02007849
7850 list = list_alloc();
7851 if (list == NULL)
7852 return NULL;
7853
7854 s = reg_getline_submatch(slnum) + scol;
7855 if (slnum == elnum)
7856 {
7857 if (list_append_string(list, s, ecol - scol) == FAIL)
7858 error = TRUE;
7859 }
7860 else
7861 {
7862 if (list_append_string(list, s, -1) == FAIL)
7863 error = TRUE;
7864 for (i = 1; i < elnum - slnum; i++)
7865 {
7866 s = reg_getline_submatch(slnum + i);
7867 if (list_append_string(list, s, -1) == FAIL)
7868 error = TRUE;
7869 }
7870 s = reg_getline_submatch(elnum);
7871 if (list_append_string(list, s, ecol) == FAIL)
7872 error = TRUE;
7873 }
7874 }
7875 else
7876 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007877 s = rsm.sm_match->startp[no];
7878 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007879 return NULL;
7880 list = list_alloc();
7881 if (list == NULL)
7882 return NULL;
7883 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02007884 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007885 error = TRUE;
7886 }
7887
7888 if (error)
7889 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02007890 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02007891 return NULL;
7892 }
7893 return list;
7894}
Bram Moolenaar071d4272004-06-13 20:20:40 +00007895#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007896
7897static regengine_T bt_regengine =
7898{
7899 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02007900 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007901 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01007902 bt_regexec_multi,
7903 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007904};
7905
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007906#include "regexp_nfa.c"
7907
7908static regengine_T nfa_regengine =
7909{
7910 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02007911 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007912 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01007913 nfa_regexec_multi,
7914 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007915};
7916
/*
 * Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
 * 'regexpengine' or from a "\%#=" prefix in the pattern.
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01007920
#ifdef DEBUG
/*
 * Names of the regexp engines for debug messages, indexed by the engine
 * number.
 */
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
7928
7929/*
7930 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02007931 * Returns the program in allocated memory.
7932 * Use vim_regfree() to free the memory.
7933 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007934 */
7935 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007936vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007937{
7938 regprog_T *prog = NULL;
7939 char_u *expr = expr_arg;
7940
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007941 regexp_engine = p_re;
7942
7943 /* Check for prefix "\%#=", that sets the regexp engine */
7944 if (STRNCMP(expr, "\\%#=", 4) == 0)
7945 {
7946 int newengine = expr[4] - '0';
7947
7948 if (newengine == AUTOMATIC_ENGINE
7949 || newengine == BACKTRACKING_ENGINE
7950 || newengine == NFA_ENGINE)
7951 {
7952 regexp_engine = expr[4] - '0';
7953 expr += 5;
7954#ifdef DEBUG
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007955 smsg("New regexp mode selected (%d): %s",
Bram Moolenaar6e132072014-05-13 16:46:32 +02007956 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007957#endif
7958 }
7959 else
7960 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007961 emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007962 regexp_engine = AUTOMATIC_ENGINE;
7963 }
7964 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02007965#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007966 bt_regengine.expr = expr;
7967 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02007968#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007969
7970 /*
7971 * First try the NFA engine, unless backtracking was requested.
7972 */
7973 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01007974 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01007975 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007976 else
7977 prog = bt_regengine.regcomp(expr, re_flags);
7978
Bram Moolenaarfda37292014-11-05 14:27:36 +01007979 /* Check for error compiling regexp with initial engine. */
7980 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007981 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007982#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007983 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
7984 {
7985 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007986 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007987 if (f)
7988 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02007989 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007990 fclose(f);
7991 }
7992 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01007993 semsg("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01007994 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007995 }
7996#endif
7997 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01007998 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01007999 * The NFA engine also fails for patterns that it can't handle well
8000 * but are still valid patterns, thus a retry should work.
8001 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008002 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008003 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008004 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008005 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008006 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008007 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008008
Bram Moolenaarfda37292014-11-05 14:27:36 +01008009 if (prog != NULL)
8010 {
8011 /* Store the info needed to call regcomp() again when the engine turns
8012 * out to be very slow when executing it. */
8013 prog->re_engine = regexp_engine;
8014 prog->re_flags = re_flags;
8015 }
8016
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008017 return prog;
8018}
8019
8020/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008021 * Free a compiled regexp program, returned by vim_regcomp().
8022 */
8023 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008024vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008025{
8026 if (prog != NULL)
8027 prog->engine->regfree(prog);
8028}
8029
#ifdef FEAT_EVAL
/*
 * When 'verbose' is above zero: give a message that the backtracking engine
 * is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
        return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif
8043
#if (defined(FEAT_X11) && (defined(FEAT_TITLE) || defined(FEAT_XCLIPBOARD))) \
                                                            || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog", which is set while the program is
 * being executed.
 * "prog" must not be NULL.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int in_use = prog->re_in_use;

    return in_use;
}
#endif
Bram Moolenaara8bfa172018-12-29 22:28:46 +01008055
8056/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008057 * Match a regexp against a string.
8058 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008059 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008060 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008061 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008062 *
8063 * Return TRUE if there is a match, FALSE if not.
8064 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008065 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008066vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01008067 regmatch_T *rmp,
8068 char_u *line, /* string to match against */
8069 colnr_T col, /* column to start looking for match */
8070 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008071{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008072 int result;
8073 regexec_T rex_save;
8074 int rex_in_use_save = rex_in_use;
8075
Bram Moolenaar0270f382018-07-17 05:43:58 +02008076 // Cannot use the same prog recursively, it contains state.
8077 if (rmp->regprog->re_in_use)
8078 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008079 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008080 return FALSE;
8081 }
8082 rmp->regprog->re_in_use = TRUE;
8083
Bram Moolenaar6100d022016-10-02 16:51:57 +02008084 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02008085 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02008086 rex_save = rex;
8087 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008088
Bram Moolenaar6100d022016-10-02 16:51:57 +02008089 rex.reg_startp = NULL;
8090 rex.reg_endp = NULL;
8091 rex.reg_startpos = NULL;
8092 rex.reg_endpos = NULL;
8093
8094 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008095 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008096
8097 /* NFA engine aborted because it's very slow. */
8098 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8099 && result == NFA_TOO_EXPENSIVE)
8100 {
8101 int save_p_re = p_re;
8102 int re_flags = rmp->regprog->re_flags;
8103 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8104
8105 p_re = BACKTRACKING_ENGINE;
8106 vim_regfree(rmp->regprog);
8107 if (pat != NULL)
8108 {
8109#ifdef FEAT_EVAL
8110 report_re_switch(pat);
8111#endif
8112 rmp->regprog = vim_regcomp(pat, re_flags);
8113 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008114 {
8115 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008116 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaar41499802018-07-18 06:02:09 +02008117 rmp->regprog->re_in_use = FALSE;
8118 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008119 vim_free(pat);
8120 }
8121
8122 p_re = save_p_re;
8123 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008124
8125 rex_in_use = rex_in_use_save;
8126 if (rex_in_use)
8127 rex = rex_save;
8128
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008129 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008130}
8131
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008132/*
8133 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008134 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008135 */
8136 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008137vim_regexec_prog(
8138 regprog_T **prog,
8139 int ignore_case,
8140 char_u *line,
8141 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008142{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008143 int r;
8144 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008145
8146 regmatch.regprog = *prog;
8147 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008148 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008149 *prog = regmatch.regprog;
8150 return r;
8151}
8152
8153/*
8154 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008155 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008156 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008157 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008158vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008159{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008160 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008161}
8162
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Same as vim_regexec(), except that a "\n" in "line" is considered to be a
 * line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int matched = vim_regexec_string(rmp, line, col, TRUE);

    return matched;
}
#endif
8176
8177/*
8178 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008179 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
8180 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008181 * Uses curbuf for line count and 'iskeyword'.
8182 *
8183 * Return zero if there is no match. Return number of lines contained in the
8184 * match otherwise.
8185 */
8186 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008187vim_regexec_multi(
8188 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01008189 win_T *win, /* window in which to search or NULL */
8190 buf_T *buf, /* buffer in which to search */
8191 linenr_T lnum, /* nr of line to start looking for match */
8192 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008193 proftime_T *tm, /* timeout limit or NULL */
8194 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008195{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008196 int result;
8197 regexec_T rex_save;
8198 int rex_in_use_save = rex_in_use;
8199
Bram Moolenaar0270f382018-07-17 05:43:58 +02008200 // Cannot use the same prog recursively, it contains state.
8201 if (rmp->regprog->re_in_use)
8202 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01008203 emsg(_(e_recursive));
Bram Moolenaar0270f382018-07-17 05:43:58 +02008204 return FALSE;
8205 }
8206 rmp->regprog->re_in_use = TRUE;
8207
Bram Moolenaar6100d022016-10-02 16:51:57 +02008208 if (rex_in_use)
8209 /* Being called recursively, save the state. */
8210 rex_save = rex;
8211 rex_in_use = TRUE;
8212
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008213 result = rmp->regprog->engine->regexec_multi(
8214 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008215 rmp->regprog->re_in_use = FALSE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008216
8217 /* NFA engine aborted because it's very slow. */
8218 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8219 && result == NFA_TOO_EXPENSIVE)
8220 {
8221 int save_p_re = p_re;
8222 int re_flags = rmp->regprog->re_flags;
8223 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8224
8225 p_re = BACKTRACKING_ENGINE;
8226 vim_regfree(rmp->regprog);
8227 if (pat != NULL)
8228 {
8229#ifdef FEAT_EVAL
8230 report_re_switch(pat);
8231#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008232#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008233 // checking for \z misuse was already done when compiling for NFA,
8234 // allow all here
8235 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008236#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01008237 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008238#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008239 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008240#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008241
Bram Moolenaarfda37292014-11-05 14:27:36 +01008242 if (rmp->regprog != NULL)
Bram Moolenaar41499802018-07-18 06:02:09 +02008243 {
8244 rmp->regprog->re_in_use = TRUE;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008245 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008246 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaar41499802018-07-18 06:02:09 +02008247 rmp->regprog->re_in_use = FALSE;
8248 }
Bram Moolenaarfda37292014-11-05 14:27:36 +01008249 vim_free(pat);
8250 }
8251 p_re = save_p_re;
8252 }
8253
Bram Moolenaar6100d022016-10-02 16:51:57 +02008254 rex_in_use = rex_in_use_save;
8255 if (rex_in_use)
8256 rex = rex_save;
8257
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008258 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008259}