/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 *
 * NOTICE:
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer. This code has been modified specifically for use with the VIM
 * editor, and should not be used separately from Vim. If you want a good
 * regular expression library, get the original code. The copyright notice
 * that follows is from the original.
 *
 * END NOTICE
 *
 * Copyright (c) 1986 by University of Toronto.
 * Written by Henry Spencer. Not derived from licensed software.
 *
 * Permission is granted to anyone to use this software for any
 * purpose on any computer system, and to redistribute it freely,
 * subject to the following restrictions:
 *
 * 1. The author is not responsible for the consequences of use of
 * this software, no matter how awful, even if they arise
 * from defects in it.
 *
 * 2. The origin of this software must not be misrepresented, either
 * by explicit claim or by omission.
 *
 * 3. Altered versions must be plainly marked as such, and must not
 * be misrepresented as being the original software.
 *
 * Beware that some of this code is subtly aware of the way operator
 * precedence is structured in regular expressions. Serious changes in
 * regular-expression syntax might require a total rethink.
 *
 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
 * Webb, Ciaran McCreesh and Bram Moolenaar.
 * Named character class support added by Walter Briscoe (1998 Jul 01)
 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
86 * pointer with a BRANCH on both ends of it is connecting two alternatives.
87 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
88 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character class opcodes.
 *
 * The plain classes use the values 20-48 (ANY .. NUPPER).  Adding ADD_NL to
 * a plain class opcode gives the variant that also matches a line-break, so
 * those variants use the values 50-78 (FIRST_NL .. LAST_NL).
 */
#define ADD_NL		30
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */

/* First and last opcode of the "with line-break" range.  The sums are
 * parenthesized so that they expand correctly inside a larger expression. */
#define FIRST_NL	(ANY + ADD_NL)
#define LAST_NL		(NUPPER + ADD_NL)

/* Non-zero when "op" is one of the class opcodes that include a line-break. */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
/*
 * Magic characters are pattern items with a special meaning; they do not
 * match literally.  They are encoded as negative numbers (the character
 * value minus 256), which keeps them apart from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)

/*
 * Return the literal form of "x": a Magic value is converted back to its
 * character, any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magicness of "x": a Magic value becomes its literal character, a
 * literal character becomes Magic.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
273
274/*
275 * The first byte of the regexp internal "program" is actually this magic
276 * number; the start node begins in the second byte. It's used to catch the
277 * most severe mutilation of the program by the caller.
278 */
279
280#define REGMAGIC 0234
281
282/*
283 * Opcode notes:
284 *
285 * BRANCH The set of branches constituting a single choice are hooked
286 * together with their "next" pointers, since precedence prevents
287 * anything being concatenated to any individual branch. The
288 * "next" pointer of the last BRANCH in a choice points to the
289 * thing following the whole choice. This is also where the
290 * final "next" pointer of each individual branch points; each
291 * branch starts with the operand node of a BRANCH node.
292 *
293 * BACK Normal "next" pointers all implicitly point forward; BACK
294 * exists to make loop structures possible.
295 *
296 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
297 * BRANCH structures using BACK. Simple cases (one character
298 * per match) are implemented with STAR and PLUS for speed
299 * and to minimize recursive plunges.
300 *
301 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
302 * node, and defines the min and max limits to be used for that
303 * node.
304 *
305 * MOPEN,MCLOSE ...are numbered at compile time.
306 * ZOPEN,ZCLOSE ...ditto
307 */
308
309/*
310 * A node is one char of opcode followed by two chars of "next" pointer.
311 * "Next" pointers are stored as two 8-bit bytes, high order first. The
312 * value is a positive offset from the opcode of the node containing it.
313 * An operand, if any, simply follows the node. (Note that much of the
314 * code generation knows about this implicit relationship.)
315 *
316 * Using two bytes for the "next" pointer is vast overkill for most things,
317 * but allows patterns to get big without disasters.
318 */
/* Opcode of the node at "p". */
#define OP(p)		((int)(p)[0])
/* "Next" offset of the node at "p": two bytes, high order byte first. */
#define NEXT(p)		((((p)[1] & 0xff) << 8) + ((p)[2] & 0xff))
/* Start of the operand, which directly follows the three node bytes. */
#define OPERAND(p)	((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)	(((long)(p)[3] << 24) \
			    + ((long)(p)[4] << 16) \
			    + ((long)(p)[5] << 8) \
			    + (long)(p)[6])
/* Obtain a second operand stored as four bytes, directly after the first. */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)	((p)[7])
329
330/*
331 * Utility definitions.
332 */
333#define UCHARAT(p) ((int)*(char_u *)(p))
334
335/* Used for an error (down from) vim_regcomp(): give the error message, set
336 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000337#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar95f09602016-11-10 20:01:45 +0100338#define IEMSG_RET_NULL(m) return (IEMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000339#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200340#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
341#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
342#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343
Bram Moolenaar95f09602016-11-10 20:01:45 +0100344
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345#define MAX_LIMIT (32767L << 16L)
346
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100347static int cstrncmp(char_u *s1, char_u *s2, int *n);
348static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000349
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200350#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100351static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200352#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000353#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100354static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355#endif
356
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100357static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200358
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200359static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200360static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
Bram Moolenaar6c95fbc2017-06-05 17:53:37 +0200361#ifdef FEAT_MBYTE
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200362static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaar6c95fbc2017-06-05 17:53:37 +0200363#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200364static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
365static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
366static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200367#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200368static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
Bram Moolenaarbcf94422018-06-23 14:21:42 +0200369static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200370#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200371static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200372static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar0270f382018-07-17 05:43:58 +0200373static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
374
Bram Moolenaar071d4272004-06-13 20:20:40 +0000375#define NOT_MULTI 0
376#define MULTI_ONE 1
377#define MULTI_MULT 2
378/*
379 * Return NOT_MULTI if c is not a "multi" operator.
380 * Return MULTI_ONE if c is a single "multi" operator.
381 * Return MULTI_MULT if c is a multi "multi" operator.
382 */
383 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100384re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385{
386 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
387 return MULTI_ONE;
388 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
389 return MULTI_MULT;
390 return NOT_MULTI;
391}
392
393/*
394 * Flags to be passed up and down.
395 */
396#define HASWIDTH 0x1 /* Known never to match null string. */
397#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
398#define SPSTART 0x4 /* Starts with * or +. */
399#define HASNL 0x8 /* Contains some \n. */
400#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
401#define WORST 0 /* Worst case. */
402
403/*
404 * When regcode is set to this value, code is not emitted and size is computed
405 * instead.
406 */
407#define JUST_CALC_SIZE ((char_u *) -1)
408
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000409static char_u *reg_prev_sub = NULL;
410
Bram Moolenaar071d4272004-06-13 20:20:40 +0000411/*
412 * REGEXP_INRANGE contains all characters which are always special in a []
413 * range after '\'.
414 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
415 * These are:
416 * \n - New line (NL).
417 * \r - Carriage Return (CR).
418 * \t - Tab (TAB).
419 * \e - Escape (ESC).
420 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000421 * \d - Character code in decimal, eg \d123
 * \o - Character code in octal, eg \o40
423 * \x - Character code in hex, eg \x4a
424 * \u - Multibyte character code, eg \u20ac
425 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426 */
427static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000428static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000429
Bram Moolenaar071d4272004-06-13 20:20:40 +0000430/*
431 * Translate '\x' to its control character, except "\n", which is Magic.
432 */
433 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100434backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000435{
436 switch (c)
437 {
438 case 'r': return CAR;
439 case 't': return TAB;
440 case 'e': return ESC;
441 case 'b': return BS;
442 }
443 return c;
444}
445
446/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000447 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000448 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
449 * recognized. Otherwise "pp" is advanced to after the item.
450 */
451 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100452get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453{
454 static const char *(class_names[]) =
455 {
456 "alnum:]",
457#define CLASS_ALNUM 0
458 "alpha:]",
459#define CLASS_ALPHA 1
460 "blank:]",
461#define CLASS_BLANK 2
462 "cntrl:]",
463#define CLASS_CNTRL 3
464 "digit:]",
465#define CLASS_DIGIT 4
466 "graph:]",
467#define CLASS_GRAPH 5
468 "lower:]",
469#define CLASS_LOWER 6
470 "print:]",
471#define CLASS_PRINT 7
472 "punct:]",
473#define CLASS_PUNCT 8
474 "space:]",
475#define CLASS_SPACE 9
476 "upper:]",
477#define CLASS_UPPER 10
478 "xdigit:]",
479#define CLASS_XDIGIT 11
480 "tab:]",
481#define CLASS_TAB 12
482 "return:]",
483#define CLASS_RETURN 13
484 "backspace:]",
485#define CLASS_BACKSPACE 14
486 "escape:]",
487#define CLASS_ESCAPE 15
488 };
489#define CLASS_NONE 99
490 int i;
491
492 if ((*pp)[1] == ':')
493 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000494 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000495 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
496 {
497 *pp += STRLEN(class_names[i]) + 2;
498 return i;
499 }
500 }
501 return CLASS_NONE;
502}
503
504/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000505 * Specific version of character class functions.
506 * Using a table to keep this fast.
507 */
508static short class_tab[256];
509
510#define RI_DIGIT 0x01
511#define RI_HEX 0x02
512#define RI_OCTAL 0x04
513#define RI_WORD 0x08
514#define RI_HEAD 0x10
515#define RI_ALPHA 0x20
516#define RI_LOWER 0x40
517#define RI_UPPER 0x80
518#define RI_WHITE 0x100
519
520 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100521init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000522{
523 int i;
524 static int done = FALSE;
525
526 if (done)
527 return;
528
529 for (i = 0; i < 256; ++i)
530 {
531 if (i >= '0' && i <= '7')
532 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
533 else if (i >= '8' && i <= '9')
534 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
535 else if (i >= 'a' && i <= 'f')
536 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
537#ifdef EBCDIC
538 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
539 || (i >= 's' && i <= 'z'))
540#else
541 else if (i >= 'g' && i <= 'z')
542#endif
543 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
544 else if (i >= 'A' && i <= 'F')
545 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
546#ifdef EBCDIC
547 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
548 || (i >= 'S' && i <= 'Z'))
549#else
550 else if (i >= 'G' && i <= 'Z')
551#endif
552 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
553 else if (i == '_')
554 class_tab[i] = RI_WORD + RI_HEAD;
555 else
556 class_tab[i] = 0;
557 }
558 class_tab[' '] |= RI_WHITE;
559 class_tab['\t'] |= RI_WHITE;
560 done = TRUE;
561}
562
/*
 * Character class tests using class_tab[].  Each one is non-zero when "c"
 * has the corresponding RI_ flag.
 *
 * With FEAT_MBYTE "c" can be a value of 0x100 or more, which has no entry in
 * the table and never matches.  The argument is parenthesized so that an
 * expression such as "x ? a : b" is compared as a whole.  Note that these
 * variants evaluate "c" twice, so do not pass an argument with side effects.
 * A negative "c" is not checked for.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[c] & RI_DIGIT)
# define ri_hex(c)	(class_tab[c] & RI_HEX)
# define ri_octal(c)	(class_tab[c] & RI_OCTAL)
# define ri_word(c)	(class_tab[c] & RI_WORD)
# define ri_head(c)	(class_tab[c] & RI_HEAD)
# define ri_alpha(c)	(class_tab[c] & RI_ALPHA)
# define ri_lower(c)	(class_tab[c] & RI_LOWER)
# define ri_upper(c)	(class_tab[c] & RI_UPPER)
# define ri_white(c)	(class_tab[c] & RI_WHITE)
#endif
584
585/* flags for regflags */
586#define RF_ICASE 1 /* ignore case */
587#define RF_NOICASE 2 /* don't ignore case */
588#define RF_HASNL 4 /* can match a NL */
589#define RF_ICOMBINE 8 /* ignore combining characters */
590#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
591
592/*
593 * Global work variables for vim_regcomp().
594 */
595
596static char_u *regparse; /* Input-scan pointer. */
597static int prevchr_len; /* byte length of previous char */
598static int num_complex_braces; /* Complex \{...} count */
599static int regnpar; /* () count. */
600#ifdef FEAT_SYN_HL
601static int regnzpar; /* \z() count. */
602static int re_has_z; /* \z item detected */
603#endif
604static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
605static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000606static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000607static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
608static unsigned regflags; /* RF_ flags for prog */
609static long brace_min[10]; /* Minimums for complex brace repeats */
610static long brace_max[10]; /* Maximums for complex brace repeats */
611static int brace_count[10]; /* Current counts for complex brace repeats */
612#if defined(FEAT_SYN_HL) || defined(PROTO)
613static int had_eol; /* TRUE when EOL found by vim_regcomp() */
614#endif
615static int one_exactly = FALSE; /* only do one char for EXACTLY */
616
617static int reg_magic; /* magicness of the pattern: */
618#define MAGIC_NONE 1 /* "\V" very unmagic */
619#define MAGIC_OFF 2 /* "\M" or 'magic' off */
620#define MAGIC_ON 3 /* "\m" or 'magic' */
621#define MAGIC_ALL 4 /* "\v" very magic */
622
623static int reg_string; /* matching with a string instead of a buffer
624 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000625static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000626
627/*
628 * META contains all characters that may be magic, except '^' and '$'.
629 */
630
631#ifdef EBCDIC
632static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
633#else
634/* META[] is used often enough to justify turning it into a table. */
635static char_u META_flags[] = {
636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638/* % & ( ) * + . */
639 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
640/* 1 2 3 4 5 6 7 8 9 < = > ? */
641 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
642/* @ A C D F H I K L M O */
643 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
644/* P S U V W X Z [ _ */
645 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
646/* a c d f h i k l m n o */
647 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
648/* p s u v w x z { | ~ */
649 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
650};
651#endif
652
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200653static int curchr; /* currently parsed character */
654/* Previous character. Note: prevchr is sometimes -1 when we are not at the
655 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
656 * because ^ was taken to be magic -- webb */
657static int prevchr;
658static int prevprevchr; /* previous-previous character */
659static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000660
661/* arguments for reg() */
662#define REG_NOPAREN 0 /* toplevel reg() */
663#define REG_PAREN 1 /* \(\) */
664#define REG_ZPAREN 2 /* \z(\) */
665#define REG_NPAREN 3 /* \%(\) */
666
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200667typedef struct
668{
669 char_u *regparse;
670 int prevchr_len;
671 int curchr;
672 int prevchr;
673 int prevprevchr;
674 int nextchr;
675 int at_start;
676 int prev_at_start;
677 int regnpar;
678} parse_state_T;
679
Bram Moolenaar071d4272004-06-13 20:20:40 +0000680/*
681 * Forward declarations for vim_regcomp()'s friends.
682 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100683static void initchr(char_u *);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100684static int getchr(void);
685static void skipchr_keepstart(void);
686static int peekchr(void);
687static void skipchr(void);
688static void ungetchr(void);
Bram Moolenaar4c22a912017-11-02 22:29:38 +0100689static long gethexchrs(int maxinputlen);
690static long getoctchrs(void);
691static long getdecchrs(void);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100692static int coll_get_char(void);
693static void regcomp_start(char_u *expr, int flags);
694static char_u *reg(int, int *);
695static char_u *regbranch(int *flagp);
696static char_u *regconcat(int *flagp);
697static char_u *regpiece(int *);
698static char_u *regatom(int *);
699static char_u *regnode(int);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000700#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100701static int use_multibytecode(int c);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000702#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100703static int prog_magic_wrong(void);
704static char_u *regnext(char_u *);
705static void regc(int b);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000706#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100707static void regmbc(int c);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200708# define REGMBC(x) regmbc(x);
709# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000710#else
711# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200712# define REGMBC(x)
713# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100715static void reginsert(int, char_u *);
716static void reginsert_nr(int op, long val, char_u *opnd);
717static void reginsert_limits(int, long, long, char_u *);
718static char_u *re_put_long(char_u *pr, long_u val);
719static int read_limits(long *, long *);
720static void regtail(char_u *, char_u *);
721static void regoptail(char_u *, char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200723static regengine_T bt_regengine;
724static regengine_T nfa_regengine;
725
Bram Moolenaar071d4272004-06-13 20:20:40 +0000726/*
727 * Return TRUE if compiled regular expression "prog" can match a line break.
728 */
729 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100730re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731{
732 return (prog->regflags & RF_HASNL);
733}
734
735/*
736 * Return TRUE if compiled regular expression "prog" looks before the start
737 * position (pattern contains "\@<=" or "\@<!").
738 */
739 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100740re_lookbehind(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000741{
742 return (prog->regflags & RF_LOOKBH);
743}
744
745/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000746 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
747 * Returns a character representing the class. Zero means that no item was
748 * recognized. Otherwise "pp" is advanced to after the item.
749 */
750 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100751get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000752{
753 int c;
754 int l = 1;
755 char_u *p = *pp;
756
757 if (p[1] == '=')
758 {
759#ifdef FEAT_MBYTE
760 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000761 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000762#endif
763 if (p[l + 2] == '=' && p[l + 3] == ']')
764 {
765#ifdef FEAT_MBYTE
766 if (has_mbyte)
767 c = mb_ptr2char(p + 2);
768 else
769#endif
770 c = p[2];
771 *pp += l + 4;
772 return c;
773 }
774 }
775 return 0;
776}
777
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 * Each entry is one equivalence class: a NUL-terminated string holding the
 * plain letter followed by the other members of its class.
 * reg_equi_class() searches the 16 entries for "c" and emits every byte of
 * the entry that contains it.
 */
char *EQUIVAL_CLASS_C[16] = {
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF\x80",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF\x70",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
801
Bram Moolenaardf177f62005-02-22 08:39:57 +0000802/*
803 * Produce the bytes for equivalence class "c".
804 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200805 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000806 */
807 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100808reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000809{
810#ifdef FEAT_MBYTE
811 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000812 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000813#endif
814 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200815#ifdef EBCDIC
816 int i;
817
818 /* This might be slower than switch/case below. */
819 for (i = 0; i < 16; i++)
820 {
821 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
822 {
823 char *p = EQUIVAL_CLASS_C[i];
824
825 while (*p != 0)
826 regmbc(*p++);
827 return;
828 }
829 }
830#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000831 switch (c)
832 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200833 /* Do not use '\300' style, it results in a negative number. */
834 case 'A': case 0xc0: case 0xc1: case 0xc2:
835 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200836 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
837 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200838 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
839 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
840 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200841 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
842 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
843 REGMBC(0x1ea2)
844 return;
845 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
846 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000847 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200848 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200849 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200850 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200851 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
852 REGMBC(0x10c)
853 return;
854 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
855 CASEMBC(0x1e0e) CASEMBC(0x1e10)
856 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
857 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000858 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200859 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200860 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
861 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200862 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
863 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200864 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
865 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
866 REGMBC(0x1ebc)
867 return;
868 case 'F': CASEMBC(0x1e1e)
869 regmbc('F'); REGMBC(0x1e1e)
870 return;
871 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
872 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
873 CASEMBC(0x1e20)
874 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
875 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
876 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
877 return;
878 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
879 CASEMBC(0x1e26) CASEMBC(0x1e28)
880 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
881 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000882 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200883 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200884 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
885 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200886 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
887 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200888 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
889 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
890 REGMBC(0x1ec8)
891 return;
892 case 'J': CASEMBC(0x134)
893 regmbc('J'); REGMBC(0x134)
894 return;
895 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
896 CASEMBC(0x1e34)
897 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
898 REGMBC(0x1e30) REGMBC(0x1e34)
899 return;
900 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
901 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
902 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
903 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
904 REGMBC(0x1e3a)
905 return;
906 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
907 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000908 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200909 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200910 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
911 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200912 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200913 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
914 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000915 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200916 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
917 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200918 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
919 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200920 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
921 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
922 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200923 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
924 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
925 REGMBC(0x1ec) REGMBC(0x1ece)
926 return;
927 case 'P': case 0x1e54: case 0x1e56:
928 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
929 return;
930 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
931 CASEMBC(0x1e58) CASEMBC(0x1e5e)
932 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
933 REGMBC(0x1e58) REGMBC(0x1e5e)
934 return;
935 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
936 CASEMBC(0x160) CASEMBC(0x1e60)
937 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
938 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
939 return;
940 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
941 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
942 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
943 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000944 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200945 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200946 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
947 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
948 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200949 regmbc('U'); regmbc(0xd9); regmbc(0xda);
950 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200951 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
952 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
953 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
954 return;
955 case 'V': CASEMBC(0x1e7c)
956 regmbc('V'); REGMBC(0x1e7c)
957 return;
958 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
959 CASEMBC(0x1e84) CASEMBC(0x1e86)
960 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
961 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
962 return;
963 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
964 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000965 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200966 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200967 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
968 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200969 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200970 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
971 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
972 return;
973 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
974 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
975 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
976 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
977 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000978 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200979 case 'a': case 0xe0: case 0xe1: case 0xe2:
980 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200981 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
982 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200983 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
984 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
985 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200986 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
987 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
988 REGMBC(0x1ea3)
989 return;
990 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
991 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000992 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200993 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200994 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200995 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200996 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
997 REGMBC(0x10d)
998 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +0200999 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1000 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001001 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001002 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001003 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001004 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001005 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1006 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001007 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1008 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001009 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1010 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1011 REGMBC(0x1ebd)
1012 return;
1013 case 'f': CASEMBC(0x1e1f)
1014 regmbc('f'); REGMBC(0x1e1f)
1015 return;
1016 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1017 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1018 CASEMBC(0x1e21)
1019 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1020 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1021 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1022 return;
1023 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1024 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1025 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1026 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1027 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001028 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001029 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001030 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1031 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001032 regmbc('i'); regmbc(0xec); regmbc(0xed);
1033 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001034 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1035 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1036 return;
1037 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1038 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1039 return;
1040 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1041 CASEMBC(0x1e35)
1042 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1043 REGMBC(0x1e31) REGMBC(0x1e35)
1044 return;
1045 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1046 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1047 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1048 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1049 REGMBC(0x1e3b)
1050 return;
1051 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1052 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001053 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001054 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001055 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1056 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001057 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001058 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1059 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001060 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001061 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1062 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001063 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1064 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001065 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1066 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1067 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001068 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1069 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1070 REGMBC(0x1ed) REGMBC(0x1ecf)
1071 return;
1072 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1073 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1074 return;
1075 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1076 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1077 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1078 REGMBC(0x1e59) REGMBC(0x1e5f)
1079 return;
1080 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1081 CASEMBC(0x161) CASEMBC(0x1e61)
1082 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1083 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1084 return;
1085 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1086 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1087 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1088 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001089 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001090 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001091 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1092 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1093 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001094 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1095 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001096 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1097 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1098 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1099 return;
1100 case 'v': CASEMBC(0x1e7d)
1101 regmbc('v'); REGMBC(0x1e7d)
1102 return;
1103 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1104 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1105 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1106 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1107 REGMBC(0x1e98)
1108 return;
1109 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1110 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001111 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001112 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001113 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1114 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001115 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001116 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1117 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1118 return;
1119 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1120 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1121 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1122 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1123 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001124 return;
1125 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001126#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001127 }
1128 regmbc(c);
1129}
1130
1131/*
1132 * Check for a collating element "[.a.]". "pp" points to the '['.
1133 * Returns a character. Zero means that no item was recognized. Otherwise
1134 * "pp" is advanced to after the item.
1135 * Currently only single characters are recognized!
1136 */
1137 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001138get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001139{
1140 int c;
1141 int l = 1;
1142 char_u *p = *pp;
1143
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001144 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001145 {
1146#ifdef FEAT_MBYTE
1147 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001148 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001149#endif
1150 if (p[l + 2] == '.' && p[l + 3] == ']')
1151 {
1152#ifdef FEAT_MBYTE
1153 if (has_mbyte)
1154 c = mb_ptr2char(p + 2);
1155 else
1156#endif
1157 c = p[2];
1158 *pp += l + 4;
1159 return c;
1160 }
1161 }
1162 return 0;
1163}
1164
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001165static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1166static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1167
1168 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001169get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001170{
1171 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1172 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1173}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001174
/*
 * Skip over a "[]" range.
 * "p" must point to the character after the '['.
 * The returned pointer is on the matching ']', or the terminating NUL.
 * Reads reg_cpo_bsl and reg_cpo_lit, which are set by get_cpo_flags().
 */
    static char_u *
skip_anyof(char_u *p)
{
#ifdef FEAT_MBYTE
    int         l;
#endif

    if (*p == '^')      /* Complement of range. */
        ++p;
    /* A ']' or '-' in the first position is skipped here, so that it does
     * not end the loop below. */
    if (*p == ']' || *p == '-')
        ++p;
    while (*p != NUL && *p != ']')
    {
#ifdef FEAT_MBYTE
        /* Step over a multi-byte character as a whole. */
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            p += l;
        else
#endif
            if (*p == '-')
        {
            /* Also skip the character after the '-', unless it is the
             * closing ']' or the end of the text. */
            ++p;
            if (*p != ']' && *p != NUL)
                MB_PTR_ADV(p);
        }
        else if (*p == '\\'
                && !reg_cpo_bsl
                && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
                    || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
            /* Backslash plus a character listed in REGEXP_INRANGE (or in
             * REGEXP_ABBR when reg_cpo_lit is not set): skip both. */
            p += 2;
        else if (*p == '[')
        {
            /* Each get_*() call advances "p" past the item it recognizes;
             * only when none does is the '[' skipped by itself. */
            if (get_char_class(&p) == CLASS_NONE
                    && get_equi_class(&p) == 0
                    && get_coll_element(&p) == 0
                    && *p != NUL)
                ++p; /* it is not a class name and not NUL */
        }
        else
            ++p;
    }

    return p;
}
1223
1224/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001225 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001226 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001227 * Take care of characters with a backslash in front of it.
1228 * Skip strings inside [ and ].
1229 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1230 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1231 * is changed in-place.
1232 */
1233 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001234skip_regexp(
1235 char_u *startp,
1236 int dirc,
1237 int magic,
1238 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001239{
1240 int mymagic;
1241 char_u *p = startp;
1242
1243 if (magic)
1244 mymagic = MAGIC_ON;
1245 else
1246 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001247 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001248
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001249 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001250 {
1251 if (p[0] == dirc) /* found end of regexp */
1252 break;
1253 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1254 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1255 {
1256 p = skip_anyof(p + 1);
1257 if (p[0] == NUL)
1258 break;
1259 }
1260 else if (p[0] == '\\' && p[1] != NUL)
1261 {
1262 if (dirc == '?' && newp != NULL && p[1] == '?')
1263 {
1264 /* change "\?" to "?", make a copy first. */
1265 if (*newp == NULL)
1266 {
1267 *newp = vim_strsave(startp);
1268 if (*newp != NULL)
1269 p = *newp + (p - startp);
1270 }
1271 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001272 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 else
1274 ++p;
1275 }
1276 else
1277 ++p; /* skip next character */
1278 if (*p == 'v')
1279 mymagic = MAGIC_ALL;
1280 else if (*p == 'V')
1281 mymagic = MAGIC_NONE;
1282 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001283 }
1284 return p;
1285}
1286
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02001287/*
1288 * Return TRUE if the back reference is legal. We must have seen the close
1289 * brace.
1290 * TODO: Should also check that we don't refer to something that is repeated
1291 * (+*=): what instance of the repetition should we match?
1292 */
1293 static int
1294seen_endbrace(int refnum)
1295{
1296 if (!had_endbrace[refnum])
1297 {
1298 char_u *p;
1299
1300 /* Trick: check if "@<=" or "@<!" follows, in which case
1301 * the \1 can appear before the referenced match. */
1302 for (p = regparse; *p != NUL; ++p)
1303 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1304 break;
1305 if (*p == NUL)
1306 {
1307 EMSG(_("E65: Illegal back reference"));
1308 rc_did_emsg = TRUE;
1309 return FALSE;
1310 }
1311 }
1312 return TRUE;
1313}
1314
/*
 * bt_regcomp() - compile a regular expression into internal code for the
 * traditional back track matcher.
 * Returns the program in allocated space.  Returns NULL for an error.
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because vim_free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 * "re_flags": RE_MAGIC and/or RE_STRING.
 *
 * After the second pass the optimization fields of the program are filled
 * in: "regstart" (first character of a match), "reganch" (match is anchored
 * at the start), "regmust"/"regmlen" (literal string that must appear) and
 * "regflags".
 */
    static regprog_T *
bt_regcomp(char_u *expr, int re_flags)
{
    bt_regprog_T    *r;
    char_u      *scan;
    char_u      *longest;
    int         len;
    int         flags;

    if (expr == NULL)
        EMSG_RET_NULL(_(e_null));

    init_class_tab();

    /*
     * First pass: determine size, legality.
     */
    regcomp_start(expr, re_flags);
    regcode = JUST_CALC_SIZE;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
        return NULL;

    /* Allocate space. */
    r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
    if (r == NULL)
        return NULL;
    r->re_in_use = FALSE;

    /*
     * Second pass: emit code.
     */
    regcomp_start(expr, re_flags);
    regcode = r->program;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
    {
        /* Release the program first; the message is only given for the
         * "too long" case. */
        vim_free(r);
        if (reg_toolong)
            EMSG_RET_NULL(_("E339: Pattern too long"));
        return NULL;
    }

    /* Dig out information for optimizations. */
    r->regstart = NUL;          /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    r->regflags = regflags;
    if (flags & HASNL)
        r->regflags |= RF_HASNL;
    if (flags & HASLOOKBH)
        r->regflags |= RF_LOOKBH;
#ifdef FEAT_SYN_HL
    /* Remember whether this pattern has any \z specials in it. */
    r->reghasz = re_has_z;
#endif
    scan = r->program + 1;      /* First BRANCH. */
    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
    {
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == BOL || OP(scan) == RE_BOF)
        {
            r->reganch++;
            scan = regnext(scan);
        }

        /* When the pattern starts with literal text, its first character
         * becomes "regstart". */
        if (OP(scan) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(scan));
            else
#endif
                r->regstart = *OPERAND(scan);
        }
        /* Same when a single one of the nodes listed here comes before the
         * literal text. */
        else if ((OP(scan) == BOW
                    || OP(scan) == EOW
                    || OP(scan) == NOTHING
                    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
                    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
                 && OP(regnext(scan)) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
            else
#endif
                r->regstart = *OPERAND(regnext(scan));
        }

        /*
         * If there's something expensive in the r.e., find the longest
         * literal string that must appear and make it the regmust.  Resolve
         * ties in favor of later strings, since the regstart check works
         * with the beginning of the r.e. and avoiding duplication
         * strengthens checking.  Not a strong reason, but sufficient in the
         * absence of others.
         */
        /*
         * When the r.e. starts with BOW, it is faster to look for a regmust
         * first. Used a lot for "#" and "*" commands. (Added by mool).
         */
        if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
                                                          && !(flags & HASNL))
        {
            longest = NULL;
            len = 0;
            /* ">=" makes a later string of equal length win the tie. */
            for (; scan != NULL; scan = regnext(scan))
                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
                {
                    longest = OPERAND(scan);
                    len = (int)STRLEN(OPERAND(scan));
                }
            r->regmust = longest;
            r->regmlen = len;
        }
    }
#ifdef BT_REGEXP_DUMP
    regdump(expr, r);
#endif
    r->engine = &bt_regengine;
    return (regprog_T *)r;
}
1462
/*
 * Free a compiled regexp program, returned by bt_regcomp().
 * The program is one allocated block, so a single vim_free() call releases
 * all of it.
 */
    static void
bt_regfree(regprog_T *prog)
{
    vim_free(prog);
}
1471
1472/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001473 * Setup to parse the regexp. Used once to get the length and once to do it.
1474 */
1475 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001476regcomp_start(
1477 char_u *expr,
1478 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001479{
1480 initchr(expr);
1481 if (re_flags & RE_MAGIC)
1482 reg_magic = MAGIC_ON;
1483 else
1484 reg_magic = MAGIC_OFF;
1485 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001486 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001487 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001488
1489 num_complex_braces = 0;
1490 regnpar = 1;
1491 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1492#ifdef FEAT_SYN_HL
1493 regnzpar = 1;
1494 re_has_z = 0;
1495#endif
1496 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001497 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001498 regflags = 0;
1499#if defined(FEAT_SYN_HL) || defined(PROTO)
1500 had_eol = FALSE;
1501#endif
1502}
1503
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found.  This is messy, but it works fine.
 * Returns the current value of "had_eol", which regcomp_start() resets to
 * FALSE at the start of every compile pass.
 */
    int
vim_regcomp_had_eol(void)
{
    return had_eol;
}
#endif
1515
Bram Moolenaar0270f382018-07-17 05:43:58 +02001516// variables used for parsing
1517static int at_start; // True when on the first character
1518static int prev_at_start; // True when on the second character
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001519
Bram Moolenaar071d4272004-06-13 20:20:40 +00001520/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001521 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001522 *
1523 * Caller must absorb opening parenthesis.
1524 *
1525 * Combining parenthesis handling with the base level of regular expression
1526 * is a trifle forced, but the need to tie the tails of the branches to what
1527 * follows makes it hard to avoid.
1528 */
1529 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001530reg(
1531 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1532 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001533{
1534 char_u *ret;
1535 char_u *br;
1536 char_u *ender;
1537 int parno = 0;
1538 int flags;
1539
1540 *flagp = HASWIDTH; /* Tentatively. */
1541
1542#ifdef FEAT_SYN_HL
1543 if (paren == REG_ZPAREN)
1544 {
1545 /* Make a ZOPEN node. */
1546 if (regnzpar >= NSUBEXP)
1547 EMSG_RET_NULL(_("E50: Too many \\z("));
1548 parno = regnzpar;
1549 regnzpar++;
1550 ret = regnode(ZOPEN + parno);
1551 }
1552 else
1553#endif
1554 if (paren == REG_PAREN)
1555 {
1556 /* Make a MOPEN node. */
1557 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001558 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001559 parno = regnpar;
1560 ++regnpar;
1561 ret = regnode(MOPEN + parno);
1562 }
1563 else if (paren == REG_NPAREN)
1564 {
1565 /* Make a NOPEN node. */
1566 ret = regnode(NOPEN);
1567 }
1568 else
1569 ret = NULL;
1570
1571 /* Pick up the branches, linking them together. */
1572 br = regbranch(&flags);
1573 if (br == NULL)
1574 return NULL;
1575 if (ret != NULL)
1576 regtail(ret, br); /* [MZ]OPEN -> first. */
1577 else
1578 ret = br;
1579 /* If one of the branches can be zero-width, the whole thing can.
1580 * If one of the branches has * at start or matches a line-break, the
1581 * whole thing can. */
1582 if (!(flags & HASWIDTH))
1583 *flagp &= ~HASWIDTH;
1584 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1585 while (peekchr() == Magic('|'))
1586 {
1587 skipchr();
1588 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001589 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001590 return NULL;
1591 regtail(ret, br); /* BRANCH -> BRANCH. */
1592 if (!(flags & HASWIDTH))
1593 *flagp &= ~HASWIDTH;
1594 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1595 }
1596
1597 /* Make a closing node, and hook it on the end. */
1598 ender = regnode(
1599#ifdef FEAT_SYN_HL
1600 paren == REG_ZPAREN ? ZCLOSE + parno :
1601#endif
1602 paren == REG_PAREN ? MCLOSE + parno :
1603 paren == REG_NPAREN ? NCLOSE : END);
1604 regtail(ret, ender);
1605
1606 /* Hook the tails of the branches to the closing node. */
1607 for (br = ret; br != NULL; br = regnext(br))
1608 regoptail(br, ender);
1609
1610 /* Check for proper termination. */
1611 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1612 {
1613#ifdef FEAT_SYN_HL
1614 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001615 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001616 else
1617#endif
1618 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001619 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001620 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001621 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001622 }
1623 else if (paren == REG_NOPAREN && peekchr() != NUL)
1624 {
1625 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001626 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001627 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001628 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001629 /* NOTREACHED */
1630 }
1631 /*
1632 * Here we set the flag allowing back references to this set of
1633 * parentheses.
1634 */
1635 if (paren == REG_PAREN)
1636 had_endbrace[parno] = TRUE; /* have seen the close paren */
1637 return ret;
1638}
1639
1640/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001641 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001642 * Implements the & operator.
1643 */
1644 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001645regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001646{
1647 char_u *ret;
1648 char_u *chain = NULL;
1649 char_u *latest;
1650 int flags;
1651
1652 *flagp = WORST | HASNL; /* Tentatively. */
1653
1654 ret = regnode(BRANCH);
1655 for (;;)
1656 {
1657 latest = regconcat(&flags);
1658 if (latest == NULL)
1659 return NULL;
1660 /* If one of the branches has width, the whole thing has. If one of
1661 * the branches anchors at start-of-line, the whole thing does.
1662 * If one of the branches uses look-behind, the whole thing does. */
1663 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1664 /* If one of the branches doesn't match a line-break, the whole thing
1665 * doesn't. */
1666 *flagp &= ~HASNL | (flags & HASNL);
1667 if (chain != NULL)
1668 regtail(chain, latest);
1669 if (peekchr() != Magic('&'))
1670 break;
1671 skipchr();
1672 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001673 if (reg_toolong)
1674 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001675 reginsert(MATCH, latest);
1676 chain = latest;
1677 }
1678
1679 return ret;
1680}
1681
1682/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001683 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001684 * Implements the concatenation operator.
1685 */
1686 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001687regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001688{
1689 char_u *first = NULL;
1690 char_u *chain = NULL;
1691 char_u *latest;
1692 int flags;
1693 int cont = TRUE;
1694
1695 *flagp = WORST; /* Tentatively. */
1696
1697 while (cont)
1698 {
1699 switch (peekchr())
1700 {
1701 case NUL:
1702 case Magic('|'):
1703 case Magic('&'):
1704 case Magic(')'):
1705 cont = FALSE;
1706 break;
1707 case Magic('Z'):
1708#ifdef FEAT_MBYTE
1709 regflags |= RF_ICOMBINE;
1710#endif
1711 skipchr_keepstart();
1712 break;
1713 case Magic('c'):
1714 regflags |= RF_ICASE;
1715 skipchr_keepstart();
1716 break;
1717 case Magic('C'):
1718 regflags |= RF_NOICASE;
1719 skipchr_keepstart();
1720 break;
1721 case Magic('v'):
1722 reg_magic = MAGIC_ALL;
1723 skipchr_keepstart();
1724 curchr = -1;
1725 break;
1726 case Magic('m'):
1727 reg_magic = MAGIC_ON;
1728 skipchr_keepstart();
1729 curchr = -1;
1730 break;
1731 case Magic('M'):
1732 reg_magic = MAGIC_OFF;
1733 skipchr_keepstart();
1734 curchr = -1;
1735 break;
1736 case Magic('V'):
1737 reg_magic = MAGIC_NONE;
1738 skipchr_keepstart();
1739 curchr = -1;
1740 break;
1741 default:
1742 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001743 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001744 return NULL;
1745 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1746 if (chain == NULL) /* First piece. */
1747 *flagp |= flags & SPSTART;
1748 else
1749 regtail(chain, latest);
1750 chain = latest;
1751 if (first == NULL)
1752 first = latest;
1753 break;
1754 }
1755 }
1756 if (first == NULL) /* Loop ran zero times. */
1757 first = regnode(NOTHING);
1758 return first;
1759}
1760
1761/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001762 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001763 *
1764 * Note that the branching code sequences used for = and the general cases
1765 * of * and + are somewhat optimized: they use the same NOTHING node as
1766 * both the endmarker for their branch list and the body of the last branch.
1767 * It might seem that this node could be dispensed with entirely, but the
1768 * endmarker role is not redundant.
1769 */
1770 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001771regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001772{
1773 char_u *ret;
1774 int op;
1775 char_u *next;
1776 int flags;
1777 long minval;
1778 long maxval;
1779
1780 ret = regatom(&flags);
1781 if (ret == NULL)
1782 return NULL;
1783
1784 op = peekchr();
1785 if (re_multi_type(op) == NOT_MULTI)
1786 {
1787 *flagp = flags;
1788 return ret;
1789 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001790 /* default flags */
1791 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1792
1793 skipchr();
1794 switch (op)
1795 {
1796 case Magic('*'):
1797 if (flags & SIMPLE)
1798 reginsert(STAR, ret);
1799 else
1800 {
1801 /* Emit x* as (x&|), where & means "self". */
1802 reginsert(BRANCH, ret); /* Either x */
1803 regoptail(ret, regnode(BACK)); /* and loop */
1804 regoptail(ret, ret); /* back */
1805 regtail(ret, regnode(BRANCH)); /* or */
1806 regtail(ret, regnode(NOTHING)); /* null. */
1807 }
1808 break;
1809
1810 case Magic('+'):
1811 if (flags & SIMPLE)
1812 reginsert(PLUS, ret);
1813 else
1814 {
1815 /* Emit x+ as x(&|), where & means "self". */
1816 next = regnode(BRANCH); /* Either */
1817 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001818 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001819 regtail(next, regnode(BRANCH)); /* or */
1820 regtail(ret, regnode(NOTHING)); /* null. */
1821 }
1822 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1823 break;
1824
1825 case Magic('@'):
1826 {
1827 int lop = END;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01001828 long nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001829
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001830 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001831 switch (no_Magic(getchr()))
1832 {
1833 case '=': lop = MATCH; break; /* \@= */
1834 case '!': lop = NOMATCH; break; /* \@! */
1835 case '>': lop = SUBPAT; break; /* \@> */
1836 case '<': switch (no_Magic(getchr()))
1837 {
1838 case '=': lop = BEHIND; break; /* \@<= */
1839 case '!': lop = NOBEHIND; break; /* \@<! */
1840 }
1841 }
1842 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001843 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001844 reg_magic == MAGIC_ALL);
1845 /* Look behind must match with behind_pos. */
1846 if (lop == BEHIND || lop == NOBEHIND)
1847 {
1848 regtail(ret, regnode(BHPOS));
1849 *flagp |= HASLOOKBH;
1850 }
1851 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001852 if (lop == BEHIND || lop == NOBEHIND)
1853 {
1854 if (nr < 0)
1855 nr = 0; /* no limit is same as zero limit */
1856 reginsert_nr(lop, nr, ret);
1857 }
1858 else
1859 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001860 break;
1861 }
1862
1863 case Magic('?'):
1864 case Magic('='):
1865 /* Emit x= as (x|) */
1866 reginsert(BRANCH, ret); /* Either x */
1867 regtail(ret, regnode(BRANCH)); /* or */
1868 next = regnode(NOTHING); /* null. */
1869 regtail(ret, next);
1870 regoptail(ret, next);
1871 break;
1872
1873 case Magic('{'):
1874 if (!read_limits(&minval, &maxval))
1875 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001876 if (flags & SIMPLE)
1877 {
1878 reginsert(BRACE_SIMPLE, ret);
1879 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1880 }
1881 else
1882 {
1883 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001884 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001885 reg_magic == MAGIC_ALL);
1886 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1887 regoptail(ret, regnode(BACK));
1888 regoptail(ret, ret);
1889 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1890 ++num_complex_braces;
1891 }
1892 if (minval > 0 && maxval > 0)
1893 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1894 break;
1895 }
1896 if (re_multi_type(peekchr()) != NOT_MULTI)
1897 {
1898 /* Can't have a multi follow a multi. */
1899 if (peekchr() == Magic('*'))
1900 sprintf((char *)IObuff, _("E61: Nested %s*"),
1901 reg_magic >= MAGIC_ON ? "" : "\\");
1902 else
1903 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1904 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1905 EMSG_RET_NULL(IObuff);
1906 }
1907
1908 return ret;
1909}
1910
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001911/* When making changes to classchars also change nfa_classcodes. */
1912static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1913static int classcodes[] = {
1914 ANY, IDENT, SIDENT, KWORD, SKWORD,
1915 FNAME, SFNAME, PRINT, SPRINT,
1916 WHITE, NWHITE, DIGIT, NDIGIT,
1917 HEX, NHEX, OCTAL, NOCTAL,
1918 WORD, NWORD, HEAD, NHEAD,
1919 ALPHA, NALPHA, LOWER, NLOWER,
1920 UPPER, NUPPER
1921};
1922
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001924 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001925 *
1926 * Optimization: gobbles an entire sequence of ordinary characters so that
1927 * it can turn them into a single node, which is smaller to store and
1928 * faster to run. Don't do this when one_exactly is set.
1929 */
1930 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001931regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932{
1933 char_u *ret;
1934 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001935 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936 char_u *p;
1937 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001938 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001939
1940 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001941
1942 c = getchr();
1943 switch (c)
1944 {
1945 case Magic('^'):
1946 ret = regnode(BOL);
1947 break;
1948
1949 case Magic('$'):
1950 ret = regnode(EOL);
1951#if defined(FEAT_SYN_HL) || defined(PROTO)
1952 had_eol = TRUE;
1953#endif
1954 break;
1955
1956 case Magic('<'):
1957 ret = regnode(BOW);
1958 break;
1959
1960 case Magic('>'):
1961 ret = regnode(EOW);
1962 break;
1963
1964 case Magic('_'):
1965 c = no_Magic(getchr());
1966 if (c == '^') /* "\_^" is start-of-line */
1967 {
1968 ret = regnode(BOL);
1969 break;
1970 }
1971 if (c == '$') /* "\_$" is end-of-line */
1972 {
1973 ret = regnode(EOL);
1974#if defined(FEAT_SYN_HL) || defined(PROTO)
1975 had_eol = TRUE;
1976#endif
1977 break;
1978 }
1979
1980 extra = ADD_NL;
1981 *flagp |= HASNL;
1982
1983 /* "\_[" is character range plus newline */
1984 if (c == '[')
1985 goto collection;
1986
1987 /* "\_x" is character class plus newline */
Bram Moolenaar2f40d122017-10-24 21:49:36 +02001988 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001989
1990 /*
1991 * Character classes.
1992 */
1993 case Magic('.'):
1994 case Magic('i'):
1995 case Magic('I'):
1996 case Magic('k'):
1997 case Magic('K'):
1998 case Magic('f'):
1999 case Magic('F'):
2000 case Magic('p'):
2001 case Magic('P'):
2002 case Magic('s'):
2003 case Magic('S'):
2004 case Magic('d'):
2005 case Magic('D'):
2006 case Magic('x'):
2007 case Magic('X'):
2008 case Magic('o'):
2009 case Magic('O'):
2010 case Magic('w'):
2011 case Magic('W'):
2012 case Magic('h'):
2013 case Magic('H'):
2014 case Magic('a'):
2015 case Magic('A'):
2016 case Magic('l'):
2017 case Magic('L'):
2018 case Magic('u'):
2019 case Magic('U'):
2020 p = vim_strchr(classchars, no_Magic(c));
2021 if (p == NULL)
2022 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002023#ifdef FEAT_MBYTE
2024 /* When '.' is followed by a composing char ignore the dot, so that
2025 * the composing char is matched here. */
2026 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2027 {
2028 c = getchr();
2029 goto do_multibyte;
2030 }
2031#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002032 ret = regnode(classcodes[p - classchars] + extra);
2033 *flagp |= HASWIDTH | SIMPLE;
2034 break;
2035
2036 case Magic('n'):
2037 if (reg_string)
2038 {
2039 /* In a string "\n" matches a newline character. */
2040 ret = regnode(EXACTLY);
2041 regc(NL);
2042 regc(NUL);
2043 *flagp |= HASWIDTH | SIMPLE;
2044 }
2045 else
2046 {
2047 /* In buffer text "\n" matches the end of a line. */
2048 ret = regnode(NEWL);
2049 *flagp |= HASWIDTH | HASNL;
2050 }
2051 break;
2052
2053 case Magic('('):
2054 if (one_exactly)
2055 EMSG_ONE_RET_NULL;
2056 ret = reg(REG_PAREN, &flags);
2057 if (ret == NULL)
2058 return NULL;
2059 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2060 break;
2061
2062 case NUL:
2063 case Magic('|'):
2064 case Magic('&'):
2065 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002066 if (one_exactly)
2067 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002068 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002069 /* NOTREACHED */
2070
2071 case Magic('='):
2072 case Magic('?'):
2073 case Magic('+'):
2074 case Magic('@'):
2075 case Magic('{'):
2076 case Magic('*'):
2077 c = no_Magic(c);
2078 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2079 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2080 ? "" : "\\", c);
2081 EMSG_RET_NULL(IObuff);
2082 /* NOTREACHED */
2083
2084 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002085 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002086 {
2087 char_u *lp;
2088
2089 ret = regnode(EXACTLY);
2090 lp = reg_prev_sub;
2091 while (*lp != NUL)
2092 regc(*lp++);
2093 regc(NUL);
2094 if (*reg_prev_sub != NUL)
2095 {
2096 *flagp |= HASWIDTH;
2097 if ((lp - reg_prev_sub) == 1)
2098 *flagp |= SIMPLE;
2099 }
2100 }
2101 else
2102 EMSG_RET_NULL(_(e_nopresub));
2103 break;
2104
2105 case Magic('1'):
2106 case Magic('2'):
2107 case Magic('3'):
2108 case Magic('4'):
2109 case Magic('5'):
2110 case Magic('6'):
2111 case Magic('7'):
2112 case Magic('8'):
2113 case Magic('9'):
2114 {
2115 int refnum;
2116
2117 refnum = c - Magic('0');
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +02002118 if (!seen_endbrace(refnum))
2119 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120 ret = regnode(BACKREF + refnum);
2121 }
2122 break;
2123
Bram Moolenaar071d4272004-06-13 20:20:40 +00002124 case Magic('z'):
2125 {
2126 c = no_Magic(getchr());
2127 switch (c)
2128 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002129#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002130 case '(': if ((reg_do_extmatch & REX_SET) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002131 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002132 if (one_exactly)
2133 EMSG_ONE_RET_NULL;
2134 ret = reg(REG_ZPAREN, &flags);
2135 if (ret == NULL)
2136 return NULL;
2137 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2138 re_has_z = REX_SET;
2139 break;
2140
2141 case '1':
2142 case '2':
2143 case '3':
2144 case '4':
2145 case '5':
2146 case '6':
2147 case '7':
2148 case '8':
Bram Moolenaarbcf94422018-06-23 14:21:42 +02002149 case '9': if ((reg_do_extmatch & REX_USE) == 0)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002150 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002151 ret = regnode(ZREF + c - '0');
2152 re_has_z = REX_USE;
2153 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002154#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002155
2156 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002157 if (re_mult_next("\\zs") == FAIL)
2158 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002159 break;
2160
2161 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002162 if (re_mult_next("\\ze") == FAIL)
2163 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002164 break;
2165
2166 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2167 }
2168 }
2169 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002170
2171 case Magic('%'):
2172 {
2173 c = no_Magic(getchr());
2174 switch (c)
2175 {
2176 /* () without a back reference */
2177 case '(':
2178 if (one_exactly)
2179 EMSG_ONE_RET_NULL;
2180 ret = reg(REG_NPAREN, &flags);
2181 if (ret == NULL)
2182 return NULL;
2183 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2184 break;
2185
2186 /* Catch \%^ and \%$ regardless of where they appear in the
2187 * pattern -- regardless of whether or not it makes sense. */
2188 case '^':
2189 ret = regnode(RE_BOF);
2190 break;
2191
2192 case '$':
2193 ret = regnode(RE_EOF);
2194 break;
2195
2196 case '#':
2197 ret = regnode(CURSOR);
2198 break;
2199
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002200 case 'V':
2201 ret = regnode(RE_VISUAL);
2202 break;
2203
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002204 case 'C':
2205 ret = regnode(RE_COMPOSING);
2206 break;
2207
Bram Moolenaar071d4272004-06-13 20:20:40 +00002208 /* \%[abc]: Emit as a list of branches, all ending at the last
2209 * branch which matches nothing. */
2210 case '[':
2211 if (one_exactly) /* doesn't nest */
2212 EMSG_ONE_RET_NULL;
2213 {
2214 char_u *lastbranch;
2215 char_u *lastnode = NULL;
2216 char_u *br;
2217
2218 ret = NULL;
2219 while ((c = getchr()) != ']')
2220 {
2221 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002222 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002223 reg_magic == MAGIC_ALL);
2224 br = regnode(BRANCH);
2225 if (ret == NULL)
2226 ret = br;
2227 else
2228 regtail(lastnode, br);
2229
2230 ungetchr();
2231 one_exactly = TRUE;
2232 lastnode = regatom(flagp);
2233 one_exactly = FALSE;
2234 if (lastnode == NULL)
2235 return NULL;
2236 }
2237 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002238 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002239 reg_magic == MAGIC_ALL);
2240 lastbranch = regnode(BRANCH);
2241 br = regnode(NOTHING);
2242 if (ret != JUST_CALC_SIZE)
2243 {
2244 regtail(lastnode, br);
2245 regtail(lastbranch, br);
2246 /* connect all branches to the NOTHING
2247 * branch at the end */
2248 for (br = ret; br != lastnode; )
2249 {
2250 if (OP(br) == BRANCH)
2251 {
2252 regtail(br, lastbranch);
2253 br = OPERAND(br);
2254 }
2255 else
2256 br = regnext(br);
2257 }
2258 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002259 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002260 break;
2261 }
2262
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002263 case 'd': /* %d123 decimal */
2264 case 'o': /* %o123 octal */
2265 case 'x': /* %xab hex 2 */
2266 case 'u': /* %uabcd hex 4 */
2267 case 'U': /* %U1234abcd hex 8 */
2268 {
Bram Moolenaar4c22a912017-11-02 22:29:38 +01002269 long i;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002270
2271 switch (c)
2272 {
2273 case 'd': i = getdecchrs(); break;
2274 case 'o': i = getoctchrs(); break;
2275 case 'x': i = gethexchrs(2); break;
2276 case 'u': i = gethexchrs(4); break;
2277 case 'U': i = gethexchrs(8); break;
2278 default: i = -1; break;
2279 }
2280
2281 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002282 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002283 _("E678: Invalid character after %s%%[dxouU]"),
2284 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002285#ifdef FEAT_MBYTE
2286 if (use_multibytecode(i))
2287 ret = regnode(MULTIBYTECODE);
2288 else
2289#endif
2290 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002291 if (i == 0)
2292 regc(0x0a);
2293 else
2294#ifdef FEAT_MBYTE
2295 regmbc(i);
2296#else
2297 regc(i);
2298#endif
2299 regc(NUL);
2300 *flagp |= HASWIDTH;
2301 break;
2302 }
2303
Bram Moolenaar071d4272004-06-13 20:20:40 +00002304 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002305 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2306 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002307 {
2308 long_u n = 0;
2309 int cmp;
2310
2311 cmp = c;
2312 if (cmp == '<' || cmp == '>')
2313 c = getchr();
2314 while (VIM_ISDIGIT(c))
2315 {
2316 n = n * 10 + (c - '0');
2317 c = getchr();
2318 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002319 if (c == '\'' && n == 0)
2320 {
2321 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2322 c = getchr();
2323 ret = regnode(RE_MARK);
2324 if (ret == JUST_CALC_SIZE)
2325 regsize += 2;
2326 else
2327 {
2328 *regcode++ = c;
2329 *regcode++ = cmp;
2330 }
2331 break;
2332 }
2333 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002334 {
2335 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002336 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002338 if (save_prev_at_start)
2339 at_start = TRUE;
2340 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002341 else if (c == 'c')
2342 ret = regnode(RE_COL);
2343 else
2344 ret = regnode(RE_VCOL);
2345 if (ret == JUST_CALC_SIZE)
2346 regsize += 5;
2347 else
2348 {
2349 /* put the number and the optional
2350 * comparator after the opcode */
2351 regcode = re_put_long(regcode, n);
2352 *regcode++ = cmp;
2353 }
2354 break;
2355 }
2356 }
2357
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002358 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002359 reg_magic == MAGIC_ALL);
2360 }
2361 }
2362 break;
2363
2364 case Magic('['):
2365collection:
2366 {
2367 char_u *lp;
2368
2369 /*
2370 * If there is no matching ']', we assume the '[' is a normal
2371 * character. This makes 'incsearch' and ":help [" work.
2372 */
2373 lp = skip_anyof(regparse);
2374 if (*lp == ']') /* there is a matching ']' */
2375 {
2376 int startc = -1; /* > 0 when next '-' is a range */
2377 int endc;
2378
2379 /*
2380 * In a character class, different parsing rules apply.
2381 * Not even \ is special anymore, nothing is.
2382 */
2383 if (*regparse == '^') /* Complement of range. */
2384 {
2385 ret = regnode(ANYBUT + extra);
2386 regparse++;
2387 }
2388 else
2389 ret = regnode(ANYOF + extra);
2390
2391 /* At the start ']' and '-' mean the literal character. */
2392 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002393 {
2394 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002395 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002396 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002397
2398 while (*regparse != NUL && *regparse != ']')
2399 {
2400 if (*regparse == '-')
2401 {
2402 ++regparse;
2403 /* The '-' is not used for a range at the end and
2404 * after or before a '\n'. */
2405 if (*regparse == ']' || *regparse == NUL
2406 || startc == -1
2407 || (regparse[0] == '\\' && regparse[1] == 'n'))
2408 {
2409 regc('-');
2410 startc = '-'; /* [--x] is a range */
2411 }
2412 else
2413 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002414 /* Also accept "a-[.z.]" */
2415 endc = 0;
2416 if (*regparse == '[')
2417 endc = get_coll_element(&regparse);
2418 if (endc == 0)
2419 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002421 if (has_mbyte)
2422 endc = mb_ptr2char_adv(&regparse);
2423 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002424#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002425 endc = *regparse++;
2426 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002427
2428 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002429 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002430 endc = coll_get_char();
2431
Bram Moolenaar071d4272004-06-13 20:20:40 +00002432 if (startc > endc)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002433 EMSG_RET_NULL(_(e_reverse_range));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002434#ifdef FEAT_MBYTE
2435 if (has_mbyte && ((*mb_char2len)(startc) > 1
2436 || (*mb_char2len)(endc) > 1))
2437 {
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002438 /* Limit to a range of 256 chars. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002439 if (endc > startc + 256)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002440 EMSG_RET_NULL(_(e_large_class));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002441 while (++startc <= endc)
2442 regmbc(startc);
2443 }
2444 else
2445#endif
2446 {
2447#ifdef EBCDIC
2448 int alpha_only = FALSE;
2449
2450 /* for alphabetical range skip the gaps
2451 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2452 if (isalpha(startc) && isalpha(endc))
2453 alpha_only = TRUE;
2454#endif
2455 while (++startc <= endc)
2456#ifdef EBCDIC
2457 if (!alpha_only || isalpha(startc))
2458#endif
2459 regc(startc);
2460 }
2461 startc = -1;
2462 }
2463 }
2464 /*
2465 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2466 * accepts "\t", "\e", etc., but only when the 'l' flag in
2467 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002468 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002469 */
2470 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002471 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002472 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002473 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002474 && vim_strchr(REGEXP_ABBR,
2475 regparse[1]) != NULL)))
2476 {
2477 regparse++;
2478 if (*regparse == 'n')
2479 {
2480 /* '\n' in range: also match NL */
2481 if (ret != JUST_CALC_SIZE)
2482 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002483 /* Using \n inside [^] does not change what
2484 * matches. "[^\n]" is the same as ".". */
2485 if (*ret == ANYOF)
2486 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002487 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002488 *flagp |= HASNL;
2489 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002490 /* else: must have had a \n already */
2491 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002492 regparse++;
2493 startc = -1;
2494 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002495 else if (*regparse == 'd'
2496 || *regparse == 'o'
2497 || *regparse == 'x'
2498 || *regparse == 'u'
2499 || *regparse == 'U')
2500 {
2501 startc = coll_get_char();
2502 if (startc == 0)
2503 regc(0x0a);
2504 else
2505#ifdef FEAT_MBYTE
2506 regmbc(startc);
2507#else
2508 regc(startc);
2509#endif
2510 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002511 else
2512 {
2513 startc = backslash_trans(*regparse++);
2514 regc(startc);
2515 }
2516 }
2517 else if (*regparse == '[')
2518 {
2519 int c_class;
2520 int cu;
2521
Bram Moolenaardf177f62005-02-22 08:39:57 +00002522 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002523 startc = -1;
2524 /* Characters assumed to be 8 bits! */
2525 switch (c_class)
2526 {
2527 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002528 c_class = get_equi_class(&regparse);
2529 if (c_class != 0)
2530 {
2531 /* produce equivalence class */
2532 reg_equi_class(c_class);
2533 }
2534 else if ((c_class =
2535 get_coll_element(&regparse)) != 0)
2536 {
2537 /* produce a collating element */
2538 regmbc(c_class);
2539 }
2540 else
2541 {
2542 /* literal '[', allow [[-x] as a range */
2543 startc = *regparse++;
2544 regc(startc);
2545 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002546 break;
2547 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002548 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002549 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002550 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002551 break;
2552 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002553 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002554 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002555 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002556 break;
2557 case CLASS_BLANK:
2558 regc(' ');
2559 regc('\t');
2560 break;
2561 case CLASS_CNTRL:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002562 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002563 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002564 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002565 break;
2566 case CLASS_DIGIT:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002567 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002568 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002569 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002570 break;
2571 case CLASS_GRAPH:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002572 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002573 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002574 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002575 break;
2576 case CLASS_LOWER:
2577 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002578 if (MB_ISLOWER(cu) && cu != 170
2579 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002580 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002581 break;
2582 case CLASS_PRINT:
2583 for (cu = 1; cu <= 255; cu++)
2584 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002585 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002586 break;
2587 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002588 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002589 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002590 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002591 break;
2592 case CLASS_SPACE:
2593 for (cu = 9; cu <= 13; cu++)
2594 regc(cu);
2595 regc(' ');
2596 break;
2597 case CLASS_UPPER:
2598 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002599 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002600 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002601 break;
2602 case CLASS_XDIGIT:
2603 for (cu = 1; cu <= 255; cu++)
2604 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002605 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002606 break;
2607 case CLASS_TAB:
2608 regc('\t');
2609 break;
2610 case CLASS_RETURN:
2611 regc('\r');
2612 break;
2613 case CLASS_BACKSPACE:
2614 regc('\b');
2615 break;
2616 case CLASS_ESCAPE:
2617 regc('\033');
2618 break;
2619 }
2620 }
2621 else
2622 {
2623#ifdef FEAT_MBYTE
2624 if (has_mbyte)
2625 {
2626 int len;
2627
2628 /* produce a multibyte character, including any
2629 * following composing characters */
2630 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002631 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002632 if (enc_utf8 && utf_char2len(startc) != len)
2633 startc = -1; /* composing chars */
2634 while (--len >= 0)
2635 regc(*regparse++);
2636 }
2637 else
2638#endif
2639 {
2640 startc = *regparse++;
2641 regc(startc);
2642 }
2643 }
2644 }
2645 regc(NUL);
2646 prevchr_len = 1; /* last char was the ']' */
2647 if (*regparse != ']')
2648 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2649 skipchr(); /* let's be friends with the lexer again */
2650 *flagp |= HASWIDTH | SIMPLE;
2651 break;
2652 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002653 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002654 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002655 }
2656 /* FALLTHROUGH */
2657
2658 default:
2659 {
2660 int len;
2661
2662#ifdef FEAT_MBYTE
2663 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002664 * before a multi and when it's a composing char. */
2665 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002666 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002667do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002668 ret = regnode(MULTIBYTECODE);
2669 regmbc(c);
2670 *flagp |= HASWIDTH | SIMPLE;
2671 break;
2672 }
2673#endif
2674
2675 ret = regnode(EXACTLY);
2676
2677 /*
2678 * Append characters as long as:
2679 * - there is no following multi, we then need the character in
2680 * front of it as a single character operand
2681 * - not running into a Magic character
2682 * - "one_exactly" is not set
2683 * But always emit at least one character. Might be a Multi,
2684 * e.g., a "[" without matching "]".
2685 */
2686 for (len = 0; c != NUL && (len == 0
2687 || (re_multi_type(peekchr()) == NOT_MULTI
2688 && !one_exactly
2689 && !is_Magic(c))); ++len)
2690 {
2691 c = no_Magic(c);
2692#ifdef FEAT_MBYTE
2693 if (has_mbyte)
2694 {
2695 regmbc(c);
2696 if (enc_utf8)
2697 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002698 int l;
2699
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002700 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002701 for (;;)
2702 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002703 l = utf_ptr2len(regparse);
2704 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002705 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002706 regmbc(utf_ptr2char(regparse));
2707 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002708 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002709 }
2710 }
2711 else
2712#endif
2713 regc(c);
2714 c = getchr();
2715 }
2716 ungetchr();
2717
2718 regc(NUL);
2719 *flagp |= HASWIDTH;
2720 if (len == 1)
2721 *flagp |= SIMPLE;
2722 }
2723 break;
2724 }
2725
2726 return ret;
2727}
2728
#ifdef FEAT_MBYTE
/*
 * Decide whether character "c" must be emitted as a MULTIBYTECODE node
 * rather than as part of an EXACTLY node.
 * Returns non-zero when multi-byte is active, "c" takes more than one byte
 * and either the next lexed item is a multi or "c" is a utf-8 composing
 * character.
 */
    static int
use_multibytecode(int c)
{
    /* Single-byte encodings and single-byte characters never qualify. */
    if (!has_mbyte)
        return FALSE;
    if ((*mb_char2len)(c) <= 1)
        return FALSE;

    /* A following multi needs the character as a separate atom. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
        return TRUE;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2742
Bram Moolenaar071d4272004-06-13 20:20:40 +00002743/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002744 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002745 * Return pointer to generated code.
2746 */
2747 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002748regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002749{
2750 char_u *ret;
2751
2752 ret = regcode;
2753 if (ret == JUST_CALC_SIZE)
2754 regsize += 3;
2755 else
2756 {
2757 *regcode++ = op;
2758 *regcode++ = NUL; /* Null "next" pointer. */
2759 *regcode++ = NUL;
2760 }
2761 return ret;
2762}
2763
2764/*
2765 * Emit (if appropriate) a byte of code
2766 */
2767 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002768regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002769{
2770 if (regcode == JUST_CALC_SIZE)
2771 regsize++;
2772 else
2773 *regcode++ = b;
2774}
2775
#ifdef FEAT_MBYTE
/*
 * Emit character "c" in its (possibly multi-byte) encoded form, or add its
 * encoded length to "regsize" when only the size is being calculated.
 * A character above 0xff is silently dropped when multi-byte is not active.
 */
    static void
regmbc(int c)
{
    if (c > 0xff && !has_mbyte)
        return;

    if (regcode != JUST_CALC_SIZE)
        regcode += (*mb_char2bytes)(c, regcode);
    else
        regsize += (*mb_char2len)(c);
}
#endif
2791
2792/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002793 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002794 *
2795 * Means relocating the operand.
2796 */
2797 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002798reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002799{
2800 char_u *src;
2801 char_u *dst;
2802 char_u *place;
2803
2804 if (regcode == JUST_CALC_SIZE)
2805 {
2806 regsize += 3;
2807 return;
2808 }
2809 src = regcode;
2810 regcode += 3;
2811 dst = regcode;
2812 while (src > opnd)
2813 *--dst = *--src;
2814
2815 place = opnd; /* Op node, where operand used to be. */
2816 *place++ = op;
2817 *place++ = NUL;
2818 *place = NUL;
2819}
2820
2821/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002822 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002823 * Add a number to the operator.
2824 */
2825 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002826reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002827{
2828 char_u *src;
2829 char_u *dst;
2830 char_u *place;
2831
2832 if (regcode == JUST_CALC_SIZE)
2833 {
2834 regsize += 7;
2835 return;
2836 }
2837 src = regcode;
2838 regcode += 7;
2839 dst = regcode;
2840 while (src > opnd)
2841 *--dst = *--src;
2842
2843 place = opnd; /* Op node, where operand used to be. */
2844 *place++ = op;
2845 *place++ = NUL;
2846 *place++ = NUL;
2847 place = re_put_long(place, (long_u)val);
2848}
2849
2850/*
2851 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002852 * The operator has the given limit values as operands. Also set next pointer.
2853 *
2854 * Means relocating the operand.
2855 */
2856 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002857reginsert_limits(
2858 int op,
2859 long minval,
2860 long maxval,
2861 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002862{
2863 char_u *src;
2864 char_u *dst;
2865 char_u *place;
2866
2867 if (regcode == JUST_CALC_SIZE)
2868 {
2869 regsize += 11;
2870 return;
2871 }
2872 src = regcode;
2873 regcode += 11;
2874 dst = regcode;
2875 while (src > opnd)
2876 *--dst = *--src;
2877
2878 place = opnd; /* Op node, where operand used to be. */
2879 *place++ = op;
2880 *place++ = NUL;
2881 *place++ = NUL;
2882 place = re_put_long(place, (long_u)minval);
2883 place = re_put_long(place, (long_u)maxval);
2884 regtail(opnd, place);
2885}
2886
2887/*
2888 * Write a long as four bytes at "p" and return pointer to the next char.
2889 */
2890 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002891re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002892{
2893 *p++ = (char_u) ((val >> 24) & 0377);
2894 *p++ = (char_u) ((val >> 16) & 0377);
2895 *p++ = (char_u) ((val >> 8) & 0377);
2896 *p++ = (char_u) (val & 0377);
2897 return p;
2898}
2899
2900/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002901 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002902 */
2903 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002904regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002905{
2906 char_u *scan;
2907 char_u *temp;
2908 int offset;
2909
2910 if (p == JUST_CALC_SIZE)
2911 return;
2912
2913 /* Find last node. */
2914 scan = p;
2915 for (;;)
2916 {
2917 temp = regnext(scan);
2918 if (temp == NULL)
2919 break;
2920 scan = temp;
2921 }
2922
Bram Moolenaar582fd852005-03-28 20:58:01 +00002923 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002924 offset = (int)(scan - val);
2925 else
2926 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002927 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002928 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002929 * values in too many places. */
2930 if (offset > 0xffff)
2931 reg_toolong = TRUE;
2932 else
2933 {
2934 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2935 *(scan + 2) = (char_u) (offset & 0377);
2936 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002937}
2938
2939/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002940 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002941 */
2942 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002943regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002944{
2945 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2946 if (p == NULL || p == JUST_CALC_SIZE
2947 || (OP(p) != BRANCH
2948 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2949 return;
2950 regtail(OPERAND(p), val);
2951}
2952
2953/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002954 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002955 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002956/*
2957 * Start parsing at "str".
2958 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002959 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002960initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002961{
2962 regparse = str;
2963 prevchr_len = 0;
2964 curchr = prevprevchr = prevchr = nextchr = -1;
2965 at_start = TRUE;
2966 prev_at_start = FALSE;
2967}
2968
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002969/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002970 * Save the current parse state, so that it can be restored and parsing
2971 * starts in the same state again.
2972 */
2973 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002974save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002975{
2976 ps->regparse = regparse;
2977 ps->prevchr_len = prevchr_len;
2978 ps->curchr = curchr;
2979 ps->prevchr = prevchr;
2980 ps->prevprevchr = prevprevchr;
2981 ps->nextchr = nextchr;
2982 ps->at_start = at_start;
2983 ps->prev_at_start = prev_at_start;
2984 ps->regnpar = regnpar;
2985}
2986
2987/*
2988 * Restore a previously saved parse state.
2989 */
2990 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002991restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002992{
2993 regparse = ps->regparse;
2994 prevchr_len = ps->prevchr_len;
2995 curchr = ps->curchr;
2996 prevchr = ps->prevchr;
2997 prevprevchr = ps->prevprevchr;
2998 nextchr = ps->nextchr;
2999 at_start = ps->at_start;
3000 prev_at_start = ps->prev_at_start;
3001 regnpar = ps->regnpar;
3002}
3003
3004
3005/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003006 * Get the next character without advancing.
3007 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003008 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003009peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003010{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003011 static int after_slash = FALSE;
3012
Bram Moolenaar071d4272004-06-13 20:20:40 +00003013 if (curchr == -1)
3014 {
3015 switch (curchr = regparse[0])
3016 {
3017 case '.':
3018 case '[':
3019 case '~':
3020 /* magic when 'magic' is on */
3021 if (reg_magic >= MAGIC_ON)
3022 curchr = Magic(curchr);
3023 break;
3024 case '(':
3025 case ')':
3026 case '{':
3027 case '%':
3028 case '+':
3029 case '=':
3030 case '?':
3031 case '@':
3032 case '!':
3033 case '&':
3034 case '|':
3035 case '<':
3036 case '>':
3037 case '#': /* future ext. */
3038 case '"': /* future ext. */
3039 case '\'': /* future ext. */
3040 case ',': /* future ext. */
3041 case '-': /* future ext. */
3042 case ':': /* future ext. */
3043 case ';': /* future ext. */
3044 case '`': /* future ext. */
3045 case '/': /* Can't be used in / command */
3046 /* magic only after "\v" */
3047 if (reg_magic == MAGIC_ALL)
3048 curchr = Magic(curchr);
3049 break;
3050 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003051 /* * is not magic as the very first character, eg "?*ptr", when
3052 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3053 * "\(\*" is not magic, thus must be magic if "after_slash" */
3054 if (reg_magic >= MAGIC_ON
3055 && !at_start
3056 && !(prev_at_start && prevchr == Magic('^'))
3057 && (after_slash
3058 || (prevchr != Magic('(')
3059 && prevchr != Magic('&')
3060 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003061 curchr = Magic('*');
3062 break;
3063 case '^':
3064 /* '^' is only magic as the very first character and if it's after
3065 * "\(", "\|", "\&' or "\n" */
3066 if (reg_magic >= MAGIC_OFF
3067 && (at_start
3068 || reg_magic == MAGIC_ALL
3069 || prevchr == Magic('(')
3070 || prevchr == Magic('|')
3071 || prevchr == Magic('&')
3072 || prevchr == Magic('n')
3073 || (no_Magic(prevchr) == '('
3074 && prevprevchr == Magic('%'))))
3075 {
3076 curchr = Magic('^');
3077 at_start = TRUE;
3078 prev_at_start = FALSE;
3079 }
3080 break;
3081 case '$':
3082 /* '$' is only magic as the very last char and if it's in front of
3083 * either "\|", "\)", "\&", or "\n" */
3084 if (reg_magic >= MAGIC_OFF)
3085 {
3086 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003087 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003088
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003089 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003090 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003091 || p[1] == 'm' || p[1] == 'M'
3092 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3093 {
3094 if (p[1] == 'v')
3095 is_magic_all = TRUE;
3096 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3097 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003098 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003099 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003100 if (p[0] == NUL
3101 || (p[0] == '\\'
3102 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3103 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003104 || (is_magic_all
3105 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003106 || reg_magic == MAGIC_ALL)
3107 curchr = Magic('$');
3108 }
3109 break;
3110 case '\\':
3111 {
3112 int c = regparse[1];
3113
3114 if (c == NUL)
3115 curchr = '\\'; /* trailing '\' */
3116 else if (
3117#ifdef EBCDIC
3118 vim_strchr(META, c)
3119#else
3120 c <= '~' && META_flags[c]
3121#endif
3122 )
3123 {
3124 /*
3125 * META contains everything that may be magic sometimes,
3126 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003127 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003128 * magicness. Therefore, \ is so meta-magic that it is
3129 * not in META.
3130 */
3131 curchr = -1;
3132 prev_at_start = at_start;
3133 at_start = FALSE; /* be able to say "/\*ptr" */
3134 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003135 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003136 peekchr();
3137 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003138 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003139 curchr = toggle_Magic(curchr);
3140 }
3141 else if (vim_strchr(REGEXP_ABBR, c))
3142 {
3143 /*
3144 * Handle abbreviations, like "\t" for TAB -- webb
3145 */
3146 curchr = backslash_trans(c);
3147 }
3148 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3149 curchr = toggle_Magic(c);
3150 else
3151 {
3152 /*
3153 * Next character can never be (made) magic?
3154 * Then backslashing it won't do anything.
3155 */
3156#ifdef FEAT_MBYTE
3157 if (has_mbyte)
3158 curchr = (*mb_ptr2char)(regparse + 1);
3159 else
3160#endif
3161 curchr = c;
3162 }
3163 break;
3164 }
3165
3166#ifdef FEAT_MBYTE
3167 default:
3168 if (has_mbyte)
3169 curchr = (*mb_ptr2char)(regparse);
3170#endif
3171 }
3172 }
3173
3174 return curchr;
3175}
3176
3177/*
3178 * Eat one lexed character. Do this in a way that we can undo it.
3179 */
3180 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003181skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003182{
3183 /* peekchr() eats a backslash, do the same here */
3184 if (*regparse == '\\')
3185 prevchr_len = 1;
3186 else
3187 prevchr_len = 0;
3188 if (regparse[prevchr_len] != NUL)
3189 {
3190#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003191 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003192 /* exclude composing chars that mb_ptr2len does include */
3193 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003194 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003195 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003196 else
3197#endif
3198 ++prevchr_len;
3199 }
3200 regparse += prevchr_len;
3201 prev_at_start = at_start;
3202 at_start = FALSE;
3203 prevprevchr = prevchr;
3204 prevchr = curchr;
3205 curchr = nextchr; /* use previously unget char, or -1 */
3206 nextchr = -1;
3207}
3208
3209/*
3210 * Skip a character while keeping the value of prev_at_start for at_start.
3211 * prevchr and prevprevchr are also kept.
3212 */
3213 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003214skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003215{
3216 int as = prev_at_start;
3217 int pr = prevchr;
3218 int prpr = prevprevchr;
3219
3220 skipchr();
3221 at_start = as;
3222 prevchr = pr;
3223 prevprevchr = prpr;
3224}
3225
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer.
 */
    static int
getchr(void)
{
    int     lexed = peekchr();

    skipchr();
    return lexed;
}
3238
3239/*
3240 * put character back. Works only once!
3241 */
3242 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003243ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003244{
3245 nextchr = curchr;
3246 curchr = prevchr;
3247 prevchr = prevprevchr;
3248 at_start = prev_at_start;
3249 prev_at_start = FALSE;
3250
3251 /* Backup regparse, so that it's at the same position as before the
3252 * getchr(). */
3253 regparse -= prevchr_len;
3254}
3255
3256/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003257 * Get and return the value of the hex string at the current position.
3258 * Return -1 if there is no valid hex number.
3259 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003260 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003261 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003262 * The parameter controls the maximum number of input characters. This will be
3263 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3264 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003265 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003266gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003267{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003268 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003269 int c;
3270 int i;
3271
3272 for (i = 0; i < maxinputlen; ++i)
3273 {
3274 c = regparse[0];
3275 if (!vim_isxdigit(c))
3276 break;
3277 nr <<= 4;
3278 nr |= hex2nr(c);
3279 ++regparse;
3280 }
3281
3282 if (i == 0)
3283 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003284 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003285}
3286
3287/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003288 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003289 * current position. Return -1 for invalid. Consumes all digits.
3290 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003291 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003292getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003293{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003294 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003295 int c;
3296 int i;
3297
3298 for (i = 0; ; ++i)
3299 {
3300 c = regparse[0];
3301 if (c < '0' || c > '9')
3302 break;
3303 nr *= 10;
3304 nr += c - '0';
3305 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003306 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003307 }
3308
3309 if (i == 0)
3310 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003311 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003312}
3313
3314/*
3315 * get and return the value of the octal string immediately after the current
3316 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3317 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3318 * treat 8 or 9 as recognised characters. Position is updated:
3319 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003320 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003321 */
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003322 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003323getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003324{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003325 long_u nr = 0;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003326 int c;
3327 int i;
3328
3329 for (i = 0; i < 3 && nr < 040; ++i)
3330 {
3331 c = regparse[0];
3332 if (c < '0' || c > '7')
3333 break;
3334 nr <<= 3;
3335 nr |= hex2nr(c);
3336 ++regparse;
3337 }
3338
3339 if (i == 0)
3340 return -1;
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003341 return (long)nr;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003342}
3343
3344/*
3345 * Get a number after a backslash that is inside [].
3346 * When nothing is recognized return a backslash.
3347 */
3348 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003349coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003350{
Bram Moolenaar4c22a912017-11-02 22:29:38 +01003351 long nr = -1;
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003352
3353 switch (*regparse++)
3354 {
3355 case 'd': nr = getdecchrs(); break;
3356 case 'o': nr = getoctchrs(); break;
3357 case 'x': nr = gethexchrs(2); break;
3358 case 'u': nr = gethexchrs(4); break;
3359 case 'U': nr = gethexchrs(8); break;
3360 }
3361 if (nr < 0)
3362 {
3363 /* If getting the number fails be backwards compatible: the character
3364 * is a backslash. */
3365 --regparse;
3366 nr = '\\';
3367 }
3368 return nr;
3369}
3370
3371/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003372 * read_limits - Read two integers to be taken as a minimum and maximum.
3373 * If the first character is '-', then the range is reversed.
3374 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3375 * missing, a very big number is the default.
3376 */
3377 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003378read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003379{
3380 int reverse = FALSE;
3381 char_u *first_char;
3382 long tmp;
3383
3384 if (*regparse == '-')
3385 {
3386 /* Starts with '-', so reverse the range later */
3387 regparse++;
3388 reverse = TRUE;
3389 }
3390 first_char = regparse;
3391 *minval = getdigits(&regparse);
3392 if (*regparse == ',') /* There is a comma */
3393 {
3394 if (vim_isdigit(*++regparse))
3395 *maxval = getdigits(&regparse);
3396 else
3397 *maxval = MAX_LIMIT;
3398 }
3399 else if (VIM_ISDIGIT(*first_char))
3400 *maxval = *minval; /* It was \{n} or \{-n} */
3401 else
3402 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3403 if (*regparse == '\\')
3404 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003405 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003406 {
3407 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3408 reg_magic == MAGIC_ALL ? "" : "\\");
3409 EMSG_RET_FAIL(IObuff);
3410 }
3411
3412 /*
3413 * Reverse the range if there was a '-', or make sure it is in the right
3414 * order otherwise.
3415 */
3416 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3417 {
3418 tmp = *minval;
3419 *minval = *maxval;
3420 *maxval = tmp;
3421 }
3422 skipchr(); /* let's be friends with the lexer again */
3423 return OK;
3424}
3425
3426/*
3427 * vim_regexec and friends
3428 */
3429
3430/*
3431 * Global work variables for vim_regexec().
3432 */
3433
Bram Moolenaar071d4272004-06-13 20:20:40 +00003434/*
3435 * Structure used to save the current input state, when it needs to be
3436 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003437 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003438 */
3439typedef struct
3440{
3441 union
3442 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003443 char_u *ptr; /* rex.input pointer, for single-line regexp */
3444 lpos_T pos; /* rex.input pos, for multi-line regexp */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003445 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003446 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003447} regsave_T;
3448
3449/* struct to save start/end pointer/position in for \(\) */
3450typedef struct
3451{
3452 union
3453 {
3454 char_u *ptr;
3455 lpos_T pos;
3456 } se_u;
3457} save_se_T;
3458
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003459/* used for BEHIND and NOBEHIND matching */
3460typedef struct regbehind_S
3461{
3462 regsave_T save_after;
3463 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003464 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003465 save_se_T save_start[NSUBEXP];
3466 save_se_T save_end[NSUBEXP];
3467} regbehind_T;
3468
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003469static char_u *reg_getline(linenr_T lnum);
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003470static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaar09463262017-06-17 20:55:06 +02003471static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003472static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003473#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003474static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003475#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003476static void save_subexpr(regbehind_T *bp);
3477static void restore_subexpr(regbehind_T *bp);
3478static void reg_nextline(void);
3479static void reg_save(regsave_T *save, garray_T *gap);
3480static void reg_restore(regsave_T *save, garray_T *gap);
3481static int reg_save_equal(regsave_T *save);
3482static void save_se_multi(save_se_T *savep, lpos_T *posp);
3483static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003484
/* Save the sub-expressions before attempting a match: the position "posp"
 * for a multi-line match, the pointer "pp" otherwise. */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/* After a failed match restore the sub-expressions: the position "posp" for
 * a multi-line match, the pointer "pp" otherwise. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3495
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003496static int re_num_cmp(long_u val, char_u *scan);
3497static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
Bram Moolenaar09463262017-06-17 20:55:06 +02003498static int regmatch(char_u *prog, proftime_T *tm, int *timed_out);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003499static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003500
#ifdef DEBUG
/* Only defined in DEBUG builds; initially zero. */
int             regnarrate = 0;
#endif
3504
3505/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003506 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3507 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003508 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003509 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003510static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003511static unsigned reg_tofreelen;
3512
3513/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02003514 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003515 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003516 * done:
3517 * single-line multi-line
3518 * reg_match &regmatch_T NULL
3519 * reg_mmatch NULL &regmmatch_T
3520 * reg_startp reg_match->startp <invalid>
3521 * reg_endp reg_match->endp <invalid>
3522 * reg_startpos <invalid> reg_mmatch->startpos
3523 * reg_endpos <invalid> reg_mmatch->endpos
3524 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003525 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003526 * reg_firstlnum <invalid> first line in which to search
3527 * reg_maxline 0 last line nr
3528 * reg_line_lbr FALSE or TRUE FALSE
3529 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003530typedef struct {
3531 regmatch_T *reg_match;
3532 regmmatch_T *reg_mmatch;
3533 char_u **reg_startp;
3534 char_u **reg_endp;
3535 lpos_T *reg_startpos;
3536 lpos_T *reg_endpos;
3537 win_T *reg_win;
3538 buf_T *reg_buf;
3539 linenr_T reg_firstlnum;
3540 linenr_T reg_maxline;
3541 int reg_line_lbr; /* "\n" in string is line break */
3542
Bram Moolenaar0270f382018-07-17 05:43:58 +02003543 // The current match-position is stord in these variables:
3544 linenr_T lnum; // line number, relative to first line
3545 char_u *line; // start of current line
3546 char_u *input; // current input, points into "regline"
3547
3548 int need_clear_subexpr; // subexpressions still need to be cleared
3549#ifdef FEAT_SYN_HL
3550 int need_clear_zsubexpr; // extmatch subexpressions still need to be
3551 // cleared
3552#endif
3553
Bram Moolenaar6100d022016-10-02 16:51:57 +02003554 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3555 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3556 * contains '\c' or '\C' the value is overruled. */
3557 int reg_ic;
3558
3559#ifdef FEAT_MBYTE
Bram Moolenaar0270f382018-07-17 05:43:58 +02003560 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z
Bram Moolenaar6100d022016-10-02 16:51:57 +02003561 * flag in the regexp. Defaults to false, always. */
3562 int reg_icombine;
3563#endif
3564
3565 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3566 * there is no maximum. */
3567 colnr_T reg_maxcol;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003568
3569 // State for the NFA engine regexec.
3570 int nfa_has_zend; // NFA regexp \ze operator encountered.
3571 int nfa_has_backref; // NFA regexp \1 .. \9 encountered.
3572 int nfa_nsubexpr; // Number of sub expressions actually being used
3573 // during execution. 1 if only the whole match
3574 // (subexpr 0) is used.
3575 // listid is global, so that it increases on recursive calls to
3576 // nfa_regmatch(), which means we don't have to clear the lastlist field of
3577 // all the states.
3578 int nfa_listid;
3579 int nfa_alt_listid;
3580
3581#ifdef FEAT_SYN_HL
3582 int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr.
3583#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003584} regexec_T;
3585
3586static regexec_T rex;
3587static int rex_in_use = FALSE;
3588
Bram Moolenaar071d4272004-06-13 20:20:40 +00003589
// Values for "rs_state" in regitem_T: what kind of node pushed the item.
typedef enum regstate_E
{
    RS_NOPEN = 0,       // NOPEN and NCLOSE
    RS_MOPEN,           // MOPEN + [0-9]
    RS_MCLOSE,          // MCLOSE + [0-9]
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           // ZOPEN + [0-9]
    RS_ZCLOSE,          // ZCLOSE + [0-9]
#endif
    RS_BRANCH,          // BRANCH
    RS_BRCPLX_MORE,     // BRACE_COMPLEX and trying one more match
    RS_BRCPLX_LONG,     // BRACE_COMPLEX and trying longest match
    RS_BRCPLX_SHORT,    // BRACE_COMPLEX and trying shortest match
    RS_NOMATCH,         // NOMATCH
    RS_BEHIND1,         // BEHIND / NOBEHIND matching rest
    RS_BEHIND2,         // BEHIND / NOBEHIND matching behind part
    RS_STAR_LONG,       // STAR/PLUS/BRACE_SIMPLE longest match
    RS_STAR_SHORT       // STAR/PLUS/BRACE_SIMPLE shortest match
} regstate_T;
3610
3611/*
3612 * When there are alternatives a regstate_T is put on the regstack to remember
3613 * what we are doing.
3614 * Before it may be another type of item, depending on rs_state, to remember
3615 * more things.
3616 */
3617typedef struct regitem_S
3618{
3619 regstate_T rs_state; /* what we are doing, one of RS_ above */
3620 char_u *rs_scan; /* current node in program */
3621 union
3622 {
3623 save_se_T sesave;
3624 regsave_T regsave;
Bram Moolenaar0270f382018-07-17 05:43:58 +02003625 } rs_un; /* room for saving rex.input */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003626 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003627} regitem_T;
3628
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003629static regitem_T *regstack_push(regstate_T state, char_u *scan);
3630static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003631
// Bookkeeping used for STAR, PLUS and BRACE_SIMPLE matching.
typedef struct regstar_S
{
    int         nextb;          // next byte
    int         nextb_ic;       // next byte reverse case
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3641
3642/* used to store input position when a BACK was encountered, so that we now if
3643 * we made any progress since the last time. */
3644typedef struct backpos_S
3645{
3646 char_u *bp_scan; /* "scan" where BACK was encountered */
3647 regsave_T bp_pos; /* last input position */
3648} backpos_T;
3649
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items for BACK.
 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003657static garray_T regstack = {0, 0, 0, 0, NULL};
3658static garray_T backpos = {0, 0, 0, 0, NULL};
3659
3660/*
3661 * Both for regstack and backpos tables we use the following strategy of
3662 * allocation (to reduce malloc/free calls):
3663 * - Initial size is fairly small.
3664 * - When needed, the tables are grown bigger (8 times at first, double after
3665 * that).
3666 * - After executing the match we free the memory only if the array has grown.
3667 * Thus the memory is kept allocated when it's at the initial size.
3668 * This makes it fast while not keeping a lot of memory allocated.
3669 * A three times speed increase was observed when using many simple patterns.
3670 */
3671#define REGSTACK_INITIAL 2048
3672#define BACKPOS_INITIAL 64
3673
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory this file keeps allocated between matches: the
 * "reg_prev_sub" and "reg_tofree" buffers and the "backpos" and "regstack"
 * growarrays.  Only compiled when EXITFREE (or PROTO) is defined.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003684
Bram Moolenaar071d4272004-06-13 20:20:40 +00003685/*
3686 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3687 */
3688 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003689reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003690{
3691 /* when looking behind for a match/no-match lnum is negative. But we
3692 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003693 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003694 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003695 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003696 /* Must have matched the "\n" in the last line. */
3697 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003698 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003699}
3700
3701static regsave_T behind_pos;
3702
3703#ifdef FEAT_SYN_HL
3704static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3705static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3706static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3707static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3708#endif
3709
3710/* TRUE if using multi-line regexp. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003711#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003712
Bram Moolenaar071d4272004-06-13 20:20:40 +00003713/*
3714 * Match a regexp against a string.
3715 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3716 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003717 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003718 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003719 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003720 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003721 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003722bt_regexec_nl(
3723 regmatch_T *rmp,
3724 char_u *line, /* string to match against */
3725 colnr_T col, /* column to start looking for match */
3726 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003727{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003728 rex.reg_match = rmp;
3729 rex.reg_mmatch = NULL;
3730 rex.reg_maxline = 0;
3731 rex.reg_line_lbr = line_lbr;
3732 rex.reg_buf = curbuf;
3733 rex.reg_win = NULL;
3734 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003735#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003736 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003737#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003738 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003739
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003740 return bt_regexec_both(line, col, NULL, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003741}
3742
Bram Moolenaar071d4272004-06-13 20:20:40 +00003743/*
3744 * Match a regexp against multiple lines.
3745 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3746 * Uses curbuf for line count and 'iskeyword'.
3747 *
3748 * Return zero if there is no match. Return number of lines contained in the
3749 * match otherwise.
3750 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003751 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003752bt_regexec_multi(
3753 regmmatch_T *rmp,
3754 win_T *win, /* window in which to search or NULL */
3755 buf_T *buf, /* buffer in which to search */
3756 linenr_T lnum, /* nr of line to start looking for match */
3757 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003758 proftime_T *tm, /* timeout limit or NULL */
3759 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003760{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003761 rex.reg_match = NULL;
3762 rex.reg_mmatch = rmp;
3763 rex.reg_buf = buf;
3764 rex.reg_win = win;
3765 rex.reg_firstlnum = lnum;
3766 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3767 rex.reg_line_lbr = FALSE;
3768 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003769#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003770 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003771#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003772 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003773
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003774 return bt_regexec_both(NULL, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003775}
3776
3777/*
3778 * Match a regexp against a string ("line" points to the string) or multiple
3779 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003780 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003781 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003782 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003783bt_regexec_both(
3784 char_u *line,
3785 colnr_T col, /* column to start looking for match */
Bram Moolenaar09463262017-06-17 20:55:06 +02003786 proftime_T *tm, /* timeout limit or NULL */
3787 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003788{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003789 bt_regprog_T *prog;
3790 char_u *s;
3791 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003792
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003793 /* Create "regstack" and "backpos" if they are not allocated yet.
3794 * We allocate *_INITIAL amount of bytes first and then set the grow size
3795 * to much bigger value to avoid many malloc calls in case of deep regular
3796 * expressions. */
3797 if (regstack.ga_data == NULL)
3798 {
3799 /* Use an item size of 1 byte, since we push different things
3800 * onto the regstack. */
3801 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003802 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003803 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3804 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003805
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003806 if (backpos.ga_data == NULL)
3807 {
3808 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003809 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003810 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3811 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003812
Bram Moolenaar071d4272004-06-13 20:20:40 +00003813 if (REG_MULTI)
3814 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003815 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003816 line = reg_getline((linenr_T)0);
Bram Moolenaar6100d022016-10-02 16:51:57 +02003817 rex.reg_startpos = rex.reg_mmatch->startpos;
3818 rex.reg_endpos = rex.reg_mmatch->endpos;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003819 }
3820 else
3821 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003822 prog = (bt_regprog_T *)rex.reg_match->regprog;
3823 rex.reg_startp = rex.reg_match->startp;
3824 rex.reg_endp = rex.reg_match->endp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003825 }
3826
3827 /* Be paranoid... */
3828 if (prog == NULL || line == NULL)
3829 {
3830 EMSG(_(e_null));
3831 goto theend;
3832 }
3833
3834 /* Check validity of program. */
3835 if (prog_magic_wrong())
3836 goto theend;
3837
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003838 /* If the start column is past the maximum column: no need to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003839 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003840 goto theend;
3841
Bram Moolenaar6100d022016-10-02 16:51:57 +02003842 /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003843 if (prog->regflags & RF_ICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003844 rex.reg_ic = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003845 else if (prog->regflags & RF_NOICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003846 rex.reg_ic = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003847
3848#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003849 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003850 if (prog->regflags & RF_ICOMBINE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003851 rex.reg_icombine = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003852#endif
3853
3854 /* If there is a "must appear" string, look for it. */
3855 if (prog->regmust != NULL)
3856 {
3857 int c;
3858
3859#ifdef FEAT_MBYTE
3860 if (has_mbyte)
3861 c = (*mb_ptr2char)(prog->regmust);
3862 else
3863#endif
3864 c = *prog->regmust;
3865 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003866
3867 /*
3868 * This is used very often, esp. for ":global". Use three versions of
3869 * the loop to avoid overhead of conditions.
3870 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003871 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003872#ifdef FEAT_MBYTE
3873 && !has_mbyte
3874#endif
3875 )
3876 while ((s = vim_strbyte(s, c)) != NULL)
3877 {
3878 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3879 break; /* Found it. */
3880 ++s;
3881 }
3882#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003883 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar05159a02005-02-26 23:04:13 +00003884 while ((s = vim_strchr(s, c)) != NULL)
3885 {
3886 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3887 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003888 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003889 }
3890#endif
3891 else
3892 while ((s = cstrchr(s, c)) != NULL)
3893 {
3894 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3895 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003896 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003897 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003898 if (s == NULL) /* Not present. */
3899 goto theend;
3900 }
3901
Bram Moolenaar0270f382018-07-17 05:43:58 +02003902 rex.line = line;
3903 rex.lnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003904 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003905
3906 /* Simplest case: Anchored match need be tried only once. */
3907 if (prog->reganch)
3908 {
3909 int c;
3910
3911#ifdef FEAT_MBYTE
3912 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003913 c = (*mb_ptr2char)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003914 else
3915#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02003916 c = rex.line[col];
Bram Moolenaar071d4272004-06-13 20:20:40 +00003917 if (prog->regstart == NUL
3918 || prog->regstart == c
Bram Moolenaar6100d022016-10-02 16:51:57 +02003919 || (rex.reg_ic && ((
Bram Moolenaar071d4272004-06-13 20:20:40 +00003920#ifdef FEAT_MBYTE
3921 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3922 || (c < 255 && prog->regstart < 255 &&
3923#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003924 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar09463262017-06-17 20:55:06 +02003925 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003926 else
3927 retval = 0;
3928 }
3929 else
3930 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003931#ifdef FEAT_RELTIME
3932 int tm_count = 0;
3933#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003934 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003935 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003936 {
3937 if (prog->regstart != NUL)
3938 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003939 /* Skip until the char we know it must start with.
3940 * Used often, do some work to avoid call overhead. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003941 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003942#ifdef FEAT_MBYTE
3943 && !has_mbyte
3944#endif
3945 )
Bram Moolenaar0270f382018-07-17 05:43:58 +02003946 s = vim_strbyte(rex.line + col, prog->regstart);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003947 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02003948 s = cstrchr(rex.line + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003949 if (s == NULL)
3950 {
3951 retval = 0;
3952 break;
3953 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003954 col = (int)(s - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003955 }
3956
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003957 /* Check for maximum column to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003958 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003959 {
3960 retval = 0;
3961 break;
3962 }
3963
Bram Moolenaar09463262017-06-17 20:55:06 +02003964 retval = regtry(prog, col, tm, timed_out);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003965 if (retval > 0)
3966 break;
3967
3968 /* if not currently on the first line, get it again */
Bram Moolenaar0270f382018-07-17 05:43:58 +02003969 if (rex.lnum != 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003970 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02003971 rex.lnum = 0;
3972 rex.line = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003973 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02003974 if (rex.line[col] == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003975 break;
3976#ifdef FEAT_MBYTE
3977 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02003978 col += (*mb_ptr2len)(rex.line + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003979 else
3980#endif
3981 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003982#ifdef FEAT_RELTIME
3983 /* Check for timeout once in a twenty times to avoid overhead. */
3984 if (tm != NULL && ++tm_count == 20)
3985 {
3986 tm_count = 0;
3987 if (profile_passed_limit(tm))
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003988 {
3989 if (timed_out != NULL)
3990 *timed_out = TRUE;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003991 break;
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003992 }
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003993 }
3994#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003995 }
3996 }
3997
Bram Moolenaar071d4272004-06-13 20:20:40 +00003998theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003999 /* Free "reg_tofree" when it's a bit big.
4000 * Free regstack and backpos if they are bigger than their initial size. */
4001 if (reg_tofreelen > 400)
Bram Moolenaard23a8232018-02-10 18:45:26 +01004002 VIM_CLEAR(reg_tofree);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004003 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4004 ga_clear(&regstack);
4005 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4006 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004007
Bram Moolenaar071d4272004-06-13 20:20:40 +00004008 return retval;
4009}
4010
4011#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004012static reg_extmatch_T *make_extmatch(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004013
4014/*
4015 * Create a new extmatch and mark it as referenced once.
4016 */
4017 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004018make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004019{
4020 reg_extmatch_T *em;
4021
4022 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4023 if (em != NULL)
4024 em->refcnt = 1;
4025 return em;
4026}
4027
4028/*
4029 * Add a reference to an extmatch.
4030 */
4031 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004032ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004033{
4034 if (em != NULL)
4035 em->refcnt++;
4036 return em;
4037}
4038
4039/*
4040 * Remove a reference to an extmatch. If there are no references left, free
4041 * the info.
4042 */
4043 void
Bram Moolenaar05540972016-01-30 20:31:25 +01004044unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004045{
4046 int i;
4047
4048 if (em != NULL && --em->refcnt <= 0)
4049 {
4050 for (i = 0; i < NSUBEXP; ++i)
4051 vim_free(em->matches[i]);
4052 vim_free(em);
4053 }
4054}
4055#endif
4056
4057/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004058 * regtry - try match of "prog" with at rex.line["col"].
Bram Moolenaar071d4272004-06-13 20:20:40 +00004059 * Returns 0 for failure, number of lines contained in the match otherwise.
4060 */
4061 static long
Bram Moolenaar09463262017-06-17 20:55:06 +02004062regtry(
4063 bt_regprog_T *prog,
4064 colnr_T col,
4065 proftime_T *tm, /* timeout limit or NULL */
4066 int *timed_out) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004067{
Bram Moolenaar0270f382018-07-17 05:43:58 +02004068 rex.input = rex.line + col;
4069 rex.need_clear_subexpr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004070#ifdef FEAT_SYN_HL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004071 // Clear the external match subpointers if necessary.
4072 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004073#endif
4074
Bram Moolenaar09463262017-06-17 20:55:06 +02004075 if (regmatch(prog->program + 1, tm, timed_out) == 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004076 return 0;
4077
4078 cleanup_subexpr();
4079 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004080 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004081 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004082 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004083 rex.reg_startpos[0].lnum = 0;
4084 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004085 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004086 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004087 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004088 rex.reg_endpos[0].lnum = rex.lnum;
4089 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004090 }
4091 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004092 /* Use line number of "\ze". */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004093 rex.lnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004094 }
4095 else
4096 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004097 if (rex.reg_startp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004098 rex.reg_startp[0] = rex.line + col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004099 if (rex.reg_endp[0] == NULL)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004100 rex.reg_endp[0] = rex.input;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004101 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004102#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004103 /* Package any found \z(...\) matches for export. Default is none. */
4104 unref_extmatch(re_extmatch_out);
4105 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004106
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004107 if (prog->reghasz == REX_SET)
4108 {
4109 int i;
4110
4111 cleanup_zsubexpr();
4112 re_extmatch_out = make_extmatch();
4113 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004114 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004115 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004116 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004117 /* Only accept single line matches. */
4118 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004119 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4120 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004121 re_extmatch_out->matches[i] =
4122 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004123 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004124 reg_endzpos[i].col - reg_startzpos[i].col);
4125 }
4126 else
4127 {
4128 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4129 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004130 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004131 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004132 }
4133 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004134 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004135#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004136 return 1 + rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004137}
4138
4139#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004140static int reg_prev_class(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004141
Bram Moolenaar071d4272004-06-13 20:20:40 +00004142/*
4143 * Get class of previous character.
4144 */
4145 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004146reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004147{
Bram Moolenaar0270f382018-07-17 05:43:58 +02004148 if (rex.input > rex.line)
4149 return mb_get_class_buf(rex.input - 1
4150 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004151 return -1;
4152}
Bram Moolenaar071d4272004-06-13 20:20:40 +00004153#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004154
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004155static int reg_match_visual(void);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004156
4157/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02004158 * Return TRUE if the current rex.input position matches the Visual area.
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004159 */
4160 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004161reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004162{
4163 pos_T top, bot;
4164 linenr_T lnum;
4165 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004166 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004167 int mode;
4168 colnr_T start, end;
4169 colnr_T start2, end2;
4170 colnr_T cols;
4171
4172 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004173 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004174 return FALSE;
4175
4176 if (VIsual_active)
4177 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004178 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004179 {
4180 top = VIsual;
4181 bot = wp->w_cursor;
4182 }
4183 else
4184 {
4185 top = wp->w_cursor;
4186 bot = VIsual;
4187 }
4188 mode = VIsual_mode;
4189 }
4190 else
4191 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004192 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004193 {
4194 top = curbuf->b_visual.vi_start;
4195 bot = curbuf->b_visual.vi_end;
4196 }
4197 else
4198 {
4199 top = curbuf->b_visual.vi_end;
4200 bot = curbuf->b_visual.vi_start;
4201 }
4202 mode = curbuf->b_visual.vi_mode;
4203 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004204 lnum = rex.lnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004205 if (lnum < top.lnum || lnum > bot.lnum)
4206 return FALSE;
4207
4208 if (mode == 'v')
4209 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004210 col = (colnr_T)(rex.input - rex.line);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004211 if ((lnum == top.lnum && col < top.col)
4212 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4213 return FALSE;
4214 }
4215 else if (mode == Ctrl_V)
4216 {
4217 getvvcol(wp, &top, &start, NULL, &end);
4218 getvvcol(wp, &bot, &start2, NULL, &end2);
4219 if (start2 < start)
4220 start = start2;
4221 if (end2 > end)
4222 end = end2;
4223 if (top.col == MAXCOL || bot.col == MAXCOL)
4224 end = MAXCOL;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004225 cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line));
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004226 if (cols < start || cols > end - (*p_sel == 'e'))
4227 return FALSE;
4228 }
4229 return TRUE;
4230}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004231
Bram Moolenaar0270f382018-07-17 05:43:58 +02004232#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004233
4234/*
4235 * The arguments from BRACE_LIMITS are stored here. They are actually local
4236 * to regmatch(), but they are here to reduce the amount of stack space used
4237 * (it can be called recursively many times).
4238 */
4239static long bl_minval;
4240static long bl_maxval;
4241
/*
 * regmatch - main matching routine
 *
 * Conceptually the strategy is simple: Check to see whether the current node
 * matches, push an item onto the regstack and loop to see whether the rest
 * matches, and then act accordingly.  In practice we make some effort to
 * avoid using the regstack, in particular by going through "ordinary" nodes
 * (that don't need to know whether the rest of the match failed) by a nested
 * loop.
 *
 * Returns TRUE when there is a match.  Leaves rex.input and rex.lnum just after
 * the last matched character.
 * Returns FALSE when there is no match.  Leaves rex.input and rex.lnum in an
 * undefined state!
 */
4257 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004258regmatch(
Bram Moolenaar09463262017-06-17 20:55:06 +02004259 char_u *scan, /* Current node. */
4260 proftime_T *tm UNUSED, /* timeout limit or NULL */
4261 int *timed_out UNUSED) /* flag set on timeout or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004262{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004263 char_u *next; /* Next node. */
4264 int op;
4265 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004266 regitem_T *rp;
4267 int no;
4268 int status; /* one of the RA_ values: */
4269#define RA_FAIL 1 /* something failed, abort */
4270#define RA_CONT 2 /* continue in inner loop */
4271#define RA_BREAK 3 /* break inner loop */
4272#define RA_MATCH 4 /* successful match */
4273#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar09463262017-06-17 20:55:06 +02004274#ifdef FEAT_RELTIME
4275 int tm_count = 0;
4276#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004277
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004278 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004279 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004280 regstack.ga_len = 0;
4281 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004282
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004283 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004284 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004285 */
4286 for (;;)
4287 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004288 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4289 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004290 fast_breakcheck();
4291
4292#ifdef DEBUG
4293 if (scan != NULL && regnarrate)
4294 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004295 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004296 mch_errmsg("(\n");
4297 }
4298#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004299
4300 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004301 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004302 * regstack.
4303 */
4304 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004305 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004306 if (got_int || scan == NULL)
4307 {
4308 status = RA_FAIL;
4309 break;
4310 }
Bram Moolenaar09463262017-06-17 20:55:06 +02004311#ifdef FEAT_RELTIME
4312 /* Check for timeout once in a 100 times to avoid overhead. */
4313 if (tm != NULL && ++tm_count == 100)
4314 {
4315 tm_count = 0;
4316 if (profile_passed_limit(tm))
4317 {
4318 if (timed_out != NULL)
4319 *timed_out = TRUE;
4320 status = RA_FAIL;
4321 break;
4322 }
4323 }
4324#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004325 status = RA_CONT;
4326
Bram Moolenaar071d4272004-06-13 20:20:40 +00004327#ifdef DEBUG
4328 if (regnarrate)
4329 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004330 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004331 mch_errmsg("...\n");
4332# ifdef FEAT_SYN_HL
4333 if (re_extmatch_in != NULL)
4334 {
4335 int i;
4336
4337 mch_errmsg(_("External submatches:\n"));
4338 for (i = 0; i < NSUBEXP; i++)
4339 {
4340 mch_errmsg(" \"");
4341 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004342 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004343 mch_errmsg("\"\n");
4344 }
4345 }
4346# endif
4347 }
4348#endif
4349 next = regnext(scan);
4350
4351 op = OP(scan);
4352 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004353 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
Bram Moolenaar0270f382018-07-17 05:43:58 +02004354 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004355 {
4356 reg_nextline();
4357 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004358 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004359 {
4360 ADVANCE_REGINPUT();
4361 }
4362 else
4363 {
4364 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004365 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004366#ifdef FEAT_MBYTE
4367 if (has_mbyte)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004368 c = (*mb_ptr2char)(rex.input);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004369 else
4370#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004371 c = *rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004372 switch (op)
4373 {
4374 case BOL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004375 if (rex.input != rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004377 break;
4378
4379 case EOL:
4380 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004381 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004382 break;
4383
4384 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004385 /* We're not at the beginning of the file when below the first
4386 * line where we started, not at the start of the line or we
4387 * didn't start at the first line of the buffer. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004388 if (rex.lnum != 0 || rex.input != rex.line
Bram Moolenaar6100d022016-10-02 16:51:57 +02004389 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004390 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004391 break;
4392
4393 case RE_EOF:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004394 if (rex.lnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004395 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 break;
4397
4398 case CURSOR:
4399 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004400 * rex.reg_win->w_cursor position to the match position. */
4401 if (rex.reg_win == NULL
Bram Moolenaar0270f382018-07-17 05:43:58 +02004402 || (rex.lnum + rex.reg_firstlnum
Bram Moolenaar6100d022016-10-02 16:51:57 +02004403 != rex.reg_win->w_cursor.lnum)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004404 || ((colnr_T)(rex.input - rex.line)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004405 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004406 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004407 break;
4408
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004409 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004410 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004411 {
4412 int mark = OPERAND(scan)[0];
4413 int cmp = OPERAND(scan)[1];
4414 pos_T *pos;
4415
Bram Moolenaar6100d022016-10-02 16:51:57 +02004416 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004417 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004418 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004419 || (pos->lnum == rex.lnum + rex.reg_firstlnum
4420 ? (pos->col == (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004421 ? (cmp == '<' || cmp == '>')
Bram Moolenaar0270f382018-07-17 05:43:58 +02004422 : (pos->col < (colnr_T)(rex.input - rex.line)
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004423 ? cmp != '>'
4424 : cmp != '<'))
Bram Moolenaar0270f382018-07-17 05:43:58 +02004425 : (pos->lnum < rex.lnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004426 ? cmp != '>'
4427 : cmp != '<')))
4428 status = RA_NOMATCH;
4429 }
4430 break;
4431
4432 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004433 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004434 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004435 break;
4436
Bram Moolenaar071d4272004-06-13 20:20:40 +00004437 case RE_LNUM:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004438 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004439 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004440 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004441 break;
4442
4443 case RE_COL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004444 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004445 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004446 break;
4447
4448 case RE_VCOL:
4449 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004450 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar0270f382018-07-17 05:43:58 +02004451 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004452 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004453 break;
4454
Bram Moolenaar0270f382018-07-17 05:43:58 +02004455 case BOW: /* \<word; rex.input points to w */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004456 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004457 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004458#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004459 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004460 {
4461 int this_class;
4462
4463 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004464 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004465 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004466 status = RA_NOMATCH; /* not on a word at all */
4467 else if (reg_prev_class() == this_class)
4468 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004469 }
4470#endif
4471 else
4472 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004473 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
4474 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004475 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004476 }
4477 break;
4478
Bram Moolenaar0270f382018-07-17 05:43:58 +02004479 case EOW: /* word\>; rex.input points after d */
4480 if (rex.input == rex.line) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004481 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004483 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004484 {
4485 int this_class, prev_class;
4486
4487 /* Get class of current and previous char (if it exists). */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004488 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004489 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004490 if (this_class == prev_class
4491 || prev_class == 0 || prev_class == 1)
4492 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004493 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004494#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004495 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004496 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004497 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
4498 || (rex.input[0] != NUL
Bram Moolenaar6100d022016-10-02 16:51:57 +02004499 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004500 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004501 }
4502 break; /* Matched with EOW */
4503
4504 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004505 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004506 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004507 status = RA_NOMATCH;
4508 else
4509 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004510 break;
4511
4512 case IDENT:
4513 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004514 status = RA_NOMATCH;
4515 else
4516 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004517 break;
4518
4519 case SIDENT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004520 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004521 status = RA_NOMATCH;
4522 else
4523 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004524 break;
4525
4526 case KWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004527 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004528 status = RA_NOMATCH;
4529 else
4530 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004531 break;
4532
4533 case SKWORD:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004534 if (VIM_ISDIGIT(*rex.input)
4535 || !vim_iswordp_buf(rex.input, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004536 status = RA_NOMATCH;
4537 else
4538 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004539 break;
4540
4541 case FNAME:
4542 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004543 status = RA_NOMATCH;
4544 else
4545 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004546 break;
4547
4548 case SFNAME:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004549 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004550 status = RA_NOMATCH;
4551 else
4552 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004553 break;
4554
4555 case PRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004556 if (!vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004557 status = RA_NOMATCH;
4558 else
4559 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 break;
4561
4562 case SPRINT:
Bram Moolenaar0270f382018-07-17 05:43:58 +02004563 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004564 status = RA_NOMATCH;
4565 else
4566 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004567 break;
4568
4569 case WHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004570 if (!VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004571 status = RA_NOMATCH;
4572 else
4573 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004574 break;
4575
4576 case NWHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004577 if (c == NUL || VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004578 status = RA_NOMATCH;
4579 else
4580 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004581 break;
4582
4583 case DIGIT:
4584 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004585 status = RA_NOMATCH;
4586 else
4587 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004588 break;
4589
4590 case NDIGIT:
4591 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004592 status = RA_NOMATCH;
4593 else
4594 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004595 break;
4596
4597 case HEX:
4598 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004599 status = RA_NOMATCH;
4600 else
4601 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004602 break;
4603
4604 case NHEX:
4605 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004606 status = RA_NOMATCH;
4607 else
4608 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 break;
4610
4611 case OCTAL:
4612 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 status = RA_NOMATCH;
4614 else
4615 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004616 break;
4617
4618 case NOCTAL:
4619 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004620 status = RA_NOMATCH;
4621 else
4622 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 break;
4624
4625 case WORD:
4626 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004627 status = RA_NOMATCH;
4628 else
4629 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004630 break;
4631
4632 case NWORD:
4633 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004634 status = RA_NOMATCH;
4635 else
4636 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004637 break;
4638
4639 case HEAD:
4640 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004641 status = RA_NOMATCH;
4642 else
4643 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004644 break;
4645
4646 case NHEAD:
4647 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004648 status = RA_NOMATCH;
4649 else
4650 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004651 break;
4652
4653 case ALPHA:
4654 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004655 status = RA_NOMATCH;
4656 else
4657 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004658 break;
4659
4660 case NALPHA:
4661 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004662 status = RA_NOMATCH;
4663 else
4664 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004665 break;
4666
4667 case LOWER:
4668 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004669 status = RA_NOMATCH;
4670 else
4671 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004672 break;
4673
4674 case NLOWER:
4675 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004676 status = RA_NOMATCH;
4677 else
4678 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004679 break;
4680
4681 case UPPER:
4682 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004683 status = RA_NOMATCH;
4684 else
4685 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004686 break;
4687
4688 case NUPPER:
4689 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004690 status = RA_NOMATCH;
4691 else
4692 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004693 break;
4694
4695 case EXACTLY:
4696 {
4697 int len;
4698 char_u *opnd;
4699
4700 opnd = OPERAND(scan);
4701 /* Inline the first byte, for speed. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004702 if (*opnd != *rex.input
Bram Moolenaar6100d022016-10-02 16:51:57 +02004703 && (!rex.reg_ic || (
Bram Moolenaar071d4272004-06-13 20:20:40 +00004704#ifdef FEAT_MBYTE
4705 !enc_utf8 &&
4706#endif
Bram Moolenaar0270f382018-07-17 05:43:58 +02004707 MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004708 status = RA_NOMATCH;
4709 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004710 {
4711 /* match empty string always works; happens when "~" is
4712 * empty. */
4713 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004714 else
4715 {
4716 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004717#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02004718 && !(enc_utf8 && rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004719#endif
4720 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004721 {
4722 len = 1; /* matched a single byte above */
4723 }
4724 else
4725 {
4726 /* Need to match first byte again for multi-byte. */
4727 len = (int)STRLEN(opnd);
Bram Moolenaar0270f382018-07-17 05:43:58 +02004728 if (cstrncmp(opnd, rex.input, &len) != 0)
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004729 status = RA_NOMATCH;
4730 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004731#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004732 /* Check for following composing character, unless %C
4733 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004734 if (status != RA_NOMATCH
4735 && enc_utf8
Bram Moolenaar0270f382018-07-17 05:43:58 +02004736 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004737 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004738 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004739 {
4740 /* raaron: This code makes a composing character get
4741 * ignored, which is the correct behavior (sometimes)
4742 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004743 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004744 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004745#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004746 if (status != RA_NOMATCH)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004747 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004748 }
4749 }
4750 break;
4751
4752 case ANYOF:
4753 case ANYBUT:
4754 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004755 status = RA_NOMATCH;
4756 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4757 status = RA_NOMATCH;
4758 else
4759 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004760 break;
4761
4762#ifdef FEAT_MBYTE
4763 case MULTIBYTECODE:
4764 if (has_mbyte)
4765 {
4766 int i, len;
4767 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004768 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004769
4770 opnd = OPERAND(scan);
4771 /* Safety check (just in case 'encoding' was changed since
4772 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004773 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004774 {
4775 status = RA_NOMATCH;
4776 break;
4777 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004778 if (enc_utf8)
Bram Moolenaarace95982017-03-29 17:30:27 +02004779 opndc = utf_ptr2char(opnd);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004780 if (enc_utf8 && utf_iscomposing(opndc))
4781 {
4782 /* When only a composing char is given match at any
4783 * position where that composing char appears. */
4784 status = RA_NOMATCH;
Bram Moolenaar0270f382018-07-17 05:43:58 +02004785 for (i = 0; rex.input[i] != NUL;
4786 i += utf_ptr2len(rex.input + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004787 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02004788 inpc = utf_ptr2char(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004789 if (!utf_iscomposing(inpc))
4790 {
4791 if (i > 0)
4792 break;
4793 }
4794 else if (opndc == inpc)
4795 {
4796 /* Include all following composing chars. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004797 len = i + utfc_ptr2len(rex.input + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004798 status = RA_MATCH;
4799 break;
4800 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004801 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004802 }
4803 else
4804 for (i = 0; i < len; ++i)
Bram Moolenaar0270f382018-07-17 05:43:58 +02004805 if (opnd[i] != rex.input[i])
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004806 {
4807 status = RA_NOMATCH;
4808 break;
4809 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02004810 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004811 }
4812 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004813 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004814 break;
4815#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004816 case RE_COMPOSING:
4817#ifdef FEAT_MBYTE
4818 if (enc_utf8)
4819 {
4820 /* Skip composing characters. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02004821 while (utf_iscomposing(utf_ptr2char(rex.input)))
4822 MB_CPTR_ADV(rex.input);
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004823 }
4824#endif
4825 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004826
4827 case NOTHING:
4828 break;
4829
4830 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004831 {
4832 int i;
4833 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004834
Bram Moolenaar582fd852005-03-28 20:58:01 +00004835 /*
4836 * When we run into BACK we need to check if we don't keep
4837 * looping without matching any input. The second and later
4838 * times a BACK is encountered it fails if the input is still
4839 * at the same position as the previous time.
4840 * The positions are stored in "backpos" and found by the
4841 * current value of "scan", the position in the RE program.
4842 */
4843 bp = (backpos_T *)backpos.ga_data;
4844 for (i = 0; i < backpos.ga_len; ++i)
4845 if (bp[i].bp_scan == scan)
4846 break;
4847 if (i == backpos.ga_len)
4848 {
4849 /* First time at this BACK, make room to store the pos. */
4850 if (ga_grow(&backpos, 1) == FAIL)
4851 status = RA_FAIL;
4852 else
4853 {
4854 /* get "ga_data" again, it may have changed */
4855 bp = (backpos_T *)backpos.ga_data;
4856 bp[i].bp_scan = scan;
4857 ++backpos.ga_len;
4858 }
4859 }
4860 else if (reg_save_equal(&bp[i].bp_pos))
4861 /* Still at same position as last time, fail. */
4862 status = RA_NOMATCH;
4863
4864 if (status != RA_FAIL && status != RA_NOMATCH)
4865 reg_save(&bp[i].bp_pos, &backpos);
4866 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004867 break;
4868
Bram Moolenaar071d4272004-06-13 20:20:40 +00004869 case MOPEN + 0: /* Match start: \zs */
4870 case MOPEN + 1: /* \( */
4871 case MOPEN + 2:
4872 case MOPEN + 3:
4873 case MOPEN + 4:
4874 case MOPEN + 5:
4875 case MOPEN + 6:
4876 case MOPEN + 7:
4877 case MOPEN + 8:
4878 case MOPEN + 9:
4879 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004880 no = op - MOPEN;
4881 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004882 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004883 if (rp == NULL)
4884 status = RA_FAIL;
4885 else
4886 {
4887 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004888 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4889 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004890 /* We simply continue and handle the result when done. */
4891 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004892 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004893 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004894
4895 case NOPEN: /* \%( */
4896 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004897 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004898 status = RA_FAIL;
4899 /* We simply continue and handle the result when done. */
4900 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004901
4902#ifdef FEAT_SYN_HL
4903 case ZOPEN + 1:
4904 case ZOPEN + 2:
4905 case ZOPEN + 3:
4906 case ZOPEN + 4:
4907 case ZOPEN + 5:
4908 case ZOPEN + 6:
4909 case ZOPEN + 7:
4910 case ZOPEN + 8:
4911 case ZOPEN + 9:
4912 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004913 no = op - ZOPEN;
4914 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004915 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004916 if (rp == NULL)
4917 status = RA_FAIL;
4918 else
4919 {
4920 rp->rs_no = no;
4921 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4922 &reg_startzp[no]);
4923 /* We simply continue and handle the result when done. */
4924 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004925 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004926 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004927#endif
4928
4929 case MCLOSE + 0: /* Match end: \ze */
4930 case MCLOSE + 1: /* \) */
4931 case MCLOSE + 2:
4932 case MCLOSE + 3:
4933 case MCLOSE + 4:
4934 case MCLOSE + 5:
4935 case MCLOSE + 6:
4936 case MCLOSE + 7:
4937 case MCLOSE + 8:
4938 case MCLOSE + 9:
4939 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004940 no = op - MCLOSE;
4941 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004942 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004943 if (rp == NULL)
4944 status = RA_FAIL;
4945 else
4946 {
4947 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004948 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4949 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004950 /* We simply continue and handle the result when done. */
4951 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004952 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004953 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004954
4955#ifdef FEAT_SYN_HL
4956 case ZCLOSE + 1: /* \) after \z( */
4957 case ZCLOSE + 2:
4958 case ZCLOSE + 3:
4959 case ZCLOSE + 4:
4960 case ZCLOSE + 5:
4961 case ZCLOSE + 6:
4962 case ZCLOSE + 7:
4963 case ZCLOSE + 8:
4964 case ZCLOSE + 9:
4965 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004966 no = op - ZCLOSE;
4967 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004968 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004969 if (rp == NULL)
4970 status = RA_FAIL;
4971 else
4972 {
4973 rp->rs_no = no;
4974 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4975 &reg_endzp[no]);
4976 /* We simply continue and handle the result when done. */
4977 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004978 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004979 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004980#endif
4981
4982 case BACKREF + 1:
4983 case BACKREF + 2:
4984 case BACKREF + 3:
4985 case BACKREF + 4:
4986 case BACKREF + 5:
4987 case BACKREF + 6:
4988 case BACKREF + 7:
4989 case BACKREF + 8:
4990 case BACKREF + 9:
4991 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004992 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004993
4994 no = op - BACKREF;
4995 cleanup_subexpr();
4996 if (!REG_MULTI) /* Single-line regexp */
4997 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004998 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004999 {
5000 /* Backref was not set: Match an empty string. */
5001 len = 0;
5002 }
5003 else
5004 {
5005 /* Compare current input with back-ref in the same
5006 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02005007 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
Bram Moolenaar0270f382018-07-17 05:43:58 +02005008 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005009 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005010 }
5011 }
5012 else /* Multi-line regexp */
5013 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005014 if (rex.reg_startpos[no].lnum < 0
5015 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005016 {
5017 /* Backref was not set: Match an empty string. */
5018 len = 0;
5019 }
5020 else
5021 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005022 if (rex.reg_startpos[no].lnum == rex.lnum
5023 && rex.reg_endpos[no].lnum == rex.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005024 {
5025 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02005026 len = rex.reg_endpos[no].col
5027 - rex.reg_startpos[no].col;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005028 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
5029 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005030 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005031 }
5032 else
5033 {
5034 /* Messy situation: Need to compare between two
5035 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005036 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02005037 rex.reg_startpos[no].lnum,
5038 rex.reg_startpos[no].col,
5039 rex.reg_endpos[no].lnum,
5040 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02005041 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005042
5043 if (r != RA_MATCH)
5044 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005045 }
5046 }
5047 }
5048
5049 /* Matched the backref, skip over it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005050 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005051 }
5052 break;
5053
5054#ifdef FEAT_SYN_HL
5055 case ZREF + 1:
5056 case ZREF + 2:
5057 case ZREF + 3:
5058 case ZREF + 4:
5059 case ZREF + 5:
5060 case ZREF + 6:
5061 case ZREF + 7:
5062 case ZREF + 8:
5063 case ZREF + 9:
5064 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005065 int len;
5066
5067 cleanup_zsubexpr();
5068 no = op - ZREF;
5069 if (re_extmatch_in != NULL
5070 && re_extmatch_in->matches[no] != NULL)
5071 {
5072 len = (int)STRLEN(re_extmatch_in->matches[no]);
5073 if (cstrncmp(re_extmatch_in->matches[no],
Bram Moolenaar0270f382018-07-17 05:43:58 +02005074 rex.input, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005075 status = RA_NOMATCH;
5076 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005077 rex.input += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005078 }
5079 else
5080 {
5081 /* Backref was not set: Match an empty string. */
5082 }
5083 }
5084 break;
5085#endif
5086
5087 case BRANCH:
5088 {
5089 if (OP(next) != BRANCH) /* No choice. */
5090 next = OPERAND(scan); /* Avoid recursion. */
5091 else
5092 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005093 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005094 if (rp == NULL)
5095 status = RA_FAIL;
5096 else
5097 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005098 }
5099 }
5100 break;
5101
5102 case BRACE_LIMITS:
5103 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005104 if (OP(next) == BRACE_SIMPLE)
5105 {
5106 bl_minval = OPERAND_MIN(scan);
5107 bl_maxval = OPERAND_MAX(scan);
5108 }
5109 else if (OP(next) >= BRACE_COMPLEX
5110 && OP(next) < BRACE_COMPLEX + 10)
5111 {
5112 no = OP(next) - BRACE_COMPLEX;
5113 brace_min[no] = OPERAND_MIN(scan);
5114 brace_max[no] = OPERAND_MAX(scan);
5115 brace_count[no] = 0;
5116 }
5117 else
5118 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01005119 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005120 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005121 }
5122 }
5123 break;
5124
5125 case BRACE_COMPLEX + 0:
5126 case BRACE_COMPLEX + 1:
5127 case BRACE_COMPLEX + 2:
5128 case BRACE_COMPLEX + 3:
5129 case BRACE_COMPLEX + 4:
5130 case BRACE_COMPLEX + 5:
5131 case BRACE_COMPLEX + 6:
5132 case BRACE_COMPLEX + 7:
5133 case BRACE_COMPLEX + 8:
5134 case BRACE_COMPLEX + 9:
5135 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005136 no = op - BRACE_COMPLEX;
5137 ++brace_count[no];
5138
5139 /* If not matched enough times yet, try one more */
5140 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005141 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005142 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005143 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005144 if (rp == NULL)
5145 status = RA_FAIL;
5146 else
5147 {
5148 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005149 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005150 next = OPERAND(scan);
5151 /* We continue and handle the result when done. */
5152 }
5153 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005154 }
5155
5156 /* If matched enough times, may try matching some more */
5157 if (brace_min[no] <= brace_max[no])
5158 {
5159 /* Range is the normal way around, use longest match */
5160 if (brace_count[no] <= brace_max[no])
5161 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005162 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005163 if (rp == NULL)
5164 status = RA_FAIL;
5165 else
5166 {
5167 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005168 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005169 next = OPERAND(scan);
5170 /* We continue and handle the result when done. */
5171 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005172 }
5173 }
5174 else
5175 {
5176 /* Range is backwards, use shortest match first */
5177 if (brace_count[no] <= brace_min[no])
5178 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005179 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005180 if (rp == NULL)
5181 status = RA_FAIL;
5182 else
5183 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005184 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005185 /* We continue and handle the result when done. */
5186 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005187 }
5188 }
5189 }
5190 break;
5191
5192 case BRACE_SIMPLE:
5193 case STAR:
5194 case PLUS:
5195 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005196 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005197
5198 /*
5199 * Lookahead to avoid useless match attempts when we know
5200 * what character comes next.
5201 */
5202 if (OP(next) == EXACTLY)
5203 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005204 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005205 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005206 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005207 if (MB_ISUPPER(rst.nextb))
5208 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005209 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005210 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005211 }
5212 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005213 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005214 }
5215 else
5216 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005217 rst.nextb = NUL;
5218 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005219 }
5220 if (op != BRACE_SIMPLE)
5221 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005222 rst.minval = (op == STAR) ? 0 : 1;
5223 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005224 }
5225 else
5226 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005227 rst.minval = bl_minval;
5228 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005229 }
5230
5231 /*
5232 * When maxval > minval, try matching as much as possible, up
5233 * to maxval. When maxval < minval, try matching at least the
5234 * minimal number (since the range is backwards, that's also
5235 * maxval!).
5236 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005237 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005238 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005239 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005240 status = RA_FAIL;
5241 break;
5242 }
5243 if (rst.minval <= rst.maxval
5244 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5245 {
5246 /* It could match. Prepare for trying to match what
5247 * follows. The code is below. Parameters are stored in
5248 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005249 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005250 {
5251 EMSG(_(e_maxmempat));
5252 status = RA_FAIL;
5253 }
5254 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005255 status = RA_FAIL;
5256 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005257 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005258 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005259 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005260 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005261 if (rp == NULL)
5262 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005263 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005264 {
5265 *(((regstar_T *)rp) - 1) = rst;
5266 status = RA_BREAK; /* skip the restore bits */
5267 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005268 }
5269 }
5270 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005271 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005272
Bram Moolenaar071d4272004-06-13 20:20:40 +00005273 }
5274 break;
5275
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005276 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005277 case MATCH:
5278 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005279 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005280 if (rp == NULL)
5281 status = RA_FAIL;
5282 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005283 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005284 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005285 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005286 next = OPERAND(scan);
5287 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005288 }
5289 break;
5290
5291 case BEHIND:
5292 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005293 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005294 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005295 {
5296 EMSG(_(e_maxmempat));
5297 status = RA_FAIL;
5298 }
5299 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005300 status = RA_FAIL;
5301 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005302 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005303 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005304 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005305 if (rp == NULL)
5306 status = RA_FAIL;
5307 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005308 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005309 /* Need to save the subexpr to be able to restore them
5310 * when there is a match but we don't use it. */
5311 save_subexpr(((regbehind_T *)rp) - 1);
5312
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005313 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005314 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005315 /* First try if what follows matches. If it does then we
5316 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005317 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005318 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005319 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005320
5321 case BHPOS:
5322 if (REG_MULTI)
5323 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005324 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
5325 || behind_pos.rs_u.pos.lnum != rex.lnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005326 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005327 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005328 else if (behind_pos.rs_u.ptr != rex.input)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005329 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005330 break;
5331
5332 case NEWL:
Bram Moolenaar0270f382018-07-17 05:43:58 +02005333 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005334 || rex.reg_line_lbr)
5335 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005336 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005337 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005338 ADVANCE_REGINPUT();
5339 else
5340 reg_nextline();
5341 break;
5342
5343 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005344 status = RA_MATCH; /* Success! */
5345 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005346
5347 default:
5348 EMSG(_(e_re_corr));
5349#ifdef DEBUG
5350 printf("Illegal op code %d\n", op);
5351#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005352 status = RA_FAIL;
5353 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005354 }
5355 }
5356
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005357 /* If we can't continue sequentially, break the inner loop. */
5358 if (status != RA_CONT)
5359 break;
5360
5361 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005362 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005363
5364 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005365
5366 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005367 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005368 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005369 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005370 while (regstack.ga_len > 0 && status != RA_FAIL)
5371 {
5372 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5373 switch (rp->rs_state)
5374 {
5375 case RS_NOPEN:
5376 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005377 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005378 break;
5379
5380 case RS_MOPEN:
5381 /* Pop the state. Restore pointers when there is no match. */
5382 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005383 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5384 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005385 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005386 break;
5387
5388#ifdef FEAT_SYN_HL
5389 case RS_ZOPEN:
5390 /* Pop the state. Restore pointers when there is no match. */
5391 if (status == RA_NOMATCH)
5392 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5393 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005394 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005395 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005396#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005397
5398 case RS_MCLOSE:
5399 /* Pop the state. Restore pointers when there is no match. */
5400 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005401 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5402 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005403 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005404 break;
5405
5406#ifdef FEAT_SYN_HL
5407 case RS_ZCLOSE:
5408 /* Pop the state. Restore pointers when there is no match. */
5409 if (status == RA_NOMATCH)
5410 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5411 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005412 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005413 break;
5414#endif
5415
5416 case RS_BRANCH:
5417 if (status == RA_MATCH)
5418 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005419 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005420 else
5421 {
5422 if (status != RA_BREAK)
5423 {
5424 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005425 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005426 scan = rp->rs_scan;
5427 }
5428 if (scan == NULL || OP(scan) != BRANCH)
5429 {
5430 /* no more branches, didn't find a match */
5431 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005432 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005433 }
5434 else
5435 {
5436 /* Prepare to try a branch. */
5437 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005438 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005439 scan = OPERAND(scan);
5440 }
5441 }
5442 break;
5443
5444 case RS_BRCPLX_MORE:
5445 /* Pop the state. Restore pointers when there is no match. */
5446 if (status == RA_NOMATCH)
5447 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005448 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005449 --brace_count[rp->rs_no]; /* decrement match count */
5450 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005451 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005452 break;
5453
5454 case RS_BRCPLX_LONG:
5455 /* Pop the state. Restore pointers when there is no match. */
5456 if (status == RA_NOMATCH)
5457 {
5458 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005459 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005460 --brace_count[rp->rs_no];
5461 /* continue with the items after "\{}" */
5462 status = RA_CONT;
5463 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005464 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005465 if (status == RA_CONT)
5466 scan = regnext(scan);
5467 break;
5468
5469 case RS_BRCPLX_SHORT:
5470 /* Pop the state. Restore pointers when there is no match. */
5471 if (status == RA_NOMATCH)
5472 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005473 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005474 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005475 if (status == RA_NOMATCH)
5476 {
5477 scan = OPERAND(scan);
5478 status = RA_CONT;
5479 }
5480 break;
5481
5482 case RS_NOMATCH:
5483 /* Pop the state. If the operand matches for NOMATCH or
5484 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5485 * except for SUBPAT, and continue with the next item. */
5486 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5487 status = RA_NOMATCH;
5488 else
5489 {
5490 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005491 if (rp->rs_no != SUBPAT) /* zero-width */
5492 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005493 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005494 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005495 if (status == RA_CONT)
5496 scan = regnext(scan);
5497 break;
5498
5499 case RS_BEHIND1:
5500 if (status == RA_NOMATCH)
5501 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005502 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005503 regstack.ga_len -= sizeof(regbehind_T);
5504 }
5505 else
5506 {
5507 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5508 * the behind part does (not) match before the current
5509 * position in the input. This must be done at every
5510 * position in the input and checking if the match ends at
5511 * the current position. */
5512
5513 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005514 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005515
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005516 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005517 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005518 * result, hitting the start of the line or the previous
5519 * line (for multi-line matching).
5520 * Set behind_pos to where the match should end, BHPOS
5521 * will match it. Save the current value. */
5522 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5523 behind_pos = rp->rs_un.regsave;
5524
5525 rp->rs_state = RS_BEHIND2;
5526
Bram Moolenaar582fd852005-03-28 20:58:01 +00005527 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005528 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005529 }
5530 break;
5531
5532 case RS_BEHIND2:
5533 /*
5534 * Looping for BEHIND / NOBEHIND match.
5535 */
5536 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5537 {
5538 /* found a match that ends where "next" started */
5539 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5540 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005541 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5542 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005543 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005544 {
5545 /* But we didn't want a match. Need to restore the
5546 * subexpr, because what follows matched, so they have
5547 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005548 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005549 restore_subexpr(((regbehind_T *)rp) - 1);
5550 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005551 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005552 regstack.ga_len -= sizeof(regbehind_T);
5553 }
5554 else
5555 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005556 long limit;
5557
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005558 /* No match or a match that doesn't end where we want it: Go
5559 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005560 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005561 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005562 if (REG_MULTI)
5563 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005564 if (limit > 0
5565 && ((rp->rs_un.regsave.rs_u.pos.lnum
5566 < behind_pos.rs_u.pos.lnum
Bram Moolenaar0270f382018-07-17 05:43:58 +02005567 ? (colnr_T)STRLEN(rex.line)
Bram Moolenaar61602c52013-06-01 19:54:43 +02005568 : behind_pos.rs_u.pos.col)
5569 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5570 no = FAIL;
5571 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005572 {
5573 if (rp->rs_un.regsave.rs_u.pos.lnum
5574 < behind_pos.rs_u.pos.lnum
5575 || reg_getline(
5576 --rp->rs_un.regsave.rs_u.pos.lnum)
5577 == NULL)
5578 no = FAIL;
5579 else
5580 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005581 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005582 rp->rs_un.regsave.rs_u.pos.col =
Bram Moolenaar0270f382018-07-17 05:43:58 +02005583 (colnr_T)STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005584 }
5585 }
5586 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005587 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005588#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005589 if (has_mbyte)
Bram Moolenaarbc197192018-02-13 16:35:06 +01005590 {
5591 char_u *line =
5592 reg_getline(behind_pos.rs_u.pos.lnum);
5593
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005594 rp->rs_un.regsave.rs_u.pos.col -=
Bram Moolenaarbc197192018-02-13 16:35:06 +01005595 (*mb_head_off)(line, line
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005596 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaarbc197192018-02-13 16:35:06 +01005597 }
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005598 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005599#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005600 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005601 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005602 }
5603 else
5604 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005605 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005606 no = FAIL;
5607 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005608 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005609 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005610 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5611 - rp->rs_un.regsave.rs_u.ptr) > limit)
5612 no = FAIL;
5613 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005614 }
5615 if (no == OK)
5616 {
5617 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005618 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005619 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005620 if (status == RA_MATCH)
5621 {
5622 /* We did match, so subexpr may have been changed,
5623 * need to restore them for the next try. */
5624 status = RA_NOMATCH;
5625 restore_subexpr(((regbehind_T *)rp) - 1);
5626 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005627 }
5628 else
5629 {
5630 /* Can't advance. For NOBEHIND that's a match. */
5631 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5632 if (rp->rs_no == NOBEHIND)
5633 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005634 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5635 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005636 status = RA_MATCH;
5637 }
5638 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005639 {
5640 /* We do want a proper match. Need to restore the
5641 * subexpr if we had a match, because they may have
5642 * been set. */
5643 if (status == RA_MATCH)
5644 {
5645 status = RA_NOMATCH;
5646 restore_subexpr(((regbehind_T *)rp) - 1);
5647 }
5648 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005649 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005650 regstack.ga_len -= sizeof(regbehind_T);
5651 }
5652 }
5653 break;
5654
5655 case RS_STAR_LONG:
5656 case RS_STAR_SHORT:
5657 {
5658 regstar_T *rst = ((regstar_T *)rp) - 1;
5659
5660 if (status == RA_MATCH)
5661 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005662 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005663 regstack.ga_len -= sizeof(regstar_T);
5664 break;
5665 }
5666
5667 /* Tried once already, restore input pointers. */
5668 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005669 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005670
5671 /* Repeat until we found a position where it could match. */
5672 for (;;)
5673 {
5674 if (status != RA_BREAK)
5675 {
5676 /* Tried first position already, advance. */
5677 if (rp->rs_state == RS_STAR_LONG)
5678 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005679 /* Trying for longest match, but couldn't or
5680 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005681 if (--rst->count < rst->minval)
5682 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005683 if (rex.input == rex.line)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005684 {
5685 /* backup to last char of previous line */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005686 --rex.lnum;
5687 rex.line = reg_getline(rex.lnum);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005688 /* Just in case regrepeat() didn't count
5689 * right. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005690 if (rex.line == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005691 break;
Bram Moolenaar0270f382018-07-17 05:43:58 +02005692 rex.input = rex.line + STRLEN(rex.line);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005693 fast_breakcheck();
5694 }
5695 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02005696 MB_PTR_BACK(rex.line, rex.input);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005697 }
5698 else
5699 {
5700 /* Range is backwards, use shortest match first.
5701 * Careful: maxval and minval are exchanged!
5702 * Couldn't or didn't match: try advancing one
5703 * char. */
5704 if (rst->count == rst->minval
5705 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5706 break;
5707 ++rst->count;
5708 }
5709 if (got_int)
5710 break;
5711 }
5712 else
5713 status = RA_NOMATCH;
5714
5715 /* If it could match, try it. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02005716 if (rst->nextb == NUL || *rex.input == rst->nextb
5717 || *rex.input == rst->nextb_ic)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005718 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005719 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005720 scan = regnext(rp->rs_scan);
5721 status = RA_CONT;
5722 break;
5723 }
5724 }
5725 if (status != RA_CONT)
5726 {
5727 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005728 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005729 regstack.ga_len -= sizeof(regstar_T);
5730 status = RA_NOMATCH;
5731 }
5732 }
5733 break;
5734 }
5735
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005736 /* If we want to continue the inner loop or didn't pop a state
5737 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005738 if (status == RA_CONT || rp == (regitem_T *)
5739 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5740 break;
5741 }
5742
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005743 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005744 if (status == RA_CONT)
5745 continue;
5746
5747 /*
5748 * If the regstack is empty or something failed we are done.
5749 */
5750 if (regstack.ga_len == 0 || status == RA_FAIL)
5751 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005752 if (scan == NULL)
5753 {
5754 /*
5755 * We get here only if there's trouble -- normally "case END" is
5756 * the terminating point.
5757 */
5758 EMSG(_(e_re_corr));
5759#ifdef DEBUG
5760 printf("Premature EOL\n");
5761#endif
5762 }
5763 return (status == RA_MATCH);
5764 }
5765
5766 } /* End of loop until the regstack is empty. */
5767
5768 /* NOTREACHED */
5769}
5770
5771/*
5772 * Push an item onto the regstack.
5773 * Returns pointer to new item. Returns NULL when out of memory.
5774 */
5775 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005776regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005777{
5778 regitem_T *rp;
5779
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005780 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005781 {
5782 EMSG(_(e_maxmempat));
5783 return NULL;
5784 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005785 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005786 return NULL;
5787
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005788 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005789 rp->rs_state = state;
5790 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005791
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005792 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005793 return rp;
5794}
5795
5796/*
5797 * Pop an item from the regstack.
5798 */
5799 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005800regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005801{
5802 regitem_T *rp;
5803
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005804 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005805 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005806
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005807 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005808}
5809
Bram Moolenaar071d4272004-06-13 20:20:40 +00005810/*
5811 * regrepeat - repeatedly match something simple, return how many.
Bram Moolenaar0270f382018-07-17 05:43:58 +02005812 * Advances rex.input (and rex.lnum) to just after the matched chars.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005813 */
5814 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005815regrepeat(
5816 char_u *p,
5817 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005818{
5819 long count = 0;
5820 char_u *scan;
5821 char_u *opnd;
5822 int mask;
5823 int testval = 0;
5824
Bram Moolenaar0270f382018-07-17 05:43:58 +02005825 scan = rex.input; /* Make local copy of rex.input for speed. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005826 opnd = OPERAND(p);
5827 switch (OP(p))
5828 {
5829 case ANY:
5830 case ANY + ADD_NL:
5831 while (count < maxcount)
5832 {
5833 /* Matching anything means we continue until end-of-line (or
5834 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5835 while (*scan != NUL && count < maxcount)
5836 {
5837 ++count;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005838 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005839 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02005840 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005841 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005842 break;
5843 ++count; /* count the line-break */
5844 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005845 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005846 if (got_int)
5847 break;
5848 }
5849 break;
5850
5851 case IDENT:
5852 case IDENT + ADD_NL:
5853 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005854 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005855 case SIDENT:
5856 case SIDENT + ADD_NL:
5857 while (count < maxcount)
5858 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005859 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005860 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005861 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005862 }
5863 else if (*scan == NUL)
5864 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005865 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005866 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005867 break;
5868 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005869 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005870 if (got_int)
5871 break;
5872 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005873 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005874 ++scan;
5875 else
5876 break;
5877 ++count;
5878 }
5879 break;
5880
5881 case KWORD:
5882 case KWORD + ADD_NL:
5883 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005884 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005885 case SKWORD:
5886 case SKWORD + ADD_NL:
5887 while (count < maxcount)
5888 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005889 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005890 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005891 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005892 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005893 }
5894 else if (*scan == NUL)
5895 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005896 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005897 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005898 break;
5899 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005900 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005901 if (got_int)
5902 break;
5903 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005904 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005905 ++scan;
5906 else
5907 break;
5908 ++count;
5909 }
5910 break;
5911
5912 case FNAME:
5913 case FNAME + ADD_NL:
5914 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005915 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005916 case SFNAME:
5917 case SFNAME + ADD_NL:
5918 while (count < maxcount)
5919 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005920 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005921 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005922 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005923 }
5924 else if (*scan == NUL)
5925 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005926 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005927 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005928 break;
5929 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005930 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005931 if (got_int)
5932 break;
5933 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005934 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005935 ++scan;
5936 else
5937 break;
5938 ++count;
5939 }
5940 break;
5941
5942 case PRINT:
5943 case PRINT + ADD_NL:
5944 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005945 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005946 case SPRINT:
5947 case SPRINT + ADD_NL:
5948 while (count < maxcount)
5949 {
5950 if (*scan == NUL)
5951 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005952 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005953 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005954 break;
5955 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005956 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005957 if (got_int)
5958 break;
5959 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005960 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5961 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005962 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005963 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005964 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005965 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005966 ++scan;
5967 else
5968 break;
5969 ++count;
5970 }
5971 break;
5972
5973 case WHITE:
5974 case WHITE + ADD_NL:
5975 testval = mask = RI_WHITE;
5976do_class:
5977 while (count < maxcount)
5978 {
5979#ifdef FEAT_MBYTE
5980 int l;
5981#endif
5982 if (*scan == NUL)
5983 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02005984 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02005985 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005986 break;
5987 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02005988 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005989 if (got_int)
5990 break;
5991 }
5992#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005993 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005994 {
5995 if (testval != 0)
5996 break;
5997 scan += l;
5998 }
5999#endif
6000 else if ((class_tab[*scan] & mask) == testval)
6001 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006002 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006003 ++scan;
6004 else
6005 break;
6006 ++count;
6007 }
6008 break;
6009
6010 case NWHITE:
6011 case NWHITE + ADD_NL:
6012 mask = RI_WHITE;
6013 goto do_class;
6014 case DIGIT:
6015 case DIGIT + ADD_NL:
6016 testval = mask = RI_DIGIT;
6017 goto do_class;
6018 case NDIGIT:
6019 case NDIGIT + ADD_NL:
6020 mask = RI_DIGIT;
6021 goto do_class;
6022 case HEX:
6023 case HEX + ADD_NL:
6024 testval = mask = RI_HEX;
6025 goto do_class;
6026 case NHEX:
6027 case NHEX + ADD_NL:
6028 mask = RI_HEX;
6029 goto do_class;
6030 case OCTAL:
6031 case OCTAL + ADD_NL:
6032 testval = mask = RI_OCTAL;
6033 goto do_class;
6034 case NOCTAL:
6035 case NOCTAL + ADD_NL:
6036 mask = RI_OCTAL;
6037 goto do_class;
6038 case WORD:
6039 case WORD + ADD_NL:
6040 testval = mask = RI_WORD;
6041 goto do_class;
6042 case NWORD:
6043 case NWORD + ADD_NL:
6044 mask = RI_WORD;
6045 goto do_class;
6046 case HEAD:
6047 case HEAD + ADD_NL:
6048 testval = mask = RI_HEAD;
6049 goto do_class;
6050 case NHEAD:
6051 case NHEAD + ADD_NL:
6052 mask = RI_HEAD;
6053 goto do_class;
6054 case ALPHA:
6055 case ALPHA + ADD_NL:
6056 testval = mask = RI_ALPHA;
6057 goto do_class;
6058 case NALPHA:
6059 case NALPHA + ADD_NL:
6060 mask = RI_ALPHA;
6061 goto do_class;
6062 case LOWER:
6063 case LOWER + ADD_NL:
6064 testval = mask = RI_LOWER;
6065 goto do_class;
6066 case NLOWER:
6067 case NLOWER + ADD_NL:
6068 mask = RI_LOWER;
6069 goto do_class;
6070 case UPPER:
6071 case UPPER + ADD_NL:
6072 testval = mask = RI_UPPER;
6073 goto do_class;
6074 case NUPPER:
6075 case NUPPER + ADD_NL:
6076 mask = RI_UPPER;
6077 goto do_class;
6078
6079 case EXACTLY:
6080 {
6081 int cu, cl;
6082
6083 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006084 * would have been used for it. It does handle single-byte
6085 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006086 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006087 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006088 cu = MB_TOUPPER(*opnd);
6089 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006090 while (count < maxcount && (*scan == cu || *scan == cl))
6091 {
6092 count++;
6093 scan++;
6094 }
6095 }
6096 else
6097 {
6098 cu = *opnd;
6099 while (count < maxcount && *scan == cu)
6100 {
6101 count++;
6102 scan++;
6103 }
6104 }
6105 break;
6106 }
6107
6108#ifdef FEAT_MBYTE
6109 case MULTIBYTECODE:
6110 {
6111 int i, len, cf = 0;
6112
6113 /* Safety check (just in case 'encoding' was changed since
6114 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006115 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006116 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006117 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006118 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006119 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006120 {
6121 for (i = 0; i < len; ++i)
6122 if (opnd[i] != scan[i])
6123 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006124 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00006125 || utf_fold(utf_ptr2char(scan)) != cf))
6126 break;
6127 scan += len;
6128 ++count;
6129 }
6130 }
6131 }
6132 break;
6133#endif
6134
6135 case ANYOF:
6136 case ANYOF + ADD_NL:
6137 testval = TRUE;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02006138 /* FALLTHROUGH */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006139
6140 case ANYBUT:
6141 case ANYBUT + ADD_NL:
6142 while (count < maxcount)
6143 {
6144#ifdef FEAT_MBYTE
6145 int len;
6146#endif
6147 if (*scan == NUL)
6148 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006149 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006150 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006151 break;
6152 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006153 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006154 if (got_int)
6155 break;
6156 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006157 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006158 ++scan;
6159#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006160 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006161 {
6162 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6163 break;
6164 scan += len;
6165 }
6166#endif
6167 else
6168 {
6169 if ((cstrchr(opnd, *scan) == NULL) == testval)
6170 break;
6171 ++scan;
6172 }
6173 ++count;
6174 }
6175 break;
6176
6177 case NEWL:
6178 while (count < maxcount
Bram Moolenaar0270f382018-07-17 05:43:58 +02006179 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
Bram Moolenaar6100d022016-10-02 16:51:57 +02006180 && !rex.reg_line_lbr && REG_MULTI)
6181 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006182 {
6183 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006184 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006185 ADVANCE_REGINPUT();
6186 else
6187 reg_nextline();
Bram Moolenaar0270f382018-07-17 05:43:58 +02006188 scan = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006189 if (got_int)
6190 break;
6191 }
6192 break;
6193
6194 default: /* Oh dear. Called inappropriately. */
6195 EMSG(_(e_re_corr));
6196#ifdef DEBUG
6197 printf("Called regrepeat with op code %d\n", OP(p));
6198#endif
6199 break;
6200 }
6201
Bram Moolenaar0270f382018-07-17 05:43:58 +02006202 rex.input = scan;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006203
6204 return (int)count;
6205}
6206
6207/*
6208 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006209 * Returns NULL when calculating size, when there is no next item and when
6210 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006211 */
6212 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006213regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006214{
6215 int offset;
6216
Bram Moolenaard3005802009-11-25 17:21:32 +00006217 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006218 return NULL;
6219
6220 offset = NEXT(p);
6221 if (offset == 0)
6222 return NULL;
6223
Bram Moolenaar582fd852005-03-28 20:58:01 +00006224 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006225 return p - offset;
6226 else
6227 return p + offset;
6228}
6229
6230/*
6231 * Check the regexp program for its magic number.
6232 * Return TRUE if it's wrong.
6233 */
6234 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006235prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006236{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006237 regprog_T *prog;
6238
Bram Moolenaar6100d022016-10-02 16:51:57 +02006239 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006240 if (prog->engine == &nfa_regengine)
6241 /* For NFA matcher we don't check the magic */
6242 return FALSE;
6243
6244 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006245 {
6246 EMSG(_(e_re_corr));
6247 return TRUE;
6248 }
6249 return FALSE;
6250}
6251
6252/*
6253 * Cleanup the subexpressions, if this wasn't done yet.
6254 * This construction is used to clear the subexpressions only when they are
6255 * used (to increase speed).
6256 */
6257 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006258cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006259{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006260 if (rex.need_clear_subexpr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006261 {
6262 if (REG_MULTI)
6263 {
6264 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006265 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6266 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006267 }
6268 else
6269 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006270 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6271 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006272 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006273 rex.need_clear_subexpr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006274 }
6275}
6276
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpressions when "rex.need_clear_zsubexpr" is set, and
 * reset that flag.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!rex.need_clear_zsubexpr)
	return;

    if (REG_MULTI)
    {
	/* Filling with 0xff sets lnum to -1 */
	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
	vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    rex.need_clear_zsubexpr = FALSE;
}
#endif
6298
6299/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006300 * Save the current subexpr to "bp", so that they can be restored
6301 * later by restore_subexpr().
6302 */
6303 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006304save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006305{
6306 int i;
6307
Bram Moolenaar0270f382018-07-17 05:43:58 +02006308 /* When "rex.need_clear_subexpr" is set we don't need to save the values, only
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006309 * remember that this flag needs to be set again when restoring. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006310 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
6311 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006312 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006313 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006314 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006315 if (REG_MULTI)
6316 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006317 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6318 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006319 }
6320 else
6321 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006322 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6323 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006324 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006325 }
6326 }
6327}
6328
6329/*
6330 * Restore the subexpr from "bp".
6331 */
6332 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006333restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006334{
6335 int i;
6336
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006337 /* Only need to restore saved values when they are not to be cleared. */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006338 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
6339 if (!rex.need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006340 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006341 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006342 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006343 if (REG_MULTI)
6344 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006345 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6346 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006347 }
6348 else
6349 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006350 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6351 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006352 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006353 }
6354 }
6355}
6356
6357/*
Bram Moolenaar0270f382018-07-17 05:43:58 +02006358 * Advance rex.lnum, rex.line and rex.input to the next line.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006359 */
6360 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006361reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006362{
Bram Moolenaar0270f382018-07-17 05:43:58 +02006363 rex.line = reg_getline(++rex.lnum);
6364 rex.input = rex.line;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006365 fast_breakcheck();
6366}
6367
6368/*
6369 * Save the input line and position in a regsave_T.
6370 */
6371 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006372reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006373{
6374 if (REG_MULTI)
6375 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006376 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
6377 save->rs_u.pos.lnum = rex.lnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006378 }
6379 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006380 save->rs_u.ptr = rex.input;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006381 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006382}
6383
6384/*
6385 * Restore the input line and position from a regsave_T.
6386 */
6387 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006388reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006389{
6390 if (REG_MULTI)
6391 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006392 if (rex.lnum != save->rs_u.pos.lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006393 {
6394 /* only call reg_getline() when the line number changed to save
6395 * a bit of time */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006396 rex.lnum = save->rs_u.pos.lnum;
6397 rex.line = reg_getline(rex.lnum);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006398 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006399 rex.input = rex.line + save->rs_u.pos.col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006400 }
6401 else
Bram Moolenaar0270f382018-07-17 05:43:58 +02006402 rex.input = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006403 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006404}
6405
6406/*
6407 * Return TRUE if current position is equal to saved position.
6408 */
6409 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006410reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006411{
6412 if (REG_MULTI)
Bram Moolenaar0270f382018-07-17 05:43:58 +02006413 return rex.lnum == save->rs_u.pos.lnum
6414 && rex.input == rex.line + save->rs_u.pos.col;
6415 return rex.input == save->rs_u.ptr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006416}
6417
6418/*
6419 * Tentatively set the sub-expression start to the current position (after
6420 * calling regmatch() they will have changed). Need to save the existing
6421 * values for when there is no match.
6422 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6423 * depending on REG_MULTI.
6424 */
6425 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006426save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006427{
6428 savep->se_u.pos = *posp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006429 posp->lnum = rex.lnum;
6430 posp->col = (colnr_T)(rex.input - rex.line);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006431}
6432
6433 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006434save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006435{
6436 savep->se_u.ptr = *pp;
Bram Moolenaar0270f382018-07-17 05:43:58 +02006437 *pp = rex.input;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006438}
6439
6440/*
6441 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6442 */
6443 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006444re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006445{
6446 long_u n = OPERAND_MIN(scan);
6447
6448 if (OPERAND_CMP(scan) == '>')
6449 return val > n;
6450 if (OPERAND_CMP(scan) == '<')
6451 return val < n;
6452 return val == n;
6453}
6454
Bram Moolenaar580abea2013-06-14 20:31:28 +02006455/*
6456 * Check whether a backreference matches.
6457 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006458 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6459 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006460 */
6461 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006462match_with_backref(
6463 linenr_T start_lnum,
6464 colnr_T start_col,
6465 linenr_T end_lnum,
6466 colnr_T end_col,
6467 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006468{
6469 linenr_T clnum = start_lnum;
6470 colnr_T ccol = start_col;
6471 int len;
6472 char_u *p;
6473
6474 if (bytelen != NULL)
6475 *bytelen = 0;
6476 for (;;)
6477 {
6478 /* Since getting one line may invalidate the other, need to make copy.
6479 * Slow! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006480 if (rex.line != reg_tofree)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006481 {
Bram Moolenaar0270f382018-07-17 05:43:58 +02006482 len = (int)STRLEN(rex.line);
Bram Moolenaar580abea2013-06-14 20:31:28 +02006483 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6484 {
6485 len += 50; /* get some extra */
6486 vim_free(reg_tofree);
6487 reg_tofree = alloc(len);
6488 if (reg_tofree == NULL)
6489 return RA_FAIL; /* out of memory!*/
6490 reg_tofreelen = len;
6491 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02006492 STRCPY(reg_tofree, rex.line);
6493 rex.input = reg_tofree + (rex.input - rex.line);
6494 rex.line = reg_tofree;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006495 }
6496
6497 /* Get the line to compare with. */
6498 p = reg_getline(clnum);
6499 if (clnum == end_lnum)
6500 len = end_col - ccol;
6501 else
6502 len = (int)STRLEN(p + ccol);
6503
Bram Moolenaar0270f382018-07-17 05:43:58 +02006504 if (cstrncmp(p + ccol, rex.input, &len) != 0)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006505 return RA_NOMATCH; /* doesn't match */
6506 if (bytelen != NULL)
6507 *bytelen += len;
6508 if (clnum == end_lnum)
6509 break; /* match and at end! */
Bram Moolenaar0270f382018-07-17 05:43:58 +02006510 if (rex.lnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006511 return RA_NOMATCH; /* text too short */
6512
6513 /* Advance to next line. */
6514 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006515 if (bytelen != NULL)
6516 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006517 ++clnum;
6518 ccol = 0;
6519 if (got_int)
6520 return RA_FAIL;
6521 }
6522
Bram Moolenaar0270f382018-07-17 05:43:58 +02006523 /* found a match! Note that rex.line may now point to a copy of the line,
Bram Moolenaar580abea2013-06-14 20:31:28 +02006524 * that should not matter. */
6525 return RA_MATCH;
6526}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006527
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write the compiled program "r" of "pattern" in a vaguely
 * comprehensible form.
 * The output goes to stdout, or is appended to "bt_regexp_log.log" when
 * BT_REGEXP_LOG is defined.
 */
    static void
regdump(char_u *pattern, bt_regprog_T *r)
{
    char_u  *node;
    char_u  *next;
    char_u  *last_ref = NULL;	/* furthest "next" pointer seen so far */
    int	    op = EXACTLY;	/* Arbitrary non-END op. */
    FILE    *out;

#ifdef BT_REGEXP_LOG
    out = fopen("bt_regexp_log.log", "a");
#else
    out = stdout;
#endif
    if (out == NULL)
	return;
    fprintf(out, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Go on until reaching an END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    for (node = r->program + 1; op != END || node <= last_ref; )
    {
	op = OP(node);

	/* Where the node is and what it is. */
	fprintf(out, "%2d%s", (int)(node - r->program), regprop(node));

	/* Where its next pointer goes. */
	next = regnext(node);
	if (next != NULL)
	    fprintf(out, "(%d)", (int)((node - r->program) + (next - node)));
	else
	    fprintf(out, "(0)");
	if (last_ref < next)
	    last_ref = next;

	/* Print the numbers stored in the node and skip over them. */
	if (op == BRACE_LIMITS)
	{
	    /* two ints */
	    fprintf(out, " minval %ld, maxval %ld", OPERAND_MIN(node), OPERAND_MAX(node));
	    node += 8;
	}
	else if (op == BEHIND || op == NOBEHIND)
	{
	    /* one int */
	    fprintf(out, " count %ld", OPERAND_MIN(node));
	    node += 4;
	}
	else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
	{
	    /* one int plus the comparison character */
	    fprintf(out, " count %ld", OPERAND_MIN(node));
	    node += 5;
	}
	node += 3;

	if (op == EXACTLY
		|| op == ANYOF || op == ANYOF + ADD_NL
		|| op == ANYBUT || op == ANYBUT + ADD_NL)
	{
	    /* Literal string, where present. */
	    fprintf(out, "\nxxxxxxxxx\n");
	    for ( ; *node != NUL; ++node)
		fprintf(out, "%c", *node);
	    fprintf(out, "\nxxxxxxxxx\n");
	    ++node;		/* skip the NUL */
	}
	fprintf(out, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
	fprintf(out, "start `%s' 0x%x; ", r->regstart < 256
		? (char *)transchar(r->regstart)
		: "multibyte", r->regstart);
    if (r->reganch)
	fprintf(out, "anchored; ");
    if (r->regmust != NULL)
	fprintf(out, "must have \"%s\"", r->regmust);
    fprintf(out, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(out);
#endif
}
#endif	/* BT_REGEXP_DUMP */
6616
6617#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006618/*
6619 * regprop - printable representation of opcode
6620 */
6621 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006622regprop(char_u *op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006623{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006624 char *p;
6625 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006626
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006627 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006628
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006629 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006630 {
6631 case BOL:
6632 p = "BOL";
6633 break;
6634 case EOL:
6635 p = "EOL";
6636 break;
6637 case RE_BOF:
6638 p = "BOF";
6639 break;
6640 case RE_EOF:
6641 p = "EOF";
6642 break;
6643 case CURSOR:
6644 p = "CURSOR";
6645 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006646 case RE_VISUAL:
6647 p = "RE_VISUAL";
6648 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006649 case RE_LNUM:
6650 p = "RE_LNUM";
6651 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006652 case RE_MARK:
6653 p = "RE_MARK";
6654 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006655 case RE_COL:
6656 p = "RE_COL";
6657 break;
6658 case RE_VCOL:
6659 p = "RE_VCOL";
6660 break;
6661 case BOW:
6662 p = "BOW";
6663 break;
6664 case EOW:
6665 p = "EOW";
6666 break;
6667 case ANY:
6668 p = "ANY";
6669 break;
6670 case ANY + ADD_NL:
6671 p = "ANY+NL";
6672 break;
6673 case ANYOF:
6674 p = "ANYOF";
6675 break;
6676 case ANYOF + ADD_NL:
6677 p = "ANYOF+NL";
6678 break;
6679 case ANYBUT:
6680 p = "ANYBUT";
6681 break;
6682 case ANYBUT + ADD_NL:
6683 p = "ANYBUT+NL";
6684 break;
6685 case IDENT:
6686 p = "IDENT";
6687 break;
6688 case IDENT + ADD_NL:
6689 p = "IDENT+NL";
6690 break;
6691 case SIDENT:
6692 p = "SIDENT";
6693 break;
6694 case SIDENT + ADD_NL:
6695 p = "SIDENT+NL";
6696 break;
6697 case KWORD:
6698 p = "KWORD";
6699 break;
6700 case KWORD + ADD_NL:
6701 p = "KWORD+NL";
6702 break;
6703 case SKWORD:
6704 p = "SKWORD";
6705 break;
6706 case SKWORD + ADD_NL:
6707 p = "SKWORD+NL";
6708 break;
6709 case FNAME:
6710 p = "FNAME";
6711 break;
6712 case FNAME + ADD_NL:
6713 p = "FNAME+NL";
6714 break;
6715 case SFNAME:
6716 p = "SFNAME";
6717 break;
6718 case SFNAME + ADD_NL:
6719 p = "SFNAME+NL";
6720 break;
6721 case PRINT:
6722 p = "PRINT";
6723 break;
6724 case PRINT + ADD_NL:
6725 p = "PRINT+NL";
6726 break;
6727 case SPRINT:
6728 p = "SPRINT";
6729 break;
6730 case SPRINT + ADD_NL:
6731 p = "SPRINT+NL";
6732 break;
6733 case WHITE:
6734 p = "WHITE";
6735 break;
6736 case WHITE + ADD_NL:
6737 p = "WHITE+NL";
6738 break;
6739 case NWHITE:
6740 p = "NWHITE";
6741 break;
6742 case NWHITE + ADD_NL:
6743 p = "NWHITE+NL";
6744 break;
6745 case DIGIT:
6746 p = "DIGIT";
6747 break;
6748 case DIGIT + ADD_NL:
6749 p = "DIGIT+NL";
6750 break;
6751 case NDIGIT:
6752 p = "NDIGIT";
6753 break;
6754 case NDIGIT + ADD_NL:
6755 p = "NDIGIT+NL";
6756 break;
6757 case HEX:
6758 p = "HEX";
6759 break;
6760 case HEX + ADD_NL:
6761 p = "HEX+NL";
6762 break;
6763 case NHEX:
6764 p = "NHEX";
6765 break;
6766 case NHEX + ADD_NL:
6767 p = "NHEX+NL";
6768 break;
6769 case OCTAL:
6770 p = "OCTAL";
6771 break;
6772 case OCTAL + ADD_NL:
6773 p = "OCTAL+NL";
6774 break;
6775 case NOCTAL:
6776 p = "NOCTAL";
6777 break;
6778 case NOCTAL + ADD_NL:
6779 p = "NOCTAL+NL";
6780 break;
6781 case WORD:
6782 p = "WORD";
6783 break;
6784 case WORD + ADD_NL:
6785 p = "WORD+NL";
6786 break;
6787 case NWORD:
6788 p = "NWORD";
6789 break;
6790 case NWORD + ADD_NL:
6791 p = "NWORD+NL";
6792 break;
6793 case HEAD:
6794 p = "HEAD";
6795 break;
6796 case HEAD + ADD_NL:
6797 p = "HEAD+NL";
6798 break;
6799 case NHEAD:
6800 p = "NHEAD";
6801 break;
6802 case NHEAD + ADD_NL:
6803 p = "NHEAD+NL";
6804 break;
6805 case ALPHA:
6806 p = "ALPHA";
6807 break;
6808 case ALPHA + ADD_NL:
6809 p = "ALPHA+NL";
6810 break;
6811 case NALPHA:
6812 p = "NALPHA";
6813 break;
6814 case NALPHA + ADD_NL:
6815 p = "NALPHA+NL";
6816 break;
6817 case LOWER:
6818 p = "LOWER";
6819 break;
6820 case LOWER + ADD_NL:
6821 p = "LOWER+NL";
6822 break;
6823 case NLOWER:
6824 p = "NLOWER";
6825 break;
6826 case NLOWER + ADD_NL:
6827 p = "NLOWER+NL";
6828 break;
6829 case UPPER:
6830 p = "UPPER";
6831 break;
6832 case UPPER + ADD_NL:
6833 p = "UPPER+NL";
6834 break;
6835 case NUPPER:
6836 p = "NUPPER";
6837 break;
6838 case NUPPER + ADD_NL:
6839 p = "NUPPER+NL";
6840 break;
6841 case BRANCH:
6842 p = "BRANCH";
6843 break;
6844 case EXACTLY:
6845 p = "EXACTLY";
6846 break;
6847 case NOTHING:
6848 p = "NOTHING";
6849 break;
6850 case BACK:
6851 p = "BACK";
6852 break;
6853 case END:
6854 p = "END";
6855 break;
6856 case MOPEN + 0:
6857 p = "MATCH START";
6858 break;
6859 case MOPEN + 1:
6860 case MOPEN + 2:
6861 case MOPEN + 3:
6862 case MOPEN + 4:
6863 case MOPEN + 5:
6864 case MOPEN + 6:
6865 case MOPEN + 7:
6866 case MOPEN + 8:
6867 case MOPEN + 9:
6868 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6869 p = NULL;
6870 break;
6871 case MCLOSE + 0:
6872 p = "MATCH END";
6873 break;
6874 case MCLOSE + 1:
6875 case MCLOSE + 2:
6876 case MCLOSE + 3:
6877 case MCLOSE + 4:
6878 case MCLOSE + 5:
6879 case MCLOSE + 6:
6880 case MCLOSE + 7:
6881 case MCLOSE + 8:
6882 case MCLOSE + 9:
6883 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6884 p = NULL;
6885 break;
6886 case BACKREF + 1:
6887 case BACKREF + 2:
6888 case BACKREF + 3:
6889 case BACKREF + 4:
6890 case BACKREF + 5:
6891 case BACKREF + 6:
6892 case BACKREF + 7:
6893 case BACKREF + 8:
6894 case BACKREF + 9:
6895 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6896 p = NULL;
6897 break;
6898 case NOPEN:
6899 p = "NOPEN";
6900 break;
6901 case NCLOSE:
6902 p = "NCLOSE";
6903 break;
6904#ifdef FEAT_SYN_HL
6905 case ZOPEN + 1:
6906 case ZOPEN + 2:
6907 case ZOPEN + 3:
6908 case ZOPEN + 4:
6909 case ZOPEN + 5:
6910 case ZOPEN + 6:
6911 case ZOPEN + 7:
6912 case ZOPEN + 8:
6913 case ZOPEN + 9:
6914 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6915 p = NULL;
6916 break;
6917 case ZCLOSE + 1:
6918 case ZCLOSE + 2:
6919 case ZCLOSE + 3:
6920 case ZCLOSE + 4:
6921 case ZCLOSE + 5:
6922 case ZCLOSE + 6:
6923 case ZCLOSE + 7:
6924 case ZCLOSE + 8:
6925 case ZCLOSE + 9:
6926 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6927 p = NULL;
6928 break;
6929 case ZREF + 1:
6930 case ZREF + 2:
6931 case ZREF + 3:
6932 case ZREF + 4:
6933 case ZREF + 5:
6934 case ZREF + 6:
6935 case ZREF + 7:
6936 case ZREF + 8:
6937 case ZREF + 9:
6938 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6939 p = NULL;
6940 break;
6941#endif
6942 case STAR:
6943 p = "STAR";
6944 break;
6945 case PLUS:
6946 p = "PLUS";
6947 break;
6948 case NOMATCH:
6949 p = "NOMATCH";
6950 break;
6951 case MATCH:
6952 p = "MATCH";
6953 break;
6954 case BEHIND:
6955 p = "BEHIND";
6956 break;
6957 case NOBEHIND:
6958 p = "NOBEHIND";
6959 break;
6960 case SUBPAT:
6961 p = "SUBPAT";
6962 break;
6963 case BRACE_LIMITS:
6964 p = "BRACE_LIMITS";
6965 break;
6966 case BRACE_SIMPLE:
6967 p = "BRACE_SIMPLE";
6968 break;
6969 case BRACE_COMPLEX + 0:
6970 case BRACE_COMPLEX + 1:
6971 case BRACE_COMPLEX + 2:
6972 case BRACE_COMPLEX + 3:
6973 case BRACE_COMPLEX + 4:
6974 case BRACE_COMPLEX + 5:
6975 case BRACE_COMPLEX + 6:
6976 case BRACE_COMPLEX + 7:
6977 case BRACE_COMPLEX + 8:
6978 case BRACE_COMPLEX + 9:
6979 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6980 p = NULL;
6981 break;
6982#ifdef FEAT_MBYTE
6983 case MULTIBYTECODE:
6984 p = "MULTIBYTECODE";
6985 break;
6986#endif
6987 case NEWL:
6988 p = "NEWL";
6989 break;
6990 default:
6991 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6992 p = NULL;
6993 break;
6994 }
6995 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006996 STRCAT(buf, p);
6997 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006998}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006999#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007000
Bram Moolenaarfb031402014-09-09 17:18:49 +02007001/*
7002 * Used in a place where no * or \+ can follow.
7003 */
7004 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007005re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02007006{
7007 if (re_multi_type(peekchr()) == MULTI_MULT)
7008 EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
7009 return OK;
7010}
7011
Bram Moolenaar071d4272004-06-13 20:20:40 +00007012#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007013static void mb_decompose(int c, int *c1, int *c2, int *c3);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007014
/*
 * One entry of the decomposition table: the first, second and third
 * character that a composed character is split into.  Zero means "not used".
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the Hebrew presentation forms 0xfb20 - 0xfb4f.
 * Indexed with the character minus 0xfb20.  Code points that are not
 * assigned map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    /* U+FB30 is ALEF WITH MAPIQ: the mark is 0x5bc, not hiriq (0x5b4). */
    {0x5d0, 0x5bc, 0},		/* 0xfb30	alef+mapiq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};
7073
7074 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01007075mb_decompose(int c, int *c1, int *c2, int *c3)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007076{
7077 decomp_T d;
7078
Bram Moolenaar2eec59e2013-05-21 21:37:20 +02007079 if (c >= 0xfb20 && c <= 0xfb4f)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007080 {
7081 d = decomp_table[c - 0xfb20];
7082 *c1 = d.a;
7083 *c2 = d.b;
7084 *c3 = d.c;
7085 }
7086 else
7087 {
7088 *c1 = c;
7089 *c2 = *c3 = 0;
7090 }
7091}
7092#endif
7093
7094/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02007095 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00007096 * Return 0 if strings match, non-zero otherwise.
7097 * Correct the length "*n" when composing characters are ignored.
7098 */
7099 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007100cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007101{
7102 int result;
7103
Bram Moolenaar6100d022016-10-02 16:51:57 +02007104 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007105 result = STRNCMP(s1, s2, *n);
7106 else
7107 result = MB_STRNICMP(s1, s2, *n);
7108
7109#ifdef FEAT_MBYTE
7110 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007111 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007112 {
7113 char_u *str1, *str2;
7114 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007115 int junk;
7116
7117 /* we have to handle the strcmp ourselves, since it is necessary to
7118 * deal with the composing characters by ignoring them: */
7119 str1 = s1;
7120 str2 = s2;
7121 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007122 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007123 {
7124 c1 = mb_ptr2char_adv(&str1);
7125 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007126
7127 /* decompose the character if necessary, into 'base' characters
7128 * because I don't care about Arabic, I will hard-code the Hebrew
7129 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007130 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007131 {
7132 /* decomposition necessary? */
7133 mb_decompose(c1, &c11, &junk, &junk);
7134 mb_decompose(c2, &c12, &junk, &junk);
7135 c1 = c11;
7136 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007137 if (c11 != c12
7138 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007139 break;
7140 }
7141 }
7142 result = c2 - c1;
7143 if (result == 0)
7144 *n = (int)(str2 - s2);
7145 }
7146#endif
7147
7148 return result;
7149}
7150
7151/*
7152 * cstrchr: This function is used a lot for simple searches, keep it fast!
7153 */
7154 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007155cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007156{
7157 char_u *p;
7158 int cc;
7159
Bram Moolenaar6100d022016-10-02 16:51:57 +02007160 if (!rex.reg_ic
Bram Moolenaar071d4272004-06-13 20:20:40 +00007161#ifdef FEAT_MBYTE
7162 || (!enc_utf8 && mb_char2len(c) > 1)
7163#endif
7164 )
7165 return vim_strchr(s, c);
7166
7167 /* tolower() and toupper() can be slow, comparing twice should be a lot
7168 * faster (esp. when using MS Visual C++!).
7169 * For UTF-8 need to use folded case. */
7170#ifdef FEAT_MBYTE
7171 if (enc_utf8 && c > 0x80)
7172 cc = utf_fold(c);
7173 else
7174#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007175 if (MB_ISUPPER(c))
7176 cc = MB_TOLOWER(c);
7177 else if (MB_ISLOWER(c))
7178 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007179 else
7180 return vim_strchr(s, c);
7181
7182#ifdef FEAT_MBYTE
7183 if (has_mbyte)
7184 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007185 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007186 {
7187 if (enc_utf8 && c > 0x80)
7188 {
7189 if (utf_fold(utf_ptr2char(p)) == cc)
7190 return p;
7191 }
7192 else if (*p == c || *p == cc)
7193 return p;
7194 }
7195 }
7196 else
7197#endif
7198 /* Faster version for when there are no multi-byte characters. */
7199 for (p = s; *p != NUL; ++p)
7200 if (*p == c || *p == cc)
7201 return p;
7202
7203 return NULL;
7204}
7205
7206/***************************************************************
7207 * regsub stuff *
7208 ***************************************************************/
7209
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007216typedef void (*(*fptr_T)(int *, int))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007217
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007218static fptr_T do_upper(int *, int);
7219static fptr_T do_Upper(int *, int);
7220static fptr_T do_lower(int *, int);
7221static fptr_T do_Lower(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007222
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007223static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007224
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007225 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007226do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007227{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007228 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007229
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007230 return (fptr_T)NULL;
7231}
7232
7233 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007234do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007235{
7236 *d = MB_TOUPPER(c);
7237
7238 return (fptr_T)do_Upper;
7239}
7240
7241 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007242do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007243{
7244 *d = MB_TOLOWER(c);
7245
7246 return (fptr_T)NULL;
7247}
7248
7249 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007250do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007251{
7252 *d = MB_TOLOWER(c);
7253
7254 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007255}
7256
7257/*
7258 * regtilde(): Replace tildes in the pattern by the old pattern.
7259 *
7260 * Short explanation of the tilde: It stands for the previous replacement
7261 * pattern. If that previous pattern also contains a ~ we should go back a
7262 * step further... But we insert the previous pattern into the current one
7263 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007264 * This still does not handle the case where "magic" changes. So require the
7265 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007266 *
7267 * The tildes are parsed once before the first call to vim_regsub().
7268 */
7269 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007270regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007271{
7272 char_u *newsub = source;
7273 char_u *tmpsub;
7274 char_u *p;
7275 int len;
7276 int prevlen;
7277
7278 for (p = newsub; *p; ++p)
7279 {
7280 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7281 {
7282 if (reg_prev_sub != NULL)
7283 {
7284 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7285 prevlen = (int)STRLEN(reg_prev_sub);
7286 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7287 if (tmpsub != NULL)
7288 {
7289 /* copy prefix */
7290 len = (int)(p - newsub); /* not including ~ */
7291 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007292 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007293 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7294 /* copy postfix */
7295 if (!magic)
7296 ++p; /* back off \ */
7297 STRCPY(tmpsub + len + prevlen, p + 1);
7298
7299 if (newsub != source) /* already allocated newsub */
7300 vim_free(newsub);
7301 newsub = tmpsub;
7302 p = newsub + len + prevlen;
7303 }
7304 }
7305 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007306 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007307 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007308 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007309 --p;
7310 }
7311 else
7312 {
7313 if (*p == '\\' && p[1]) /* skip escaped characters */
7314 ++p;
7315#ifdef FEAT_MBYTE
7316 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007317 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007318#endif
7319 }
7320 }
7321
7322 vim_free(reg_prev_sub);
7323 if (newsub != source) /* newsub was allocated, just keep it */
7324 reg_prev_sub = newsub;
7325 else /* no ~ found, need to save newsub */
7326 reg_prev_sub = vim_strsave(newsub);
7327 return newsub;
7328}
7329
7330#ifdef FEAT_EVAL
7331static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7332
Bram Moolenaar6100d022016-10-02 16:51:57 +02007333/* These pointers are used for reg_submatch(). Needed for when the
7334 * substitution string is an expression that contains a call to substitute()
7335 * and submatch(). */
7336typedef struct {
7337 regmatch_T *sm_match;
7338 regmmatch_T *sm_mmatch;
7339 linenr_T sm_firstlnum;
7340 linenr_T sm_maxline;
7341 int sm_line_lbr;
7342} regsubmatch_T;
7343
7344static regsubmatch_T rsm; /* can only be used when can_f_submatch is TRUE */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007345#endif
7346
7347#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007348
7349/*
7350 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7351 * vim_regsub_both().
7352 */
7353 static int
7354fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7355{
7356 listitem_T *li;
7357 int i;
7358 char_u *s;
7359
7360 if (argcount == 0)
7361 /* called function doesn't take an argument */
7362 return 0;
7363
7364 /* Relies on sl_list to be the first item in staticList10_T. */
7365 init_static_list((staticList10_T *)(argv->vval.v_list));
7366
7367 /* There are always 10 list items in staticList10_T. */
7368 li = argv->vval.v_list->lv_first;
7369 for (i = 0; i < 10; ++i)
7370 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007371 s = rsm.sm_match->startp[i];
7372 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007373 s = NULL;
7374 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007375 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007376 li->li_tv.v_type = VAR_STRING;
7377 li->li_tv.vval.v_string = s;
7378 li = li->li_next;
7379 }
7380 return 1;
7381}
7382
7383 static void
7384clear_submatch_list(staticList10_T *sl)
7385{
7386 int i;
7387
7388 for (i = 0; i < 10; ++i)
7389 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7390}
7391
Bram Moolenaar071d4272004-06-13 20:20:40 +00007392/*
7393 * vim_regsub() - perform substitutions after a vim_regexec() or
7394 * vim_regexec_multi() match.
7395 *
7396 * If "copy" is TRUE really copy into "dest".
7397 * If "copy" is FALSE nothing is copied, this is just to find out the length
7398 * of the result.
7399 *
7400 * If "backslash" is TRUE, a backslash will be removed later, need to double
7401 * them to keep them, and insert a backslash before a CR to avoid it being
7402 * replaced with a line break later.
7403 *
7404 * Note: The matched text must not change between the call of
7405 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7406 * references invalid!
7407 *
7408 * Returns the size of the replacement, including terminating NUL.
7409 */
7410 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007411vim_regsub(
7412 regmatch_T *rmp,
7413 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007414 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007415 char_u *dest,
7416 int copy,
7417 int magic,
7418 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007419{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007420 int result;
7421 regexec_T rex_save;
7422 int rex_in_use_save = rex_in_use;
7423
7424 if (rex_in_use)
7425 /* Being called recursively, save the state. */
7426 rex_save = rex;
7427 rex_in_use = TRUE;
7428
7429 rex.reg_match = rmp;
7430 rex.reg_mmatch = NULL;
7431 rex.reg_maxline = 0;
7432 rex.reg_buf = curbuf;
7433 rex.reg_line_lbr = TRUE;
7434 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7435
7436 rex_in_use = rex_in_use_save;
7437 if (rex_in_use)
7438 rex = rex_save;
7439
7440 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007441}
7442#endif
7443
7444 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007445vim_regsub_multi(
7446 regmmatch_T *rmp,
7447 linenr_T lnum,
7448 char_u *source,
7449 char_u *dest,
7450 int copy,
7451 int magic,
7452 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007453{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007454 int result;
7455 regexec_T rex_save;
7456 int rex_in_use_save = rex_in_use;
7457
7458 if (rex_in_use)
7459 /* Being called recursively, save the state. */
7460 rex_save = rex;
7461 rex_in_use = TRUE;
7462
7463 rex.reg_match = NULL;
7464 rex.reg_mmatch = rmp;
7465 rex.reg_buf = curbuf; /* always works on the current buffer! */
7466 rex.reg_firstlnum = lnum;
7467 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7468 rex.reg_line_lbr = FALSE;
7469 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7470
7471 rex_in_use = rex_in_use_save;
7472 if (rex_in_use)
7473 rex = rex_save;
7474
7475 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007476}
7477
7478 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007479vim_regsub_both(
7480 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007481 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007482 char_u *dest,
7483 int copy,
7484 int magic,
7485 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007486{
7487 char_u *src;
7488 char_u *dst;
7489 char_u *s;
7490 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007491 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007492 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007493 fptr_T func_all = (fptr_T)NULL;
7494 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007495 linenr_T clnum = 0; /* init for GCC */
7496 int len = 0; /* init for GCC */
7497#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007498 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007499#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007500
7501 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007502 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007503 {
7504 EMSG(_(e_null));
7505 return 0;
7506 }
7507 if (prog_magic_wrong())
7508 return 0;
7509 src = source;
7510 dst = dest;
7511
7512 /*
7513 * When the substitute part starts with "\=" evaluate it as an expression.
7514 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007515 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007516 {
7517#ifdef FEAT_EVAL
7518 /* To make sure that the length doesn't change between checking the
7519 * length and copying the string, and to speed up things, the
7520 * resulting string is saved from the call with "copy" == FALSE to the
7521 * call with "copy" == TRUE. */
7522 if (copy)
7523 {
7524 if (eval_result != NULL)
7525 {
7526 STRCPY(dest, eval_result);
7527 dst += STRLEN(eval_result);
Bram Moolenaard23a8232018-02-10 18:45:26 +01007528 VIM_CLEAR(eval_result);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007529 }
7530 }
7531 else
7532 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007533 int prev_can_f_submatch = can_f_submatch;
7534 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007535
7536 vim_free(eval_result);
7537
7538 /* The expression may contain substitute(), which calls us
7539 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02007540 * level. */
7541 if (can_f_submatch)
7542 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007543 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007544 rsm.sm_match = rex.reg_match;
7545 rsm.sm_mmatch = rex.reg_mmatch;
7546 rsm.sm_firstlnum = rex.reg_firstlnum;
7547 rsm.sm_maxline = rex.reg_maxline;
7548 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007549
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007550 if (expr != NULL)
7551 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007552 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007553 int dummy;
7554 char_u buf[NUMBUFLEN];
7555 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007556 staticList10_T matchList;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007557
7558 rettv.v_type = VAR_STRING;
7559 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007560 argv[0].v_type = VAR_LIST;
7561 argv[0].vval.v_list = &matchList.sl_list;
7562 matchList.sl_list.lv_len = 0;
7563 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007564 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007565 s = expr->vval.v_string;
7566 call_func(s, (int)STRLEN(s), &rettv,
7567 1, argv, fill_submatch_list,
7568 0L, 0L, &dummy, TRUE, NULL, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007569 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007570 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007571 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007572 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007573
Bram Moolenaar6100d022016-10-02 16:51:57 +02007574 s = partial_name(partial);
7575 call_func(s, (int)STRLEN(s), &rettv,
7576 1, argv, fill_submatch_list,
7577 0L, 0L, &dummy, TRUE, partial, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007578 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007579 if (matchList.sl_list.lv_len > 0)
7580 /* fill_submatch_list() was called */
7581 clear_submatch_list(&matchList);
7582
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007583 eval_result = get_tv_string_buf_chk(&rettv, buf);
7584 if (eval_result != NULL)
7585 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007586 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007587 }
7588 else
7589 eval_result = eval_to_string(source + 2, NULL, TRUE);
7590
Bram Moolenaar071d4272004-06-13 20:20:40 +00007591 if (eval_result != NULL)
7592 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007593 int had_backslash = FALSE;
7594
Bram Moolenaar91acfff2017-03-12 19:22:36 +01007595 for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007596 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007597 /* Change NL to CR, so that it becomes a line break,
7598 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007599 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007600 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007601 *s = CAR;
7602 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007603 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007604 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007605 /* Change NL to CR here too, so that this works:
7606 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7607 * abc\
7608 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007609 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007610 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007611 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007612 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007613 had_backslash = TRUE;
7614 }
7615 }
7616 if (had_backslash && backslash)
7617 {
7618 /* Backslashes will be consumed, need to double them. */
7619 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7620 if (s != NULL)
7621 {
7622 vim_free(eval_result);
7623 eval_result = s;
7624 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007625 }
7626
7627 dst += STRLEN(eval_result);
7628 }
7629
Bram Moolenaar6100d022016-10-02 16:51:57 +02007630 can_f_submatch = prev_can_f_submatch;
7631 if (can_f_submatch)
7632 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007633 }
7634#endif
7635 }
7636 else
7637 while ((c = *src++) != NUL)
7638 {
7639 if (c == '&' && magic)
7640 no = 0;
7641 else if (c == '\\' && *src != NUL)
7642 {
7643 if (*src == '&' && !magic)
7644 {
7645 ++src;
7646 no = 0;
7647 }
7648 else if ('0' <= *src && *src <= '9')
7649 {
7650 no = *src++ - '0';
7651 }
7652 else if (vim_strchr((char_u *)"uUlLeE", *src))
7653 {
7654 switch (*src++)
7655 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007656 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007657 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007658 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007659 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007660 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007661 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007662 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007663 continue;
7664 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007665 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007666 continue;
7667 }
7668 }
7669 }
7670 if (no < 0) /* Ordinary character. */
7671 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007672 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7673 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007674 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007675 if (copy)
7676 {
7677 *dst++ = c;
7678 *dst++ = *src++;
7679 *dst++ = *src++;
7680 }
7681 else
7682 {
7683 dst += 3;
7684 src += 2;
7685 }
7686 continue;
7687 }
7688
Bram Moolenaar071d4272004-06-13 20:20:40 +00007689 if (c == '\\' && *src != NUL)
7690 {
7691 /* Check for abbreviations -- webb */
7692 switch (*src)
7693 {
7694 case 'r': c = CAR; ++src; break;
7695 case 'n': c = NL; ++src; break;
7696 case 't': c = TAB; ++src; break;
7697 /* Oh no! \e already has meaning in subst pat :-( */
7698 /* case 'e': c = ESC; ++src; break; */
7699 case 'b': c = Ctrl_H; ++src; break;
7700
7701 /* If "backslash" is TRUE the backslash will be removed
7702 * later. Used to insert a literal CR. */
7703 default: if (backslash)
7704 {
7705 if (copy)
7706 *dst = '\\';
7707 ++dst;
7708 }
7709 c = *src++;
7710 }
7711 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007712#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007713 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007714 c = mb_ptr2char(src - 1);
7715#endif
7716
Bram Moolenaardb552d602006-03-23 22:59:57 +00007717 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007718 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007719 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007720 func_one = (fptr_T)(func_one(&cc, c));
7721 else if (func_all != (fptr_T)NULL)
7722 /* Turbo C complains without the typecast */
7723 func_all = (fptr_T)(func_all(&cc, c));
7724 else /* just copy */
7725 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007726
7727#ifdef FEAT_MBYTE
7728 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007729 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007730 int totlen = mb_ptr2len(src - 1);
7731
Bram Moolenaar071d4272004-06-13 20:20:40 +00007732 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007733 mb_char2bytes(cc, dst);
7734 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007735 if (enc_utf8)
7736 {
7737 int clen = utf_ptr2len(src - 1);
7738
7739 /* If the character length is shorter than "totlen", there
7740 * are composing characters; copy them as-is. */
7741 if (clen < totlen)
7742 {
7743 if (copy)
7744 mch_memmove(dst + 1, src - 1 + clen,
7745 (size_t)(totlen - clen));
7746 dst += totlen - clen;
7747 }
7748 }
7749 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007750 }
7751 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007752#endif
7753 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007754 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007755 dst++;
7756 }
7757 else
7758 {
7759 if (REG_MULTI)
7760 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007761 clnum = rex.reg_mmatch->startpos[no].lnum;
7762 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007763 s = NULL;
7764 else
7765 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007766 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
7767 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7768 len = rex.reg_mmatch->endpos[no].col
7769 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007770 else
7771 len = (int)STRLEN(s);
7772 }
7773 }
7774 else
7775 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007776 s = rex.reg_match->startp[no];
7777 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007778 s = NULL;
7779 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007780 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007781 }
7782 if (s != NULL)
7783 {
7784 for (;;)
7785 {
7786 if (len == 0)
7787 {
7788 if (REG_MULTI)
7789 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007790 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007791 break;
7792 if (copy)
7793 *dst = CAR;
7794 ++dst;
7795 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02007796 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7797 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007798 else
7799 len = (int)STRLEN(s);
7800 }
7801 else
7802 break;
7803 }
7804 else if (*s == NUL) /* we hit NUL. */
7805 {
7806 if (copy)
7807 EMSG(_(e_re_damg));
7808 goto exit;
7809 }
7810 else
7811 {
7812 if (backslash && (*s == CAR || *s == '\\'))
7813 {
7814 /*
7815 * Insert a backslash in front of a CR, otherwise
7816 * it will be replaced by a line break.
7817 * Number of backslashes will be halved later,
7818 * double them here.
7819 */
7820 if (copy)
7821 {
7822 dst[0] = '\\';
7823 dst[1] = *s;
7824 }
7825 dst += 2;
7826 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007827 else
7828 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007829#ifdef FEAT_MBYTE
7830 if (has_mbyte)
7831 c = mb_ptr2char(s);
7832 else
7833#endif
7834 c = *s;
7835
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007836 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007837 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007838 func_one = (fptr_T)(func_one(&cc, c));
7839 else if (func_all != (fptr_T)NULL)
7840 /* Turbo C complains without the typecast */
7841 func_all = (fptr_T)(func_all(&cc, c));
7842 else /* just copy */
7843 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007844
7845#ifdef FEAT_MBYTE
7846 if (has_mbyte)
7847 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007848 int l;
7849
7850 /* Copy composing characters separately, one
7851 * at a time. */
7852 if (enc_utf8)
7853 l = utf_ptr2len(s) - 1;
7854 else
7855 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007856
7857 s += l;
7858 len -= l;
7859 if (copy)
7860 mb_char2bytes(cc, dst);
7861 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007862 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007863 else
7864#endif
7865 if (copy)
7866 *dst = cc;
7867 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007868 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007869
Bram Moolenaar071d4272004-06-13 20:20:40 +00007870 ++s;
7871 --len;
7872 }
7873 }
7874 }
7875 no = -1;
7876 }
7877 }
7878 if (copy)
7879 *dst = NUL;
7880
7881exit:
7882 return (int)((dst - dest) + 1);
7883}
7884
7885#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007886static char_u *reg_getline_submatch(linenr_T lnum);
Bram Moolenaard32a3192009-11-26 19:40:49 +00007887
Bram Moolenaar071d4272004-06-13 20:20:40 +00007888/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007889 * Call reg_getline() with the line numbers from the submatch. If a
7890 * substitute() was used the reg_maxline and other values have been
7891 * overwritten.
7892 */
7893 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007894reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007895{
7896 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007897 linenr_T save_first = rex.reg_firstlnum;
7898 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007899
Bram Moolenaar6100d022016-10-02 16:51:57 +02007900 rex.reg_firstlnum = rsm.sm_firstlnum;
7901 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007902
7903 s = reg_getline(lnum);
7904
Bram Moolenaar6100d022016-10-02 16:51:57 +02007905 rex.reg_firstlnum = save_first;
7906 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007907 return s;
7908}
7909
7910/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007911 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007912 * allocated memory.
7913 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7914 */
7915 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007916reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007917{
7918 char_u *retval = NULL;
7919 char_u *s;
7920 int len;
7921 int round;
7922 linenr_T lnum;
7923
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007924 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007925 return NULL;
7926
Bram Moolenaar6100d022016-10-02 16:51:57 +02007927 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007928 {
7929 /*
7930 * First round: compute the length and allocate memory.
7931 * Second round: copy the text.
7932 */
7933 for (round = 1; round <= 2; ++round)
7934 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007935 lnum = rsm.sm_mmatch->startpos[no].lnum;
7936 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007937 return NULL;
7938
Bram Moolenaar6100d022016-10-02 16:51:57 +02007939 s = reg_getline_submatch(lnum) + rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007940 if (s == NULL) /* anti-crash check, cannot happen? */
7941 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007942 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007943 {
7944 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007945 len = rsm.sm_mmatch->endpos[no].col
7946 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007947 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007948 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007949 ++len;
7950 }
7951 else
7952 {
7953 /* Multiple lines: take start line from start col, middle
7954 * lines completely and end line up to end col. */
7955 len = (int)STRLEN(s);
7956 if (round == 2)
7957 {
7958 STRCPY(retval, s);
7959 retval[len] = '\n';
7960 }
7961 ++len;
7962 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007963 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007964 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007965 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007966 if (round == 2)
7967 STRCPY(retval + len, s);
7968 len += (int)STRLEN(s);
7969 if (round == 2)
7970 retval[len] = '\n';
7971 ++len;
7972 }
7973 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007974 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007975 rsm.sm_mmatch->endpos[no].col);
7976 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007977 if (round == 2)
7978 retval[len] = NUL;
7979 ++len;
7980 }
7981
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007982 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007983 {
7984 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007985 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007986 return NULL;
7987 }
7988 }
7989 }
7990 else
7991 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007992 s = rsm.sm_match->startp[no];
7993 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007994 retval = NULL;
7995 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007996 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007997 }
7998
7999 return retval;
8000}
Bram Moolenaar41571762014-04-02 19:00:58 +02008001
8002/*
8003 * Used for the submatch() function with the optional non-zero argument: get
8004 * the list of strings from the n'th submatch in allocated memory with NULs
8005 * represented in NLs.
8006 * Returns a list of allocated strings. Returns NULL when not in a ":s"
8007 * command, for a non-existing submatch and for any error.
8008 */
8009 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01008010reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02008011{
8012 char_u *s;
8013 linenr_T slnum;
8014 linenr_T elnum;
8015 colnr_T scol;
8016 colnr_T ecol;
8017 int i;
8018 list_T *list;
8019 int error = FALSE;
8020
8021 if (!can_f_submatch || no < 0)
8022 return NULL;
8023
Bram Moolenaar6100d022016-10-02 16:51:57 +02008024 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008025 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008026 slnum = rsm.sm_mmatch->startpos[no].lnum;
8027 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02008028 if (slnum < 0 || elnum < 0)
8029 return NULL;
8030
Bram Moolenaar6100d022016-10-02 16:51:57 +02008031 scol = rsm.sm_mmatch->startpos[no].col;
8032 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02008033
8034 list = list_alloc();
8035 if (list == NULL)
8036 return NULL;
8037
8038 s = reg_getline_submatch(slnum) + scol;
8039 if (slnum == elnum)
8040 {
8041 if (list_append_string(list, s, ecol - scol) == FAIL)
8042 error = TRUE;
8043 }
8044 else
8045 {
8046 if (list_append_string(list, s, -1) == FAIL)
8047 error = TRUE;
8048 for (i = 1; i < elnum - slnum; i++)
8049 {
8050 s = reg_getline_submatch(slnum + i);
8051 if (list_append_string(list, s, -1) == FAIL)
8052 error = TRUE;
8053 }
8054 s = reg_getline_submatch(elnum);
8055 if (list_append_string(list, s, ecol) == FAIL)
8056 error = TRUE;
8057 }
8058 }
8059 else
8060 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008061 s = rsm.sm_match->startp[no];
8062 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008063 return NULL;
8064 list = list_alloc();
8065 if (list == NULL)
8066 return NULL;
8067 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02008068 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008069 error = TRUE;
8070 }
8071
8072 if (error)
8073 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02008074 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02008075 return NULL;
8076 }
8077 return list;
8078}
Bram Moolenaar071d4272004-06-13 20:20:40 +00008079#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008080
8081static regengine_T bt_regengine =
8082{
8083 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008084 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008085 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008086 bt_regexec_multi,
8087 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008088};
8089
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008090#include "regexp_nfa.c"
8091
8092static regengine_T nfa_regengine =
8093{
8094 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008095 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008096 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008097 nfa_regexec_multi,
8098 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008099};
8100
/*
 * The regexp engine to use.  Needed for vim_regcomp(), which sets it from
 * 'regexpengine' and from a "\%#=" prefix in the pattern.
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;

#ifdef DEBUG
/* Engine names for the debug message, indexed by engine number. */
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			};
#endif
8112
8113/*
8114 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008115 * Returns the program in allocated memory.
8116 * Use vim_regfree() to free the memory.
8117 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008118 */
8119 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01008120vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008121{
8122 regprog_T *prog = NULL;
8123 char_u *expr = expr_arg;
8124
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008125 regexp_engine = p_re;
8126
8127 /* Check for prefix "\%#=", that sets the regexp engine */
8128 if (STRNCMP(expr, "\\%#=", 4) == 0)
8129 {
8130 int newengine = expr[4] - '0';
8131
8132 if (newengine == AUTOMATIC_ENGINE
8133 || newengine == BACKTRACKING_ENGINE
8134 || newengine == NFA_ENGINE)
8135 {
8136 regexp_engine = expr[4] - '0';
8137 expr += 5;
8138#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02008139 smsg((char_u *)"New regexp mode selected (%d): %s",
8140 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008141#endif
8142 }
8143 else
8144 {
8145 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8146 regexp_engine = AUTOMATIC_ENGINE;
8147 }
8148 }
Bram Moolenaar0270f382018-07-17 05:43:58 +02008149#ifdef DEBUG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008150 bt_regengine.expr = expr;
8151 nfa_regengine.expr = expr;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008152#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008153
8154 /*
8155 * First try the NFA engine, unless backtracking was requested.
8156 */
8157 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaard23a8232018-02-10 18:45:26 +01008158 prog = nfa_regengine.regcomp(expr,
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008159 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008160 else
8161 prog = bt_regengine.regcomp(expr, re_flags);
8162
Bram Moolenaarfda37292014-11-05 14:27:36 +01008163 /* Check for error compiling regexp with initial engine. */
8164 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008165 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008166#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008167 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8168 {
8169 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008170 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008171 if (f)
8172 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008173 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008174 fclose(f);
8175 }
8176 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008177 EMSG2("(NFA) Could not open \"%s\" to write !!!",
Bram Moolenaard23a8232018-02-10 18:45:26 +01008178 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008179 }
8180#endif
8181 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008182 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008183 * The NFA engine also fails for patterns that it can't handle well
8184 * but are still valid patterns, thus a retry should work.
8185 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008186 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008187 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008188 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008189 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008190 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008191 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008192
Bram Moolenaarfda37292014-11-05 14:27:36 +01008193 if (prog != NULL)
8194 {
8195 /* Store the info needed to call regcomp() again when the engine turns
8196 * out to be very slow when executing it. */
8197 prog->re_engine = regexp_engine;
8198 prog->re_flags = re_flags;
8199 }
8200
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008201 return prog;
8202}
8203
8204/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008205 * Free a compiled regexp program, returned by vim_regcomp().
8206 */
8207 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008208vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008209{
8210 if (prog != NULL)
8211 prog->engine->regfree(prog);
8212}
8213
Bram Moolenaarfda37292014-11-05 14:27:36 +01008214#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01008215static void report_re_switch(char_u *pat);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008216
8217 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01008218report_re_switch(char_u *pat)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008219{
8220 if (p_verbose > 0)
8221 {
8222 verbose_enter();
8223 MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
8224 MSG_PUTS(pat);
8225 verbose_leave();
8226 }
8227}
8228#endif
8229
Bram Moolenaar473de612013-06-08 18:19:48 +02008230/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008231 * Match a regexp against a string.
8232 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008233 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008234 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008235 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008236 *
8237 * Return TRUE if there is a match, FALSE if not.
8238 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008239 static int
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008240vim_regexec_string(
Bram Moolenaar05540972016-01-30 20:31:25 +01008241 regmatch_T *rmp,
8242 char_u *line, /* string to match against */
8243 colnr_T col, /* column to start looking for match */
8244 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008245{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008246 int result;
8247 regexec_T rex_save;
8248 int rex_in_use_save = rex_in_use;
8249
Bram Moolenaar0270f382018-07-17 05:43:58 +02008250 // Cannot use the same prog recursively, it contains state.
8251 if (rmp->regprog->re_in_use)
8252 {
8253 EMSG(_(e_recursive));
8254 return FALSE;
8255 }
8256 rmp->regprog->re_in_use = TRUE;
8257
Bram Moolenaar6100d022016-10-02 16:51:57 +02008258 if (rex_in_use)
Bram Moolenaar0270f382018-07-17 05:43:58 +02008259 // Being called recursively, save the state.
Bram Moolenaar6100d022016-10-02 16:51:57 +02008260 rex_save = rex;
8261 rex_in_use = TRUE;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008262
Bram Moolenaar6100d022016-10-02 16:51:57 +02008263 rex.reg_startp = NULL;
8264 rex.reg_endp = NULL;
8265 rex.reg_startpos = NULL;
8266 rex.reg_endpos = NULL;
8267
8268 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008269
8270 /* NFA engine aborted because it's very slow. */
8271 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8272 && result == NFA_TOO_EXPENSIVE)
8273 {
8274 int save_p_re = p_re;
8275 int re_flags = rmp->regprog->re_flags;
8276 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8277
8278 p_re = BACKTRACKING_ENGINE;
8279 vim_regfree(rmp->regprog);
8280 if (pat != NULL)
8281 {
8282#ifdef FEAT_EVAL
8283 report_re_switch(pat);
8284#endif
8285 rmp->regprog = vim_regcomp(pat, re_flags);
8286 if (rmp->regprog != NULL)
8287 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8288 vim_free(pat);
8289 }
8290
8291 p_re = save_p_re;
8292 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008293
8294 rex_in_use = rex_in_use_save;
8295 if (rex_in_use)
8296 rex = rex_save;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008297 rmp->regprog->re_in_use = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02008298
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008299 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008300}
8301
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008302/*
8303 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008304 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008305 */
8306 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008307vim_regexec_prog(
8308 regprog_T **prog,
8309 int ignore_case,
8310 char_u *line,
8311 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008312{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008313 int r;
8314 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008315
8316 regmatch.regprog = *prog;
8317 regmatch.rm_ic = ignore_case;
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008318 r = vim_regexec_string(&regmatch, line, col, FALSE);
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008319 *prog = regmatch.regprog;
8320 return r;
8321}
8322
8323/*
8324 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008325 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008326 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008327 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008328vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008329{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008330 return vim_regexec_string(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008331}
8332
8333#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
8334 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
8335/*
8336 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008337 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008338 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008339 */
8340 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008341vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008342{
Bram Moolenaar06f1ed22017-06-18 22:41:03 +02008343 return vim_regexec_string(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008344}
8345#endif
8346
8347/*
8348 * Match a regexp against multiple lines.
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008349 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
8350 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008351 * Uses curbuf for line count and 'iskeyword'.
8352 *
8353 * Return zero if there is no match. Return number of lines contained in the
8354 * match otherwise.
8355 */
8356 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008357vim_regexec_multi(
8358 regmmatch_T *rmp,
Bram Moolenaard23a8232018-02-10 18:45:26 +01008359 win_T *win, /* window in which to search or NULL */
8360 buf_T *buf, /* buffer in which to search */
8361 linenr_T lnum, /* nr of line to start looking for match */
8362 colnr_T col, /* column to start looking for match */
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008363 proftime_T *tm, /* timeout limit or NULL */
8364 int *timed_out) /* flag is set when timeout limit reached */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008365{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008366 int result;
8367 regexec_T rex_save;
8368 int rex_in_use_save = rex_in_use;
8369
Bram Moolenaar0270f382018-07-17 05:43:58 +02008370 // Cannot use the same prog recursively, it contains state.
8371 if (rmp->regprog->re_in_use)
8372 {
8373 EMSG(_(e_recursive));
8374 return FALSE;
8375 }
8376 rmp->regprog->re_in_use = TRUE;
8377
Bram Moolenaar6100d022016-10-02 16:51:57 +02008378 if (rex_in_use)
8379 /* Being called recursively, save the state. */
8380 rex_save = rex;
8381 rex_in_use = TRUE;
8382
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008383 result = rmp->regprog->engine->regexec_multi(
8384 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008385
8386 /* NFA engine aborted because it's very slow. */
8387 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8388 && result == NFA_TOO_EXPENSIVE)
8389 {
8390 int save_p_re = p_re;
8391 int re_flags = rmp->regprog->re_flags;
8392 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8393
8394 p_re = BACKTRACKING_ENGINE;
8395 vim_regfree(rmp->regprog);
8396 if (pat != NULL)
8397 {
8398#ifdef FEAT_EVAL
8399 report_re_switch(pat);
8400#endif
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008401#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008402 // checking for \z misuse was already done when compiling for NFA,
8403 // allow all here
8404 reg_do_extmatch = REX_ALL;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008405#endif
Bram Moolenaarfda37292014-11-05 14:27:36 +01008406 rmp->regprog = vim_regcomp(pat, re_flags);
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008407#ifdef FEAT_SYN_HL
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008408 reg_do_extmatch = 0;
Bram Moolenaar1f8c4692018-06-23 15:09:10 +02008409#endif
Bram Moolenaarbcf94422018-06-23 14:21:42 +02008410
Bram Moolenaarfda37292014-11-05 14:27:36 +01008411 if (rmp->regprog != NULL)
8412 result = rmp->regprog->engine->regexec_multi(
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02008413 rmp, win, buf, lnum, col, tm, timed_out);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008414 vim_free(pat);
8415 }
8416 p_re = save_p_re;
8417 }
8418
Bram Moolenaar6100d022016-10-02 16:51:57 +02008419 rex_in_use = rex_in_use_save;
8420 if (rex_in_use)
8421 rex = rex_save;
Bram Moolenaar0270f382018-07-17 05:43:58 +02008422 rmp->regprog->re_in_use = FALSE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02008423
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008424 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008425}