blob: 78d643174e7f97fab9a470e0c64073db8f181fd0 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
86 * pointer with a BRANCH on both ends of it is connecting two alternatives.
87 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
88 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
 * pattern       is coded like:
 *
 *                        +-----------------+
 *                        |                 V
 * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
 *                   |      ^    |          ^
 *                   +------+    +----------+
 *
 *
 *                     +------------------+
 *                     V                  |
 * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
 *                   |      |               ^                      ^
 *                   |      +---------------+                      |
 *                   +---------------------------------------------+
 *
 *
 *                     +----------------------+
 *                     V                      |
 * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 *                   |               |           ^                      ^
 *                   |               +-----------+                      |
 *                   +--------------------------------------------------+
 *
 *
 *                                      +-------------------------+
 *                                      V                         |
 * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 *                   |                              |                ^
 *                   |                              +----------------+
 *                   +-----------------------------------------------+
 *
 *
 * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 *                   |       |                ^       ^
 *                   |       +----------------+       |
 *                   +--------------------------------+
 *
 *                                                    +---------+
 *                                                    |         V
 * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 *                   |      |          |          |     ^                   ^
 *                   |      |          |          +-----+                   |
 *                   |      |          +----------------+                   |
 *                   |      +---------------------------+                   |
 *                   +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: 20-48 normal, 50-78 include a line-break.
 * Adding ADD_NL to a normal class opcode gives the variant that also matches
 * a line-break.  FIRST_NL and LAST_NL are the bounds of that second range;
 * they are fully parenthesized so that they can be used safely inside any
 * larger expression.
 */
#define ADD_NL      30
#define FIRST_NL    (ANY + ADD_NL)
#define ANY         20  /*      Match any one character. */
#define ANYOF       21  /* str  Match any character in this string. */
#define ANYBUT      22  /* str  Match any character not in this
                         *      string. */
#define IDENT       23  /*      Match identifier char */
#define SIDENT      24  /*      Match identifier char but no digit */
#define KWORD       25  /*      Match keyword char */
#define SKWORD      26  /*      Match word char but no digit */
#define FNAME       27  /*      Match file name char */
#define SFNAME      28  /*      Match file name char but no digit */
#define PRINT       29  /*      Match printable char */
#define SPRINT      30  /*      Match printable char but no digit */
#define WHITE       31  /*      Match whitespace char */
#define NWHITE      32  /*      Match non-whitespace char */
#define DIGIT       33  /*      Match digit char */
#define NDIGIT      34  /*      Match non-digit char */
#define HEX         35  /*      Match hex char */
#define NHEX        36  /*      Match non-hex char */
#define OCTAL       37  /*      Match octal char */
#define NOCTAL      38  /*      Match non-octal char */
#define WORD        39  /*      Match word char */
#define NWORD       40  /*      Match non-word char */
#define HEAD        41  /*      Match head char */
#define NHEAD       42  /*      Match non-head char */
#define ALPHA       43  /*      Match alpha char */
#define NALPHA      44  /*      Match non-alpha char */
#define LOWER       45  /*      Match lowercase char */
#define NLOWER      46  /*      Match non-lowercase char */
#define UPPER       47  /*      Match uppercase char */
#define NUPPER      48  /*      Match non-uppercase char */
#define LAST_NL     (NUPPER + ADD_NL)
/* TRUE when opcode "op" is one of the classes that include a line-break. */
#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249/*
250 * Magic characters have a special meaning, they don't match literally.
251 * Magic characters are negative. This separates them from literal characters
252 * (possibly multi-byte). Only ASCII characters can be Magic.
253 */
#define Magic(x)    ((int)(x) - 256)
#define un_Magic(x) ((x) + 256)
#define is_Magic(x) ((x) < 0)

/*
 * Return "x" as a literal character: a Magic value is converted back with
 * un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the Magic-ness of "x": a Magic value becomes the literal character,
 * a literal character becomes its Magic counterpart.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
273
274/*
275 * The first byte of the regexp internal "program" is actually this magic
276 * number; the start node begins in the second byte. It's used to catch the
277 * most severe mutilation of the program by the caller.
278 */
279
280#define REGMAGIC 0234
281
282/*
283 * Opcode notes:
284 *
285 * BRANCH The set of branches constituting a single choice are hooked
286 * together with their "next" pointers, since precedence prevents
287 * anything being concatenated to any individual branch. The
288 * "next" pointer of the last BRANCH in a choice points to the
289 * thing following the whole choice. This is also where the
290 * final "next" pointer of each individual branch points; each
291 * branch starts with the operand node of a BRANCH node.
292 *
293 * BACK Normal "next" pointers all implicitly point forward; BACK
294 * exists to make loop structures possible.
295 *
296 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
297 * BRANCH structures using BACK. Simple cases (one character
298 * per match) are implemented with STAR and PLUS for speed
299 * and to minimize recursive plunges.
300 *
301 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
302 * node, and defines the min and max limits to be used for that
303 * node.
304 *
305 * MOPEN,MCLOSE ...are numbered at compile time.
306 * ZOPEN,ZCLOSE ...ditto
307 */
308
309/*
310 * A node is one char of opcode followed by two chars of "next" pointer.
311 * "Next" pointers are stored as two 8-bit bytes, high order first. The
312 * value is a positive offset from the opcode of the node containing it.
313 * An operand, if any, simply follows the node. (Note that much of the
314 * code generation knows about this implicit relationship.)
315 *
316 * Using two bytes for the "next" pointer is vast overkill for most things,
317 * but allows patterns to get big without disasters.
318 */
319#define OP(p) ((int)*(p))
320#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
321#define OPERAND(p) ((p) + 3)
322/* Obtain an operand that was stored as four bytes, MSB first. */
323#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
324 + ((long)(p)[5] << 8) + (long)(p)[6])
325/* Obtain a second operand stored as four bytes. */
326#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
327/* Obtain a second single-byte operand stored after a four bytes operand. */
328#define OPERAND_CMP(p) (p)[7]
329
330/*
331 * Utility definitions.
332 */
333#define UCHARAT(p) ((int)*(char_u *)(p))
334
335/* Used for an error (down from) vim_regcomp(): give the error message, set
336 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000337#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar95f09602016-11-10 20:01:45 +0100338#define IEMSG_RET_NULL(m) return (IEMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000339#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200340#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
341#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
342#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343
Bram Moolenaar95f09602016-11-10 20:01:45 +0100344
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345#define MAX_LIMIT (32767L << 16L)
346
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100347static int re_multi_type(int);
348static int cstrncmp(char_u *s1, char_u *s2, int *n);
349static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000350
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200351#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100352static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100355static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#endif
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
361static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
362static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
363static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200364#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200365static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
366static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200367#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200368static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200369static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370#define NOT_MULTI 0
371#define MULTI_ONE 1
372#define MULTI_MULT 2
373/*
374 * Return NOT_MULTI if c is not a "multi" operator.
375 * Return MULTI_ONE if c is a single "multi" operator.
376 * Return MULTI_MULT if c is a multi "multi" operator.
377 */
378 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100379re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000380{
381 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
382 return MULTI_ONE;
383 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
384 return MULTI_MULT;
385 return NOT_MULTI;
386}
387
388/*
389 * Flags to be passed up and down.
390 */
391#define HASWIDTH 0x1 /* Known never to match null string. */
392#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
393#define SPSTART 0x4 /* Starts with * or +. */
394#define HASNL 0x8 /* Contains some \n. */
395#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
396#define WORST 0 /* Worst case. */
397
398/*
399 * When regcode is set to this value, code is not emitted and size is computed
400 * instead.
401 */
402#define JUST_CALC_SIZE ((char_u *) -1)
403
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000404static char_u *reg_prev_sub = NULL;
405
Bram Moolenaar071d4272004-06-13 20:20:40 +0000406/*
407 * REGEXP_INRANGE contains all characters which are always special in a []
408 * range after '\'.
409 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
410 * These are:
411 * \n - New line (NL).
412 * \r - Carriage Return (CR).
413 * \t - Tab (TAB).
414 * \e - Escape (ESC).
415 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000416 * \d - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
418 * \x - Character code in hex, eg \x4a
419 * \u - Multibyte character code, eg \u20ac
420 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000421 */
422static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000423static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100425static int backslash_trans(int c);
426static int get_char_class(char_u **pp);
427static int get_equi_class(char_u **pp);
428static void reg_equi_class(int c);
429static int get_coll_element(char_u **pp);
430static char_u *skip_anyof(char_u *p);
431static void init_class_tab(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000432
433/*
434 * Translate '\x' to its control character, except "\n", which is Magic.
435 */
436 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100437backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000438{
439 switch (c)
440 {
441 case 'r': return CAR;
442 case 't': return TAB;
443 case 'e': return ESC;
444 case 'b': return BS;
445 }
446 return c;
447}
448
449/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
452 * recognized. Otherwise "pp" is advanced to after the item.
453 */
454 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100455get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000456{
457 static const char *(class_names[]) =
458 {
459 "alnum:]",
460#define CLASS_ALNUM 0
461 "alpha:]",
462#define CLASS_ALPHA 1
463 "blank:]",
464#define CLASS_BLANK 2
465 "cntrl:]",
466#define CLASS_CNTRL 3
467 "digit:]",
468#define CLASS_DIGIT 4
469 "graph:]",
470#define CLASS_GRAPH 5
471 "lower:]",
472#define CLASS_LOWER 6
473 "print:]",
474#define CLASS_PRINT 7
475 "punct:]",
476#define CLASS_PUNCT 8
477 "space:]",
478#define CLASS_SPACE 9
479 "upper:]",
480#define CLASS_UPPER 10
481 "xdigit:]",
482#define CLASS_XDIGIT 11
483 "tab:]",
484#define CLASS_TAB 12
485 "return:]",
486#define CLASS_RETURN 13
487 "backspace:]",
488#define CLASS_BACKSPACE 14
489 "escape:]",
490#define CLASS_ESCAPE 15
491 };
492#define CLASS_NONE 99
493 int i;
494
495 if ((*pp)[1] == ':')
496 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000497 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000498 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
499 {
500 *pp += STRLEN(class_names[i]) + 2;
501 return i;
502 }
503 }
504 return CLASS_NONE;
505}
506
507/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000508 * Specific version of character class functions.
509 * Using a table to keep this fast.
510 */
511static short class_tab[256];
512
513#define RI_DIGIT 0x01
514#define RI_HEX 0x02
515#define RI_OCTAL 0x04
516#define RI_WORD 0x08
517#define RI_HEAD 0x10
518#define RI_ALPHA 0x20
519#define RI_LOWER 0x40
520#define RI_UPPER 0x80
521#define RI_WHITE 0x100
522
523 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100524init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000525{
526 int i;
527 static int done = FALSE;
528
529 if (done)
530 return;
531
532 for (i = 0; i < 256; ++i)
533 {
534 if (i >= '0' && i <= '7')
535 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
536 else if (i >= '8' && i <= '9')
537 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
538 else if (i >= 'a' && i <= 'f')
539 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
540#ifdef EBCDIC
541 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
542 || (i >= 's' && i <= 'z'))
543#else
544 else if (i >= 'g' && i <= 'z')
545#endif
546 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
547 else if (i >= 'A' && i <= 'F')
548 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
549#ifdef EBCDIC
550 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
551 || (i >= 'S' && i <= 'Z'))
552#else
553 else if (i >= 'G' && i <= 'Z')
554#endif
555 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
556 else if (i == '_')
557 class_tab[i] = RI_WORD + RI_HEAD;
558 else
559 class_tab[i] = 0;
560 }
561 class_tab[' '] |= RI_WHITE;
562 class_tab['\t'] |= RI_WHITE;
563 done = TRUE;
564}
565
/*
 * Character class tests using class_tab[]; each one is non-zero when "c" has
 * the RI_ flag concerned.  The FEAT_MBYTE variants first reject values of
 * 0x100 and above, which have no entry in the 256-element table.
 * The argument is parenthesized in the range test so that an expression such
 * as "a ? b : c" can be passed safely.  Note that "c" is evaluated twice in
 * the FEAT_MBYTE variants: do not pass an expression with side effects.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
587
588/* flags for regflags */
589#define RF_ICASE 1 /* ignore case */
590#define RF_NOICASE 2 /* don't ignore case */
591#define RF_HASNL 4 /* can match a NL */
592#define RF_ICOMBINE 8 /* ignore combining characters */
593#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
594
595/*
596 * Global work variables for vim_regcomp().
597 */
598
599static char_u *regparse; /* Input-scan pointer. */
600static int prevchr_len; /* byte length of previous char */
601static int num_complex_braces; /* Complex \{...} count */
602static int regnpar; /* () count. */
603#ifdef FEAT_SYN_HL
604static int regnzpar; /* \z() count. */
605static int re_has_z; /* \z item detected */
606#endif
607static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
608static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000609static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
611static unsigned regflags; /* RF_ flags for prog */
612static long brace_min[10]; /* Minimums for complex brace repeats */
613static long brace_max[10]; /* Maximums for complex brace repeats */
614static int brace_count[10]; /* Current counts for complex brace repeats */
615#if defined(FEAT_SYN_HL) || defined(PROTO)
616static int had_eol; /* TRUE when EOL found by vim_regcomp() */
617#endif
618static int one_exactly = FALSE; /* only do one char for EXACTLY */
619
620static int reg_magic; /* magicness of the pattern: */
621#define MAGIC_NONE 1 /* "\V" very unmagic */
622#define MAGIC_OFF 2 /* "\M" or 'magic' off */
623#define MAGIC_ON 3 /* "\m" or 'magic' */
624#define MAGIC_ALL 4 /* "\v" very magic */
625
626static int reg_string; /* matching with a string instead of a buffer
627 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000628static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000629
630/*
631 * META contains all characters that may be magic, except '^' and '$'.
632 */
633
634#ifdef EBCDIC
635static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
636#else
637/* META[] is used often enough to justify turning it into a table. */
638static char_u META_flags[] = {
639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641/* % & ( ) * + . */
642 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
643/* 1 2 3 4 5 6 7 8 9 < = > ? */
644 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
645/* @ A C D F H I K L M O */
646 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
647/* P S U V W X Z [ _ */
648 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
649/* a c d f h i k l m n o */
650 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
651/* p s u v w x z { | ~ */
652 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
653};
654#endif
655
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200656static int curchr; /* currently parsed character */
657/* Previous character. Note: prevchr is sometimes -1 when we are not at the
658 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
659 * because ^ was taken to be magic -- webb */
660static int prevchr;
661static int prevprevchr; /* previous-previous character */
662static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000663
664/* arguments for reg() */
665#define REG_NOPAREN 0 /* toplevel reg() */
666#define REG_PAREN 1 /* \(\) */
667#define REG_ZPAREN 2 /* \z(\) */
668#define REG_NPAREN 3 /* \%(\) */
669
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200670typedef struct
671{
672 char_u *regparse;
673 int prevchr_len;
674 int curchr;
675 int prevchr;
676 int prevprevchr;
677 int nextchr;
678 int at_start;
679 int prev_at_start;
680 int regnpar;
681} parse_state_T;
682
Bram Moolenaar071d4272004-06-13 20:20:40 +0000683/*
684 * Forward declarations for vim_regcomp()'s friends.
685 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100686static void initchr(char_u *);
687static void save_parse_state(parse_state_T *ps);
688static void restore_parse_state(parse_state_T *ps);
689static int getchr(void);
690static void skipchr_keepstart(void);
691static int peekchr(void);
692static void skipchr(void);
693static void ungetchr(void);
694static int gethexchrs(int maxinputlen);
695static int getoctchrs(void);
696static int getdecchrs(void);
697static int coll_get_char(void);
698static void regcomp_start(char_u *expr, int flags);
699static char_u *reg(int, int *);
700static char_u *regbranch(int *flagp);
701static char_u *regconcat(int *flagp);
702static char_u *regpiece(int *);
703static char_u *regatom(int *);
704static char_u *regnode(int);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000705#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100706static int use_multibytecode(int c);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000707#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100708static int prog_magic_wrong(void);
709static char_u *regnext(char_u *);
710static void regc(int b);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100712static void regmbc(int c);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200713# define REGMBC(x) regmbc(x);
714# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000715#else
716# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200717# define REGMBC(x)
718# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000719#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100720static void reginsert(int, char_u *);
721static void reginsert_nr(int op, long val, char_u *opnd);
722static void reginsert_limits(int, long, long, char_u *);
723static char_u *re_put_long(char_u *pr, long_u val);
724static int read_limits(long *, long *);
725static void regtail(char_u *, char_u *);
726static void regoptail(char_u *, char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000727
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200728static regengine_T bt_regengine;
729static regengine_T nfa_regengine;
730
Bram Moolenaar071d4272004-06-13 20:20:40 +0000731/*
732 * Return TRUE if compiled regular expression "prog" can match a line break.
733 */
734 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100735re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000736{
737 return (prog->regflags & RF_HASNL);
738}
739
740/*
741 * Return TRUE if compiled regular expression "prog" looks before the start
742 * position (pattern contains "\@<=" or "\@<!").
743 */
744 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100745re_lookbehind(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000746{
747 return (prog->regflags & RF_LOOKBH);
748}
749
750/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000751 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
752 * Returns a character representing the class. Zero means that no item was
753 * recognized. Otherwise "pp" is advanced to after the item.
754 */
755 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100756get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000757{
758 int c;
759 int l = 1;
760 char_u *p = *pp;
761
762 if (p[1] == '=')
763 {
764#ifdef FEAT_MBYTE
765 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000766 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000767#endif
768 if (p[l + 2] == '=' && p[l + 3] == ']')
769 {
770#ifdef FEAT_MBYTE
771 if (has_mbyte)
772 c = mb_ptr2char(p + 2);
773 else
774#endif
775 c = p[2];
776 *pp += l + 4;
777 return c;
778 }
779 }
780 return 0;
781}
782
#ifdef EBCDIC
/*
 * Equivalence class table for EBCDIC builds (IBM-1047 code page).
 * Each string is one class: reg_equi_class() looks for the string that
 * contains the character and then emits every byte of that string.
 * Upper case classes come first, the lower case ones follow.
 */
char *EQUIVAL_CLASS_C[16] = {
    /* upper case */
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF\x80",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    /* lower case */
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF\x70",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
806
Bram Moolenaardf177f62005-02-22 08:39:57 +0000807/*
808 * Produce the bytes for equivalence class "c".
809 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200810 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000811 */
812 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100813reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000814{
815#ifdef FEAT_MBYTE
816 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000817 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000818#endif
819 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200820#ifdef EBCDIC
821 int i;
822
823 /* This might be slower than switch/case below. */
824 for (i = 0; i < 16; i++)
825 {
826 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
827 {
828 char *p = EQUIVAL_CLASS_C[i];
829
830 while (*p != 0)
831 regmbc(*p++);
832 return;
833 }
834 }
835#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000836 switch (c)
837 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200838 /* Do not use '\300' style, it results in a negative number. */
839 case 'A': case 0xc0: case 0xc1: case 0xc2:
840 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200841 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
842 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200843 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
844 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
845 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200846 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
847 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
848 REGMBC(0x1ea2)
849 return;
850 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
851 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000852 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200853 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200854 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200855 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200856 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
857 REGMBC(0x10c)
858 return;
859 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
860 CASEMBC(0x1e0e) CASEMBC(0x1e10)
861 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
862 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000863 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200864 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200865 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
866 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200867 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
868 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200869 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
870 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
871 REGMBC(0x1ebc)
872 return;
873 case 'F': CASEMBC(0x1e1e)
874 regmbc('F'); REGMBC(0x1e1e)
875 return;
876 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
877 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
878 CASEMBC(0x1e20)
879 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
880 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
881 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
882 return;
883 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
884 CASEMBC(0x1e26) CASEMBC(0x1e28)
885 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
886 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000887 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200888 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200889 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
890 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200891 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
892 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200893 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
894 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
895 REGMBC(0x1ec8)
896 return;
897 case 'J': CASEMBC(0x134)
898 regmbc('J'); REGMBC(0x134)
899 return;
900 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
901 CASEMBC(0x1e34)
902 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
903 REGMBC(0x1e30) REGMBC(0x1e34)
904 return;
905 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
906 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
907 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
908 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
909 REGMBC(0x1e3a)
910 return;
911 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
912 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000913 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200914 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200915 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
916 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200917 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200918 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
919 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000920 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200921 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
922 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200923 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
924 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200925 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
926 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
927 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200928 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
929 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
930 REGMBC(0x1ec) REGMBC(0x1ece)
931 return;
932 case 'P': case 0x1e54: case 0x1e56:
933 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
934 return;
935 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
936 CASEMBC(0x1e58) CASEMBC(0x1e5e)
937 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
938 REGMBC(0x1e58) REGMBC(0x1e5e)
939 return;
940 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
941 CASEMBC(0x160) CASEMBC(0x1e60)
942 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
943 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
944 return;
945 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
946 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
947 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
948 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000949 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200950 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200951 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
952 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
953 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200954 regmbc('U'); regmbc(0xd9); regmbc(0xda);
955 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200956 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
957 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
958 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
959 return;
960 case 'V': CASEMBC(0x1e7c)
961 regmbc('V'); REGMBC(0x1e7c)
962 return;
963 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
964 CASEMBC(0x1e84) CASEMBC(0x1e86)
965 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
966 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
967 return;
968 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
969 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000970 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200971 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200972 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
973 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200974 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200975 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
976 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
977 return;
978 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
979 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
980 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
981 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
982 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000983 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200984 case 'a': case 0xe0: case 0xe1: case 0xe2:
985 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200986 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
987 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200988 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
989 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
990 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200991 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
992 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
993 REGMBC(0x1ea3)
994 return;
995 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
996 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000997 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200998 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200999 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001000 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001001 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1002 REGMBC(0x10d)
1003 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001004 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1005 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001006 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001007 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001008 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001009 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001010 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1011 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001012 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1013 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001014 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1015 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1016 REGMBC(0x1ebd)
1017 return;
1018 case 'f': CASEMBC(0x1e1f)
1019 regmbc('f'); REGMBC(0x1e1f)
1020 return;
1021 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1022 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1023 CASEMBC(0x1e21)
1024 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1025 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1026 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1027 return;
1028 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1029 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1030 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1031 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1032 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001033 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001034 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001035 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1036 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001037 regmbc('i'); regmbc(0xec); regmbc(0xed);
1038 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001039 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1040 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1041 return;
1042 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1043 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1044 return;
1045 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1046 CASEMBC(0x1e35)
1047 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1048 REGMBC(0x1e31) REGMBC(0x1e35)
1049 return;
1050 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1051 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1052 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1053 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1054 REGMBC(0x1e3b)
1055 return;
1056 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1057 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001058 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001059 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001060 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1061 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001062 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001063 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1064 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001065 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001066 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1067 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001068 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1069 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001070 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1071 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1072 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001073 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1074 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1075 REGMBC(0x1ed) REGMBC(0x1ecf)
1076 return;
1077 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1078 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1079 return;
1080 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1081 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1082 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1083 REGMBC(0x1e59) REGMBC(0x1e5f)
1084 return;
1085 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1086 CASEMBC(0x161) CASEMBC(0x1e61)
1087 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1088 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1089 return;
1090 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1091 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1092 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1093 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001094 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001095 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001096 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1097 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1098 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001099 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1100 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001101 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1102 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1103 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1104 return;
1105 case 'v': CASEMBC(0x1e7d)
1106 regmbc('v'); REGMBC(0x1e7d)
1107 return;
1108 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1109 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1110 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1111 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1112 REGMBC(0x1e98)
1113 return;
1114 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1115 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001116 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001117 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001118 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1119 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001120 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001121 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1122 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1123 return;
1124 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1125 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1126 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1127 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1128 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001129 return;
1130 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001131#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001132 }
1133 regmbc(c);
1134}
1135
1136/*
1137 * Check for a collating element "[.a.]". "pp" points to the '['.
1138 * Returns a character. Zero means that no item was recognized. Otherwise
1139 * "pp" is advanced to after the item.
1140 * Currently only single characters are recognized!
1141 */
1142 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001143get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001144{
1145 int c;
1146 int l = 1;
1147 char_u *p = *pp;
1148
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001149 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001150 {
1151#ifdef FEAT_MBYTE
1152 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001153 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001154#endif
1155 if (p[l + 2] == '.' && p[l + 3] == ']')
1156 {
1157#ifdef FEAT_MBYTE
1158 if (has_mbyte)
1159 c = mb_ptr2char(p + 2);
1160 else
1161#endif
1162 c = p[2];
1163 *pp += l + 4;
1164 return c;
1165 }
1166 }
1167 return 0;
1168}
1169
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001170static void get_cpo_flags(void);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001171static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1172static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1173
1174 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001175get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001176{
1177 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1178 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1179}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001180
1181/*
1182 * Skip over a "[]" range.
1183 * "p" must point to the character after the '['.
1184 * The returned pointer is on the matching ']', or the terminating NUL.
1185 */
1186 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001187skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001188{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001189#ifdef FEAT_MBYTE
1190 int l;
1191#endif
1192
Bram Moolenaardf177f62005-02-22 08:39:57 +00001193 if (*p == '^') /* Complement of range. */
1194 ++p;
1195 if (*p == ']' || *p == '-')
1196 ++p;
1197 while (*p != NUL && *p != ']')
1198 {
1199#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001200 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001201 p += l;
1202 else
1203#endif
1204 if (*p == '-')
1205 {
1206 ++p;
1207 if (*p != ']' && *p != NUL)
1208 mb_ptr_adv(p);
1209 }
1210 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001211 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001212 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001213 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001214 p += 2;
1215 else if (*p == '[')
1216 {
1217 if (get_char_class(&p) == CLASS_NONE
1218 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001219 && get_coll_element(&p) == 0
1220 && *p != NUL)
1221 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001222 }
1223 else
1224 ++p;
1225 }
1226
1227 return p;
1228}
1229
1230/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001231 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001232 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001233 * Take care of characters with a backslash in front of it.
1234 * Skip strings inside [ and ].
1235 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1236 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1237 * is changed in-place.
1238 */
1239 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001240skip_regexp(
1241 char_u *startp,
1242 int dirc,
1243 int magic,
1244 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001245{
1246 int mymagic;
1247 char_u *p = startp;
1248
1249 if (magic)
1250 mymagic = MAGIC_ON;
1251 else
1252 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001253 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001254
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001255 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001256 {
1257 if (p[0] == dirc) /* found end of regexp */
1258 break;
1259 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1260 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1261 {
1262 p = skip_anyof(p + 1);
1263 if (p[0] == NUL)
1264 break;
1265 }
1266 else if (p[0] == '\\' && p[1] != NUL)
1267 {
1268 if (dirc == '?' && newp != NULL && p[1] == '?')
1269 {
1270 /* change "\?" to "?", make a copy first. */
1271 if (*newp == NULL)
1272 {
1273 *newp = vim_strsave(startp);
1274 if (*newp != NULL)
1275 p = *newp + (p - startp);
1276 }
1277 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001278 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001279 else
1280 ++p;
1281 }
1282 else
1283 ++p; /* skip next character */
1284 if (*p == 'v')
1285 mymagic = MAGIC_ALL;
1286 else if (*p == 'V')
1287 mymagic = MAGIC_NONE;
1288 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001289 }
1290 return p;
1291}
1292
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001293static regprog_T *bt_regcomp(char_u *expr, int re_flags);
1294static void bt_regfree(regprog_T *prog);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001295
Bram Moolenaar071d4272004-06-13 20:20:40 +00001296/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001297 * bt_regcomp() - compile a regular expression into internal code for the
1298 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001299 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300 *
1301 * We can't allocate space until we know how big the compiled form will be,
1302 * but we can't compile it (and thus know how big it is) until we've got a
1303 * place to put the code. So we cheat: we compile it twice, once with code
1304 * generation turned off and size counting turned on, and once "for real".
1305 * This also means that we don't allocate space until we are sure that the
1306 * thing really will compile successfully, and we never have to move the
1307 * code and thus invalidate pointers into it. (Note that it has to be in
1308 * one piece because vim_free() must be able to free it all.)
1309 *
1310 * Whether upper/lower case is to be ignored is decided when executing the
1311 * program, it does not matter here.
1312 *
1313 * Beware that the optimization-preparation code in here knows about some
1314 * of the structure of the compiled regexp.
1315 * "re_flags": RE_MAGIC and/or RE_STRING.
1316 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001317 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001318bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001319{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001320 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001321 char_u *scan;
1322 char_u *longest;
1323 int len;
1324 int flags;
1325
1326 if (expr == NULL)
1327 EMSG_RET_NULL(_(e_null));
1328
1329 init_class_tab();
1330
1331 /*
1332 * First pass: determine size, legality.
1333 */
1334 regcomp_start(expr, re_flags);
1335 regcode = JUST_CALC_SIZE;
1336 regc(REGMAGIC);
1337 if (reg(REG_NOPAREN, &flags) == NULL)
1338 return NULL;
1339
Bram Moolenaar071d4272004-06-13 20:20:40 +00001340 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001341 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342 if (r == NULL)
1343 return NULL;
1344
1345 /*
1346 * Second pass: emit code.
1347 */
1348 regcomp_start(expr, re_flags);
1349 regcode = r->program;
1350 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001351 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001352 {
1353 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001354 if (reg_toolong)
1355 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 return NULL;
1357 }
1358
1359 /* Dig out information for optimizations. */
1360 r->regstart = NUL; /* Worst-case defaults. */
1361 r->reganch = 0;
1362 r->regmust = NULL;
1363 r->regmlen = 0;
1364 r->regflags = regflags;
1365 if (flags & HASNL)
1366 r->regflags |= RF_HASNL;
1367 if (flags & HASLOOKBH)
1368 r->regflags |= RF_LOOKBH;
1369#ifdef FEAT_SYN_HL
1370 /* Remember whether this pattern has any \z specials in it. */
1371 r->reghasz = re_has_z;
1372#endif
1373 scan = r->program + 1; /* First BRANCH. */
1374 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1375 {
1376 scan = OPERAND(scan);
1377
1378 /* Starting-point info. */
1379 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1380 {
1381 r->reganch++;
1382 scan = regnext(scan);
1383 }
1384
1385 if (OP(scan) == EXACTLY)
1386 {
1387#ifdef FEAT_MBYTE
1388 if (has_mbyte)
1389 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1390 else
1391#endif
1392 r->regstart = *OPERAND(scan);
1393 }
1394 else if ((OP(scan) == BOW
1395 || OP(scan) == EOW
1396 || OP(scan) == NOTHING
1397 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1398 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1399 && OP(regnext(scan)) == EXACTLY)
1400 {
1401#ifdef FEAT_MBYTE
1402 if (has_mbyte)
1403 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1404 else
1405#endif
1406 r->regstart = *OPERAND(regnext(scan));
1407 }
1408
1409 /*
1410 * If there's something expensive in the r.e., find the longest
1411 * literal string that must appear and make it the regmust. Resolve
1412 * ties in favor of later strings, since the regstart check works
1413 * with the beginning of the r.e. and avoiding duplication
1414 * strengthens checking. Not a strong reason, but sufficient in the
1415 * absence of others.
1416 */
1417 /*
1418 * When the r.e. starts with BOW, it is faster to look for a regmust
1419 * first. Used a lot for "#" and "*" commands. (Added by mool).
1420 */
1421 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1422 && !(flags & HASNL))
1423 {
1424 longest = NULL;
1425 len = 0;
1426 for (; scan != NULL; scan = regnext(scan))
1427 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1428 {
1429 longest = OPERAND(scan);
1430 len = (int)STRLEN(OPERAND(scan));
1431 }
1432 r->regmust = longest;
1433 r->regmlen = len;
1434 }
1435 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001436#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437 regdump(expr, r);
1438#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001439 r->engine = &bt_regengine;
1440 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001441}
1442
1443/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001444 * Free a compiled regexp program, returned by bt_regcomp().
1445 */
1446 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001447bt_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02001448{
1449 vim_free(prog);
1450}
1451
1452/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001453 * Setup to parse the regexp. Used once to get the length and once to do it.
1454 */
1455 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001456regcomp_start(
1457 char_u *expr,
1458 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001459{
1460 initchr(expr);
1461 if (re_flags & RE_MAGIC)
1462 reg_magic = MAGIC_ON;
1463 else
1464 reg_magic = MAGIC_OFF;
1465 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001466 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001467 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001468
1469 num_complex_braces = 0;
1470 regnpar = 1;
1471 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1472#ifdef FEAT_SYN_HL
1473 regnzpar = 1;
1474 re_has_z = 0;
1475#endif
1476 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001477 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001478 regflags = 0;
1479#if defined(FEAT_SYN_HL) || defined(PROTO)
1480 had_eol = FALSE;
1481#endif
1482}
1483
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: whether the EOL item "$" was found during the
 * previous call to vim_regcomp().  The flag is reset by regcomp_start().
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol(void)
{
    return had_eol;
}
#endif
1495
/* variables for parsing reginput */
static int	at_start;	/* True when on the first character */
static int	prev_at_start;	/* True when on the second character */
1499
Bram Moolenaar071d4272004-06-13 20:20:40 +00001500/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001501 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001502 *
1503 * Caller must absorb opening parenthesis.
1504 *
1505 * Combining parenthesis handling with the base level of regular expression
1506 * is a trifle forced, but the need to tie the tails of the branches to what
1507 * follows makes it hard to avoid.
1508 */
1509 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001510reg(
1511 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1512 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001513{
1514 char_u *ret;
1515 char_u *br;
1516 char_u *ender;
1517 int parno = 0;
1518 int flags;
1519
1520 *flagp = HASWIDTH; /* Tentatively. */
1521
1522#ifdef FEAT_SYN_HL
1523 if (paren == REG_ZPAREN)
1524 {
1525 /* Make a ZOPEN node. */
1526 if (regnzpar >= NSUBEXP)
1527 EMSG_RET_NULL(_("E50: Too many \\z("));
1528 parno = regnzpar;
1529 regnzpar++;
1530 ret = regnode(ZOPEN + parno);
1531 }
1532 else
1533#endif
1534 if (paren == REG_PAREN)
1535 {
1536 /* Make a MOPEN node. */
1537 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001538 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001539 parno = regnpar;
1540 ++regnpar;
1541 ret = regnode(MOPEN + parno);
1542 }
1543 else if (paren == REG_NPAREN)
1544 {
1545 /* Make a NOPEN node. */
1546 ret = regnode(NOPEN);
1547 }
1548 else
1549 ret = NULL;
1550
1551 /* Pick up the branches, linking them together. */
1552 br = regbranch(&flags);
1553 if (br == NULL)
1554 return NULL;
1555 if (ret != NULL)
1556 regtail(ret, br); /* [MZ]OPEN -> first. */
1557 else
1558 ret = br;
1559 /* If one of the branches can be zero-width, the whole thing can.
1560 * If one of the branches has * at start or matches a line-break, the
1561 * whole thing can. */
1562 if (!(flags & HASWIDTH))
1563 *flagp &= ~HASWIDTH;
1564 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1565 while (peekchr() == Magic('|'))
1566 {
1567 skipchr();
1568 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001569 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001570 return NULL;
1571 regtail(ret, br); /* BRANCH -> BRANCH. */
1572 if (!(flags & HASWIDTH))
1573 *flagp &= ~HASWIDTH;
1574 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1575 }
1576
1577 /* Make a closing node, and hook it on the end. */
1578 ender = regnode(
1579#ifdef FEAT_SYN_HL
1580 paren == REG_ZPAREN ? ZCLOSE + parno :
1581#endif
1582 paren == REG_PAREN ? MCLOSE + parno :
1583 paren == REG_NPAREN ? NCLOSE : END);
1584 regtail(ret, ender);
1585
1586 /* Hook the tails of the branches to the closing node. */
1587 for (br = ret; br != NULL; br = regnext(br))
1588 regoptail(br, ender);
1589
1590 /* Check for proper termination. */
1591 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1592 {
1593#ifdef FEAT_SYN_HL
1594 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001595 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001596 else
1597#endif
1598 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001599 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001600 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001601 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 }
1603 else if (paren == REG_NOPAREN && peekchr() != NUL)
1604 {
1605 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001606 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001607 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001608 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609 /* NOTREACHED */
1610 }
1611 /*
1612 * Here we set the flag allowing back references to this set of
1613 * parentheses.
1614 */
1615 if (paren == REG_PAREN)
1616 had_endbrace[parno] = TRUE; /* have seen the close paren */
1617 return ret;
1618}
1619
1620/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001621 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001622 * Implements the & operator.
1623 */
1624 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001625regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001626{
1627 char_u *ret;
1628 char_u *chain = NULL;
1629 char_u *latest;
1630 int flags;
1631
1632 *flagp = WORST | HASNL; /* Tentatively. */
1633
1634 ret = regnode(BRANCH);
1635 for (;;)
1636 {
1637 latest = regconcat(&flags);
1638 if (latest == NULL)
1639 return NULL;
1640 /* If one of the branches has width, the whole thing has. If one of
1641 * the branches anchors at start-of-line, the whole thing does.
1642 * If one of the branches uses look-behind, the whole thing does. */
1643 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1644 /* If one of the branches doesn't match a line-break, the whole thing
1645 * doesn't. */
1646 *flagp &= ~HASNL | (flags & HASNL);
1647 if (chain != NULL)
1648 regtail(chain, latest);
1649 if (peekchr() != Magic('&'))
1650 break;
1651 skipchr();
1652 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001653 if (reg_toolong)
1654 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001655 reginsert(MATCH, latest);
1656 chain = latest;
1657 }
1658
1659 return ret;
1660}
1661
1662/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001663 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001664 * Implements the concatenation operator.
1665 */
1666 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001667regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001668{
1669 char_u *first = NULL;
1670 char_u *chain = NULL;
1671 char_u *latest;
1672 int flags;
1673 int cont = TRUE;
1674
1675 *flagp = WORST; /* Tentatively. */
1676
1677 while (cont)
1678 {
1679 switch (peekchr())
1680 {
1681 case NUL:
1682 case Magic('|'):
1683 case Magic('&'):
1684 case Magic(')'):
1685 cont = FALSE;
1686 break;
1687 case Magic('Z'):
1688#ifdef FEAT_MBYTE
1689 regflags |= RF_ICOMBINE;
1690#endif
1691 skipchr_keepstart();
1692 break;
1693 case Magic('c'):
1694 regflags |= RF_ICASE;
1695 skipchr_keepstart();
1696 break;
1697 case Magic('C'):
1698 regflags |= RF_NOICASE;
1699 skipchr_keepstart();
1700 break;
1701 case Magic('v'):
1702 reg_magic = MAGIC_ALL;
1703 skipchr_keepstart();
1704 curchr = -1;
1705 break;
1706 case Magic('m'):
1707 reg_magic = MAGIC_ON;
1708 skipchr_keepstart();
1709 curchr = -1;
1710 break;
1711 case Magic('M'):
1712 reg_magic = MAGIC_OFF;
1713 skipchr_keepstart();
1714 curchr = -1;
1715 break;
1716 case Magic('V'):
1717 reg_magic = MAGIC_NONE;
1718 skipchr_keepstart();
1719 curchr = -1;
1720 break;
1721 default:
1722 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001723 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001724 return NULL;
1725 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1726 if (chain == NULL) /* First piece. */
1727 *flagp |= flags & SPSTART;
1728 else
1729 regtail(chain, latest);
1730 chain = latest;
1731 if (first == NULL)
1732 first = latest;
1733 break;
1734 }
1735 }
1736 if (first == NULL) /* Loop ran zero times. */
1737 first = regnode(NOTHING);
1738 return first;
1739}
1740
1741/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001742 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001743 *
1744 * Note that the branching code sequences used for = and the general cases
1745 * of * and + are somewhat optimized: they use the same NOTHING node as
1746 * both the endmarker for their branch list and the body of the last branch.
1747 * It might seem that this node could be dispensed with entirely, but the
1748 * endmarker role is not redundant.
1749 */
1750 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001751regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001752{
1753 char_u *ret;
1754 int op;
1755 char_u *next;
1756 int flags;
1757 long minval;
1758 long maxval;
1759
1760 ret = regatom(&flags);
1761 if (ret == NULL)
1762 return NULL;
1763
1764 op = peekchr();
1765 if (re_multi_type(op) == NOT_MULTI)
1766 {
1767 *flagp = flags;
1768 return ret;
1769 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001770 /* default flags */
1771 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1772
1773 skipchr();
1774 switch (op)
1775 {
1776 case Magic('*'):
1777 if (flags & SIMPLE)
1778 reginsert(STAR, ret);
1779 else
1780 {
1781 /* Emit x* as (x&|), where & means "self". */
1782 reginsert(BRANCH, ret); /* Either x */
1783 regoptail(ret, regnode(BACK)); /* and loop */
1784 regoptail(ret, ret); /* back */
1785 regtail(ret, regnode(BRANCH)); /* or */
1786 regtail(ret, regnode(NOTHING)); /* null. */
1787 }
1788 break;
1789
1790 case Magic('+'):
1791 if (flags & SIMPLE)
1792 reginsert(PLUS, ret);
1793 else
1794 {
1795 /* Emit x+ as x(&|), where & means "self". */
1796 next = regnode(BRANCH); /* Either */
1797 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001798 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001799 regtail(next, regnode(BRANCH)); /* or */
1800 regtail(ret, regnode(NOTHING)); /* null. */
1801 }
1802 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1803 break;
1804
1805 case Magic('@'):
1806 {
1807 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001808 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001809
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001810 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001811 switch (no_Magic(getchr()))
1812 {
1813 case '=': lop = MATCH; break; /* \@= */
1814 case '!': lop = NOMATCH; break; /* \@! */
1815 case '>': lop = SUBPAT; break; /* \@> */
1816 case '<': switch (no_Magic(getchr()))
1817 {
1818 case '=': lop = BEHIND; break; /* \@<= */
1819 case '!': lop = NOBEHIND; break; /* \@<! */
1820 }
1821 }
1822 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001823 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001824 reg_magic == MAGIC_ALL);
1825 /* Look behind must match with behind_pos. */
1826 if (lop == BEHIND || lop == NOBEHIND)
1827 {
1828 regtail(ret, regnode(BHPOS));
1829 *flagp |= HASLOOKBH;
1830 }
1831 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001832 if (lop == BEHIND || lop == NOBEHIND)
1833 {
1834 if (nr < 0)
1835 nr = 0; /* no limit is same as zero limit */
1836 reginsert_nr(lop, nr, ret);
1837 }
1838 else
1839 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001840 break;
1841 }
1842
1843 case Magic('?'):
1844 case Magic('='):
1845 /* Emit x= as (x|) */
1846 reginsert(BRANCH, ret); /* Either x */
1847 regtail(ret, regnode(BRANCH)); /* or */
1848 next = regnode(NOTHING); /* null. */
1849 regtail(ret, next);
1850 regoptail(ret, next);
1851 break;
1852
1853 case Magic('{'):
1854 if (!read_limits(&minval, &maxval))
1855 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001856 if (flags & SIMPLE)
1857 {
1858 reginsert(BRACE_SIMPLE, ret);
1859 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1860 }
1861 else
1862 {
1863 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001864 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001865 reg_magic == MAGIC_ALL);
1866 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1867 regoptail(ret, regnode(BACK));
1868 regoptail(ret, ret);
1869 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1870 ++num_complex_braces;
1871 }
1872 if (minval > 0 && maxval > 0)
1873 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1874 break;
1875 }
1876 if (re_multi_type(peekchr()) != NOT_MULTI)
1877 {
1878 /* Can't have a multi follow a multi. */
1879 if (peekchr() == Magic('*'))
1880 sprintf((char *)IObuff, _("E61: Nested %s*"),
1881 reg_magic >= MAGIC_ON ? "" : "\\");
1882 else
1883 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1884 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1885 EMSG_RET_NULL(IObuff);
1886 }
1887
1888 return ret;
1889}
1890
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001891/* When making changes to classchars also change nfa_classcodes. */
1892static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1893static int classcodes[] = {
1894 ANY, IDENT, SIDENT, KWORD, SKWORD,
1895 FNAME, SFNAME, PRINT, SPRINT,
1896 WHITE, NWHITE, DIGIT, NDIGIT,
1897 HEX, NHEX, OCTAL, NOCTAL,
1898 WORD, NWORD, HEAD, NHEAD,
1899 ALPHA, NALPHA, LOWER, NLOWER,
1900 UPPER, NUPPER
1901};
1902
Bram Moolenaar071d4272004-06-13 20:20:40 +00001903/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001904 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001905 *
1906 * Optimization: gobbles an entire sequence of ordinary characters so that
1907 * it can turn them into a single node, which is smaller to store and
1908 * faster to run. Don't do this when one_exactly is set.
1909 */
1910 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001911regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001912{
1913 char_u *ret;
1914 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001915 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001916 char_u *p;
1917 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001918 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919
1920 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921
1922 c = getchr();
1923 switch (c)
1924 {
1925 case Magic('^'):
1926 ret = regnode(BOL);
1927 break;
1928
1929 case Magic('$'):
1930 ret = regnode(EOL);
1931#if defined(FEAT_SYN_HL) || defined(PROTO)
1932 had_eol = TRUE;
1933#endif
1934 break;
1935
1936 case Magic('<'):
1937 ret = regnode(BOW);
1938 break;
1939
1940 case Magic('>'):
1941 ret = regnode(EOW);
1942 break;
1943
1944 case Magic('_'):
1945 c = no_Magic(getchr());
1946 if (c == '^') /* "\_^" is start-of-line */
1947 {
1948 ret = regnode(BOL);
1949 break;
1950 }
1951 if (c == '$') /* "\_$" is end-of-line */
1952 {
1953 ret = regnode(EOL);
1954#if defined(FEAT_SYN_HL) || defined(PROTO)
1955 had_eol = TRUE;
1956#endif
1957 break;
1958 }
1959
1960 extra = ADD_NL;
1961 *flagp |= HASNL;
1962
1963 /* "\_[" is character range plus newline */
1964 if (c == '[')
1965 goto collection;
1966
1967 /* "\_x" is character class plus newline */
1968 /*FALLTHROUGH*/
1969
1970 /*
1971 * Character classes.
1972 */
1973 case Magic('.'):
1974 case Magic('i'):
1975 case Magic('I'):
1976 case Magic('k'):
1977 case Magic('K'):
1978 case Magic('f'):
1979 case Magic('F'):
1980 case Magic('p'):
1981 case Magic('P'):
1982 case Magic('s'):
1983 case Magic('S'):
1984 case Magic('d'):
1985 case Magic('D'):
1986 case Magic('x'):
1987 case Magic('X'):
1988 case Magic('o'):
1989 case Magic('O'):
1990 case Magic('w'):
1991 case Magic('W'):
1992 case Magic('h'):
1993 case Magic('H'):
1994 case Magic('a'):
1995 case Magic('A'):
1996 case Magic('l'):
1997 case Magic('L'):
1998 case Magic('u'):
1999 case Magic('U'):
2000 p = vim_strchr(classchars, no_Magic(c));
2001 if (p == NULL)
2002 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002003#ifdef FEAT_MBYTE
2004 /* When '.' is followed by a composing char ignore the dot, so that
2005 * the composing char is matched here. */
2006 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2007 {
2008 c = getchr();
2009 goto do_multibyte;
2010 }
2011#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002012 ret = regnode(classcodes[p - classchars] + extra);
2013 *flagp |= HASWIDTH | SIMPLE;
2014 break;
2015
2016 case Magic('n'):
2017 if (reg_string)
2018 {
2019 /* In a string "\n" matches a newline character. */
2020 ret = regnode(EXACTLY);
2021 regc(NL);
2022 regc(NUL);
2023 *flagp |= HASWIDTH | SIMPLE;
2024 }
2025 else
2026 {
2027 /* In buffer text "\n" matches the end of a line. */
2028 ret = regnode(NEWL);
2029 *flagp |= HASWIDTH | HASNL;
2030 }
2031 break;
2032
2033 case Magic('('):
2034 if (one_exactly)
2035 EMSG_ONE_RET_NULL;
2036 ret = reg(REG_PAREN, &flags);
2037 if (ret == NULL)
2038 return NULL;
2039 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2040 break;
2041
2042 case NUL:
2043 case Magic('|'):
2044 case Magic('&'):
2045 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002046 if (one_exactly)
2047 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002048 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002049 /* NOTREACHED */
2050
2051 case Magic('='):
2052 case Magic('?'):
2053 case Magic('+'):
2054 case Magic('@'):
2055 case Magic('{'):
2056 case Magic('*'):
2057 c = no_Magic(c);
2058 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2059 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2060 ? "" : "\\", c);
2061 EMSG_RET_NULL(IObuff);
2062 /* NOTREACHED */
2063
2064 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002065 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002066 {
2067 char_u *lp;
2068
2069 ret = regnode(EXACTLY);
2070 lp = reg_prev_sub;
2071 while (*lp != NUL)
2072 regc(*lp++);
2073 regc(NUL);
2074 if (*reg_prev_sub != NUL)
2075 {
2076 *flagp |= HASWIDTH;
2077 if ((lp - reg_prev_sub) == 1)
2078 *flagp |= SIMPLE;
2079 }
2080 }
2081 else
2082 EMSG_RET_NULL(_(e_nopresub));
2083 break;
2084
2085 case Magic('1'):
2086 case Magic('2'):
2087 case Magic('3'):
2088 case Magic('4'):
2089 case Magic('5'):
2090 case Magic('6'):
2091 case Magic('7'):
2092 case Magic('8'):
2093 case Magic('9'):
2094 {
2095 int refnum;
2096
2097 refnum = c - Magic('0');
2098 /*
2099 * Check if the back reference is legal. We must have seen the
2100 * close brace.
2101 * TODO: Should also check that we don't refer to something
2102 * that is repeated (+*=): what instance of the repetition
2103 * should we match?
2104 */
2105 if (!had_endbrace[refnum])
2106 {
2107 /* Trick: check if "@<=" or "@<!" follows, in which case
2108 * the \1 can appear before the referenced match. */
2109 for (p = regparse; *p != NUL; ++p)
2110 if (p[0] == '@' && p[1] == '<'
2111 && (p[2] == '!' || p[2] == '='))
2112 break;
2113 if (*p == NUL)
2114 EMSG_RET_NULL(_("E65: Illegal back reference"));
2115 }
2116 ret = regnode(BACKREF + refnum);
2117 }
2118 break;
2119
Bram Moolenaar071d4272004-06-13 20:20:40 +00002120 case Magic('z'):
2121 {
2122 c = no_Magic(getchr());
2123 switch (c)
2124 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002125#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002126 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002127 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002128 if (one_exactly)
2129 EMSG_ONE_RET_NULL;
2130 ret = reg(REG_ZPAREN, &flags);
2131 if (ret == NULL)
2132 return NULL;
2133 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2134 re_has_z = REX_SET;
2135 break;
2136
2137 case '1':
2138 case '2':
2139 case '3':
2140 case '4':
2141 case '5':
2142 case '6':
2143 case '7':
2144 case '8':
2145 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002146 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002147 ret = regnode(ZREF + c - '0');
2148 re_has_z = REX_USE;
2149 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002150#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002151
2152 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002153 if (re_mult_next("\\zs") == FAIL)
2154 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002155 break;
2156
2157 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002158 if (re_mult_next("\\ze") == FAIL)
2159 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002160 break;
2161
2162 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2163 }
2164 }
2165 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166
2167 case Magic('%'):
2168 {
2169 c = no_Magic(getchr());
2170 switch (c)
2171 {
2172 /* () without a back reference */
2173 case '(':
2174 if (one_exactly)
2175 EMSG_ONE_RET_NULL;
2176 ret = reg(REG_NPAREN, &flags);
2177 if (ret == NULL)
2178 return NULL;
2179 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2180 break;
2181
2182 /* Catch \%^ and \%$ regardless of where they appear in the
2183 * pattern -- regardless of whether or not it makes sense. */
2184 case '^':
2185 ret = regnode(RE_BOF);
2186 break;
2187
2188 case '$':
2189 ret = regnode(RE_EOF);
2190 break;
2191
2192 case '#':
2193 ret = regnode(CURSOR);
2194 break;
2195
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002196 case 'V':
2197 ret = regnode(RE_VISUAL);
2198 break;
2199
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002200 case 'C':
2201 ret = regnode(RE_COMPOSING);
2202 break;
2203
Bram Moolenaar071d4272004-06-13 20:20:40 +00002204 /* \%[abc]: Emit as a list of branches, all ending at the last
2205 * branch which matches nothing. */
2206 case '[':
2207 if (one_exactly) /* doesn't nest */
2208 EMSG_ONE_RET_NULL;
2209 {
2210 char_u *lastbranch;
2211 char_u *lastnode = NULL;
2212 char_u *br;
2213
2214 ret = NULL;
2215 while ((c = getchr()) != ']')
2216 {
2217 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002218 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002219 reg_magic == MAGIC_ALL);
2220 br = regnode(BRANCH);
2221 if (ret == NULL)
2222 ret = br;
2223 else
2224 regtail(lastnode, br);
2225
2226 ungetchr();
2227 one_exactly = TRUE;
2228 lastnode = regatom(flagp);
2229 one_exactly = FALSE;
2230 if (lastnode == NULL)
2231 return NULL;
2232 }
2233 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002234 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002235 reg_magic == MAGIC_ALL);
2236 lastbranch = regnode(BRANCH);
2237 br = regnode(NOTHING);
2238 if (ret != JUST_CALC_SIZE)
2239 {
2240 regtail(lastnode, br);
2241 regtail(lastbranch, br);
2242 /* connect all branches to the NOTHING
2243 * branch at the end */
2244 for (br = ret; br != lastnode; )
2245 {
2246 if (OP(br) == BRANCH)
2247 {
2248 regtail(br, lastbranch);
2249 br = OPERAND(br);
2250 }
2251 else
2252 br = regnext(br);
2253 }
2254 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002255 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002256 break;
2257 }
2258
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002259 case 'd': /* %d123 decimal */
2260 case 'o': /* %o123 octal */
2261 case 'x': /* %xab hex 2 */
2262 case 'u': /* %uabcd hex 4 */
2263 case 'U': /* %U1234abcd hex 8 */
2264 {
2265 int i;
2266
2267 switch (c)
2268 {
2269 case 'd': i = getdecchrs(); break;
2270 case 'o': i = getoctchrs(); break;
2271 case 'x': i = gethexchrs(2); break;
2272 case 'u': i = gethexchrs(4); break;
2273 case 'U': i = gethexchrs(8); break;
2274 default: i = -1; break;
2275 }
2276
2277 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002278 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002279 _("E678: Invalid character after %s%%[dxouU]"),
2280 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002281#ifdef FEAT_MBYTE
2282 if (use_multibytecode(i))
2283 ret = regnode(MULTIBYTECODE);
2284 else
2285#endif
2286 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002287 if (i == 0)
2288 regc(0x0a);
2289 else
2290#ifdef FEAT_MBYTE
2291 regmbc(i);
2292#else
2293 regc(i);
2294#endif
2295 regc(NUL);
2296 *flagp |= HASWIDTH;
2297 break;
2298 }
2299
Bram Moolenaar071d4272004-06-13 20:20:40 +00002300 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002301 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2302 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 {
2304 long_u n = 0;
2305 int cmp;
2306
2307 cmp = c;
2308 if (cmp == '<' || cmp == '>')
2309 c = getchr();
2310 while (VIM_ISDIGIT(c))
2311 {
2312 n = n * 10 + (c - '0');
2313 c = getchr();
2314 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002315 if (c == '\'' && n == 0)
2316 {
2317 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2318 c = getchr();
2319 ret = regnode(RE_MARK);
2320 if (ret == JUST_CALC_SIZE)
2321 regsize += 2;
2322 else
2323 {
2324 *regcode++ = c;
2325 *regcode++ = cmp;
2326 }
2327 break;
2328 }
2329 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002330 {
2331 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002332 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002333 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002334 if (save_prev_at_start)
2335 at_start = TRUE;
2336 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 else if (c == 'c')
2338 ret = regnode(RE_COL);
2339 else
2340 ret = regnode(RE_VCOL);
2341 if (ret == JUST_CALC_SIZE)
2342 regsize += 5;
2343 else
2344 {
2345 /* put the number and the optional
2346 * comparator after the opcode */
2347 regcode = re_put_long(regcode, n);
2348 *regcode++ = cmp;
2349 }
2350 break;
2351 }
2352 }
2353
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002354 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002355 reg_magic == MAGIC_ALL);
2356 }
2357 }
2358 break;
2359
2360 case Magic('['):
2361collection:
2362 {
2363 char_u *lp;
2364
2365 /*
2366 * If there is no matching ']', we assume the '[' is a normal
2367 * character. This makes 'incsearch' and ":help [" work.
2368 */
2369 lp = skip_anyof(regparse);
2370 if (*lp == ']') /* there is a matching ']' */
2371 {
2372 int startc = -1; /* > 0 when next '-' is a range */
2373 int endc;
2374
2375 /*
2376 * In a character class, different parsing rules apply.
2377 * Not even \ is special anymore, nothing is.
2378 */
2379 if (*regparse == '^') /* Complement of range. */
2380 {
2381 ret = regnode(ANYBUT + extra);
2382 regparse++;
2383 }
2384 else
2385 ret = regnode(ANYOF + extra);
2386
2387 /* At the start ']' and '-' mean the literal character. */
2388 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002389 {
2390 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002391 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002392 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002393
2394 while (*regparse != NUL && *regparse != ']')
2395 {
2396 if (*regparse == '-')
2397 {
2398 ++regparse;
2399 /* The '-' is not used for a range at the end and
2400 * after or before a '\n'. */
2401 if (*regparse == ']' || *regparse == NUL
2402 || startc == -1
2403 || (regparse[0] == '\\' && regparse[1] == 'n'))
2404 {
2405 regc('-');
2406 startc = '-'; /* [--x] is a range */
2407 }
2408 else
2409 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002410 /* Also accept "a-[.z.]" */
2411 endc = 0;
2412 if (*regparse == '[')
2413 endc = get_coll_element(&regparse);
2414 if (endc == 0)
2415 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002416#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002417 if (has_mbyte)
2418 endc = mb_ptr2char_adv(&regparse);
2419 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002421 endc = *regparse++;
2422 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002423
2424 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002425 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002426 endc = coll_get_char();
2427
Bram Moolenaar071d4272004-06-13 20:20:40 +00002428 if (startc > endc)
2429 EMSG_RET_NULL(_(e_invrange));
2430#ifdef FEAT_MBYTE
2431 if (has_mbyte && ((*mb_char2len)(startc) > 1
2432 || (*mb_char2len)(endc) > 1))
2433 {
2434 /* Limit to a range of 256 chars */
2435 if (endc > startc + 256)
2436 EMSG_RET_NULL(_(e_invrange));
2437 while (++startc <= endc)
2438 regmbc(startc);
2439 }
2440 else
2441#endif
2442 {
2443#ifdef EBCDIC
2444 int alpha_only = FALSE;
2445
2446 /* for alphabetical range skip the gaps
2447 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2448 if (isalpha(startc) && isalpha(endc))
2449 alpha_only = TRUE;
2450#endif
2451 while (++startc <= endc)
2452#ifdef EBCDIC
2453 if (!alpha_only || isalpha(startc))
2454#endif
2455 regc(startc);
2456 }
2457 startc = -1;
2458 }
2459 }
2460 /*
2461 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2462 * accepts "\t", "\e", etc., but only when the 'l' flag in
2463 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002464 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002465 */
2466 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002467 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002468 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002469 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002470 && vim_strchr(REGEXP_ABBR,
2471 regparse[1]) != NULL)))
2472 {
2473 regparse++;
2474 if (*regparse == 'n')
2475 {
2476 /* '\n' in range: also match NL */
2477 if (ret != JUST_CALC_SIZE)
2478 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002479 /* Using \n inside [^] does not change what
2480 * matches. "[^\n]" is the same as ".". */
2481 if (*ret == ANYOF)
2482 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002483 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002484 *flagp |= HASNL;
2485 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002486 /* else: must have had a \n already */
2487 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 regparse++;
2489 startc = -1;
2490 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002491 else if (*regparse == 'd'
2492 || *regparse == 'o'
2493 || *regparse == 'x'
2494 || *regparse == 'u'
2495 || *regparse == 'U')
2496 {
2497 startc = coll_get_char();
2498 if (startc == 0)
2499 regc(0x0a);
2500 else
2501#ifdef FEAT_MBYTE
2502 regmbc(startc);
2503#else
2504 regc(startc);
2505#endif
2506 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002507 else
2508 {
2509 startc = backslash_trans(*regparse++);
2510 regc(startc);
2511 }
2512 }
2513 else if (*regparse == '[')
2514 {
2515 int c_class;
2516 int cu;
2517
Bram Moolenaardf177f62005-02-22 08:39:57 +00002518 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002519 startc = -1;
2520 /* Characters assumed to be 8 bits! */
2521 switch (c_class)
2522 {
2523 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002524 c_class = get_equi_class(&regparse);
2525 if (c_class != 0)
2526 {
2527 /* produce equivalence class */
2528 reg_equi_class(c_class);
2529 }
2530 else if ((c_class =
2531 get_coll_element(&regparse)) != 0)
2532 {
2533 /* produce a collating element */
2534 regmbc(c_class);
2535 }
2536 else
2537 {
2538 /* literal '[', allow [[-x] as a range */
2539 startc = *regparse++;
2540 regc(startc);
2541 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002542 break;
2543 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002544 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002545 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002546 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002547 break;
2548 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002549 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002550 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002551 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002552 break;
2553 case CLASS_BLANK:
2554 regc(' ');
2555 regc('\t');
2556 break;
2557 case CLASS_CNTRL:
2558 for (cu = 1; cu <= 255; cu++)
2559 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002560 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002561 break;
2562 case CLASS_DIGIT:
2563 for (cu = 1; cu <= 255; cu++)
2564 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002565 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002566 break;
2567 case CLASS_GRAPH:
2568 for (cu = 1; cu <= 255; cu++)
2569 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002570 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002571 break;
2572 case CLASS_LOWER:
2573 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002574 if (MB_ISLOWER(cu) && cu != 170
2575 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002576 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002577 break;
2578 case CLASS_PRINT:
2579 for (cu = 1; cu <= 255; cu++)
2580 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002581 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002582 break;
2583 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002584 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002585 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002586 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 break;
2588 case CLASS_SPACE:
2589 for (cu = 9; cu <= 13; cu++)
2590 regc(cu);
2591 regc(' ');
2592 break;
2593 case CLASS_UPPER:
2594 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002595 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002596 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002597 break;
2598 case CLASS_XDIGIT:
2599 for (cu = 1; cu <= 255; cu++)
2600 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002601 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002602 break;
2603 case CLASS_TAB:
2604 regc('\t');
2605 break;
2606 case CLASS_RETURN:
2607 regc('\r');
2608 break;
2609 case CLASS_BACKSPACE:
2610 regc('\b');
2611 break;
2612 case CLASS_ESCAPE:
2613 regc('\033');
2614 break;
2615 }
2616 }
2617 else
2618 {
2619#ifdef FEAT_MBYTE
2620 if (has_mbyte)
2621 {
2622 int len;
2623
2624 /* produce a multibyte character, including any
2625 * following composing characters */
2626 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002627 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002628 if (enc_utf8 && utf_char2len(startc) != len)
2629 startc = -1; /* composing chars */
2630 while (--len >= 0)
2631 regc(*regparse++);
2632 }
2633 else
2634#endif
2635 {
2636 startc = *regparse++;
2637 regc(startc);
2638 }
2639 }
2640 }
2641 regc(NUL);
2642 prevchr_len = 1; /* last char was the ']' */
2643 if (*regparse != ']')
2644 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2645 skipchr(); /* let's be friends with the lexer again */
2646 *flagp |= HASWIDTH | SIMPLE;
2647 break;
2648 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002649 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002650 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002651 }
2652 /* FALLTHROUGH */
2653
2654 default:
2655 {
2656 int len;
2657
2658#ifdef FEAT_MBYTE
2659 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002660 * before a multi and when it's a composing char. */
2661 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002662 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002663do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664 ret = regnode(MULTIBYTECODE);
2665 regmbc(c);
2666 *flagp |= HASWIDTH | SIMPLE;
2667 break;
2668 }
2669#endif
2670
2671 ret = regnode(EXACTLY);
2672
2673 /*
2674 * Append characters as long as:
2675 * - there is no following multi, we then need the character in
2676 * front of it as a single character operand
2677 * - not running into a Magic character
2678 * - "one_exactly" is not set
2679 * But always emit at least one character. Might be a Multi,
2680 * e.g., a "[" without matching "]".
2681 */
2682 for (len = 0; c != NUL && (len == 0
2683 || (re_multi_type(peekchr()) == NOT_MULTI
2684 && !one_exactly
2685 && !is_Magic(c))); ++len)
2686 {
2687 c = no_Magic(c);
2688#ifdef FEAT_MBYTE
2689 if (has_mbyte)
2690 {
2691 regmbc(c);
2692 if (enc_utf8)
2693 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002694 int l;
2695
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002696 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002697 for (;;)
2698 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002699 l = utf_ptr2len(regparse);
2700 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002701 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002702 regmbc(utf_ptr2char(regparse));
2703 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002704 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002705 }
2706 }
2707 else
2708#endif
2709 regc(c);
2710 c = getchr();
2711 }
2712 ungetchr();
2713
2714 regc(NUL);
2715 *flagp |= HASWIDTH;
2716 if (len == 1)
2717 *flagp |= SIMPLE;
2718 }
2719 break;
2720 }
2721
2722 return ret;
2723}
2724
#ifdef FEAT_MBYTE
/*
 * Decide whether character "c" must be emitted as a MULTIBYTECODE node
 * instead of being part of an EXACTLY string.
 * This is so for a character that takes more than one byte and that is either
 * followed by a multi or, with UTF-8, is a composing character.
 * Returns non-zero when MULTIBYTECODE is to be used.
 */
    static int
use_multibytecode(int c)
{
    /* single-byte characters never need MULTIBYTECODE */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
        return FALSE;

    /* a multi applies to the whole character, it must stand alone */
    if (re_multi_type(peekchr()) != NOT_MULTI)
        return TRUE;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2738
Bram Moolenaar071d4272004-06-13 20:20:40 +00002739/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002740 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002741 * Return pointer to generated code.
2742 */
2743 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002744regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002745{
2746 char_u *ret;
2747
2748 ret = regcode;
2749 if (ret == JUST_CALC_SIZE)
2750 regsize += 3;
2751 else
2752 {
2753 *regcode++ = op;
2754 *regcode++ = NUL; /* Null "next" pointer. */
2755 *regcode++ = NUL;
2756 }
2757 return ret;
2758}
2759
2760/*
2761 * Emit (if appropriate) a byte of code
2762 */
2763 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002764regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002765{
2766 if (regcode == JUST_CALC_SIZE)
2767 regsize++;
2768 else
2769 *regcode++ = b;
2770}
2771
#ifdef FEAT_MBYTE
/*
 * Emit character "c" as a (possibly multi-byte) sequence of code bytes.
 * When "regcode" is JUST_CALC_SIZE only the length of the sequence is added
 * to "regsize".
 */
    static void
regmbc(int c)
{
    /* A character above 0xff can't be stored when multi-byte is not active,
     * silently drop it. */
    if (c > 0xff && !has_mbyte)
        return;

    if (regcode != JUST_CALC_SIZE)
        regcode += (*mb_char2bytes)(c, regcode);
    else
        regsize += (*mb_char2len)(c);
}
#endif
2787
2788/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002789 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002790 *
2791 * Means relocating the operand.
2792 */
2793 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002794reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002795{
2796 char_u *src;
2797 char_u *dst;
2798 char_u *place;
2799
2800 if (regcode == JUST_CALC_SIZE)
2801 {
2802 regsize += 3;
2803 return;
2804 }
2805 src = regcode;
2806 regcode += 3;
2807 dst = regcode;
2808 while (src > opnd)
2809 *--dst = *--src;
2810
2811 place = opnd; /* Op node, where operand used to be. */
2812 *place++ = op;
2813 *place++ = NUL;
2814 *place = NUL;
2815}
2816
2817/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002818 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002819 * Add a number to the operator.
2820 */
2821 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002822reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002823{
2824 char_u *src;
2825 char_u *dst;
2826 char_u *place;
2827
2828 if (regcode == JUST_CALC_SIZE)
2829 {
2830 regsize += 7;
2831 return;
2832 }
2833 src = regcode;
2834 regcode += 7;
2835 dst = regcode;
2836 while (src > opnd)
2837 *--dst = *--src;
2838
2839 place = opnd; /* Op node, where operand used to be. */
2840 *place++ = op;
2841 *place++ = NUL;
2842 *place++ = NUL;
2843 place = re_put_long(place, (long_u)val);
2844}
2845
2846/*
2847 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002848 * The operator has the given limit values as operands. Also set next pointer.
2849 *
2850 * Means relocating the operand.
2851 */
2852 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002853reginsert_limits(
2854 int op,
2855 long minval,
2856 long maxval,
2857 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002858{
2859 char_u *src;
2860 char_u *dst;
2861 char_u *place;
2862
2863 if (regcode == JUST_CALC_SIZE)
2864 {
2865 regsize += 11;
2866 return;
2867 }
2868 src = regcode;
2869 regcode += 11;
2870 dst = regcode;
2871 while (src > opnd)
2872 *--dst = *--src;
2873
2874 place = opnd; /* Op node, where operand used to be. */
2875 *place++ = op;
2876 *place++ = NUL;
2877 *place++ = NUL;
2878 place = re_put_long(place, (long_u)minval);
2879 place = re_put_long(place, (long_u)maxval);
2880 regtail(opnd, place);
2881}
2882
2883/*
2884 * Write a long as four bytes at "p" and return pointer to the next char.
2885 */
2886 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002887re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002888{
2889 *p++ = (char_u) ((val >> 24) & 0377);
2890 *p++ = (char_u) ((val >> 16) & 0377);
2891 *p++ = (char_u) ((val >> 8) & 0377);
2892 *p++ = (char_u) (val & 0377);
2893 return p;
2894}
2895
2896/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002897 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002898 */
2899 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002900regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002901{
2902 char_u *scan;
2903 char_u *temp;
2904 int offset;
2905
2906 if (p == JUST_CALC_SIZE)
2907 return;
2908
2909 /* Find last node. */
2910 scan = p;
2911 for (;;)
2912 {
2913 temp = regnext(scan);
2914 if (temp == NULL)
2915 break;
2916 scan = temp;
2917 }
2918
Bram Moolenaar582fd852005-03-28 20:58:01 +00002919 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002920 offset = (int)(scan - val);
2921 else
2922 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002923 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002924 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002925 * values in too many places. */
2926 if (offset > 0xffff)
2927 reg_toolong = TRUE;
2928 else
2929 {
2930 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2931 *(scan + 2) = (char_u) (offset & 0377);
2932 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002933}
2934
2935/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002936 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002937 */
2938 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002939regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002940{
2941 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2942 if (p == NULL || p == JUST_CALC_SIZE
2943 || (OP(p) != BRANCH
2944 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2945 return;
2946 regtail(OPERAND(p), val);
2947}
2948
2949/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002950 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002951 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002952/*
2953 * Start parsing at "str".
2954 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002955 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002956initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957{
2958 regparse = str;
2959 prevchr_len = 0;
2960 curchr = prevprevchr = prevchr = nextchr = -1;
2961 at_start = TRUE;
2962 prev_at_start = FALSE;
2963}
2964
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002965/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002966 * Save the current parse state, so that it can be restored and parsing
2967 * starts in the same state again.
2968 */
2969 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002970save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002971{
2972 ps->regparse = regparse;
2973 ps->prevchr_len = prevchr_len;
2974 ps->curchr = curchr;
2975 ps->prevchr = prevchr;
2976 ps->prevprevchr = prevprevchr;
2977 ps->nextchr = nextchr;
2978 ps->at_start = at_start;
2979 ps->prev_at_start = prev_at_start;
2980 ps->regnpar = regnpar;
2981}
2982
2983/*
2984 * Restore a previously saved parse state.
2985 */
2986 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002987restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002988{
2989 regparse = ps->regparse;
2990 prevchr_len = ps->prevchr_len;
2991 curchr = ps->curchr;
2992 prevchr = ps->prevchr;
2993 prevprevchr = ps->prevprevchr;
2994 nextchr = ps->nextchr;
2995 at_start = ps->at_start;
2996 prev_at_start = ps->prev_at_start;
2997 regnpar = ps->regnpar;
2998}
2999
3000
3001/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003002 * Get the next character without advancing.
3003 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003004 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003005peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003006{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003007 static int after_slash = FALSE;
3008
Bram Moolenaar071d4272004-06-13 20:20:40 +00003009 if (curchr == -1)
3010 {
3011 switch (curchr = regparse[0])
3012 {
3013 case '.':
3014 case '[':
3015 case '~':
3016 /* magic when 'magic' is on */
3017 if (reg_magic >= MAGIC_ON)
3018 curchr = Magic(curchr);
3019 break;
3020 case '(':
3021 case ')':
3022 case '{':
3023 case '%':
3024 case '+':
3025 case '=':
3026 case '?':
3027 case '@':
3028 case '!':
3029 case '&':
3030 case '|':
3031 case '<':
3032 case '>':
3033 case '#': /* future ext. */
3034 case '"': /* future ext. */
3035 case '\'': /* future ext. */
3036 case ',': /* future ext. */
3037 case '-': /* future ext. */
3038 case ':': /* future ext. */
3039 case ';': /* future ext. */
3040 case '`': /* future ext. */
3041 case '/': /* Can't be used in / command */
3042 /* magic only after "\v" */
3043 if (reg_magic == MAGIC_ALL)
3044 curchr = Magic(curchr);
3045 break;
3046 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003047 /* * is not magic as the very first character, eg "?*ptr", when
3048 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3049 * "\(\*" is not magic, thus must be magic if "after_slash" */
3050 if (reg_magic >= MAGIC_ON
3051 && !at_start
3052 && !(prev_at_start && prevchr == Magic('^'))
3053 && (after_slash
3054 || (prevchr != Magic('(')
3055 && prevchr != Magic('&')
3056 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003057 curchr = Magic('*');
3058 break;
3059 case '^':
3060 /* '^' is only magic as the very first character and if it's after
3061 * "\(", "\|", "\&' or "\n" */
3062 if (reg_magic >= MAGIC_OFF
3063 && (at_start
3064 || reg_magic == MAGIC_ALL
3065 || prevchr == Magic('(')
3066 || prevchr == Magic('|')
3067 || prevchr == Magic('&')
3068 || prevchr == Magic('n')
3069 || (no_Magic(prevchr) == '('
3070 && prevprevchr == Magic('%'))))
3071 {
3072 curchr = Magic('^');
3073 at_start = TRUE;
3074 prev_at_start = FALSE;
3075 }
3076 break;
3077 case '$':
3078 /* '$' is only magic as the very last char and if it's in front of
3079 * either "\|", "\)", "\&", or "\n" */
3080 if (reg_magic >= MAGIC_OFF)
3081 {
3082 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003083 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003084
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003085 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003086 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003087 || p[1] == 'm' || p[1] == 'M'
3088 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3089 {
3090 if (p[1] == 'v')
3091 is_magic_all = TRUE;
3092 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3093 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003094 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003095 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003096 if (p[0] == NUL
3097 || (p[0] == '\\'
3098 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3099 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003100 || (is_magic_all
3101 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003102 || reg_magic == MAGIC_ALL)
3103 curchr = Magic('$');
3104 }
3105 break;
3106 case '\\':
3107 {
3108 int c = regparse[1];
3109
3110 if (c == NUL)
3111 curchr = '\\'; /* trailing '\' */
3112 else if (
3113#ifdef EBCDIC
3114 vim_strchr(META, c)
3115#else
3116 c <= '~' && META_flags[c]
3117#endif
3118 )
3119 {
3120 /*
3121 * META contains everything that may be magic sometimes,
3122 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003123 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003124 * magicness. Therefore, \ is so meta-magic that it is
3125 * not in META.
3126 */
3127 curchr = -1;
3128 prev_at_start = at_start;
3129 at_start = FALSE; /* be able to say "/\*ptr" */
3130 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003131 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003132 peekchr();
3133 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003134 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003135 curchr = toggle_Magic(curchr);
3136 }
3137 else if (vim_strchr(REGEXP_ABBR, c))
3138 {
3139 /*
3140 * Handle abbreviations, like "\t" for TAB -- webb
3141 */
3142 curchr = backslash_trans(c);
3143 }
3144 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3145 curchr = toggle_Magic(c);
3146 else
3147 {
3148 /*
3149 * Next character can never be (made) magic?
3150 * Then backslashing it won't do anything.
3151 */
3152#ifdef FEAT_MBYTE
3153 if (has_mbyte)
3154 curchr = (*mb_ptr2char)(regparse + 1);
3155 else
3156#endif
3157 curchr = c;
3158 }
3159 break;
3160 }
3161
3162#ifdef FEAT_MBYTE
3163 default:
3164 if (has_mbyte)
3165 curchr = (*mb_ptr2char)(regparse);
3166#endif
3167 }
3168 }
3169
3170 return curchr;
3171}
3172
3173/*
3174 * Eat one lexed character. Do this in a way that we can undo it.
3175 */
3176 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003177skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003178{
3179 /* peekchr() eats a backslash, do the same here */
3180 if (*regparse == '\\')
3181 prevchr_len = 1;
3182 else
3183 prevchr_len = 0;
3184 if (regparse[prevchr_len] != NUL)
3185 {
3186#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003187 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003188 /* exclude composing chars that mb_ptr2len does include */
3189 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003190 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003191 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003192 else
3193#endif
3194 ++prevchr_len;
3195 }
3196 regparse += prevchr_len;
3197 prev_at_start = at_start;
3198 at_start = FALSE;
3199 prevprevchr = prevchr;
3200 prevchr = curchr;
3201 curchr = nextchr; /* use previously unget char, or -1 */
3202 nextchr = -1;
3203}
3204
3205/*
3206 * Skip a character while keeping the value of prev_at_start for at_start.
3207 * prevchr and prevprevchr are also kept.
3208 */
3209 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003210skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003211{
3212 int as = prev_at_start;
3213 int pr = prevchr;
3214 int prpr = prevprevchr;
3215
3216 skipchr();
3217 at_start = as;
3218 prevchr = pr;
3219 prevprevchr = prpr;
3220}
3221
/*
 * Lexical analyzer: return the next character from the pattern, with its
 * magicness as decided by peekchr(), and advance past it.
 */
    static int
getchr(void)
{
    int         c;

    c = peekchr();
    skipchr();
    return c;
}
3234
3235/*
3236 * put character back. Works only once!
3237 */
3238 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003239ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003240{
3241 nextchr = curchr;
3242 curchr = prevchr;
3243 prevchr = prevprevchr;
3244 at_start = prev_at_start;
3245 prev_at_start = FALSE;
3246
3247 /* Backup regparse, so that it's at the same position as before the
3248 * getchr(). */
3249 regparse -= prevchr_len;
3250}
3251
3252/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003253 * Get and return the value of the hex string at the current position.
3254 * Return -1 if there is no valid hex number.
3255 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003256 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003257 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003258 * The parameter controls the maximum number of input characters. This will be
3259 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3260 */
3261 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003262gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003263{
3264 int nr = 0;
3265 int c;
3266 int i;
3267
3268 for (i = 0; i < maxinputlen; ++i)
3269 {
3270 c = regparse[0];
3271 if (!vim_isxdigit(c))
3272 break;
3273 nr <<= 4;
3274 nr |= hex2nr(c);
3275 ++regparse;
3276 }
3277
3278 if (i == 0)
3279 return -1;
3280 return nr;
3281}
3282
3283/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003284 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003285 * current position. Return -1 for invalid. Consumes all digits.
3286 */
3287 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003288getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003289{
3290 int nr = 0;
3291 int c;
3292 int i;
3293
3294 for (i = 0; ; ++i)
3295 {
3296 c = regparse[0];
3297 if (c < '0' || c > '9')
3298 break;
3299 nr *= 10;
3300 nr += c - '0';
3301 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003302 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003303 }
3304
3305 if (i == 0)
3306 return -1;
3307 return nr;
3308}
3309
3310/*
3311 * get and return the value of the octal string immediately after the current
3312 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3313 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3314 * treat 8 or 9 as recognised characters. Position is updated:
3315 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003316 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003317 */
3318 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003319getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003320{
3321 int nr = 0;
3322 int c;
3323 int i;
3324
3325 for (i = 0; i < 3 && nr < 040; ++i)
3326 {
3327 c = regparse[0];
3328 if (c < '0' || c > '7')
3329 break;
3330 nr <<= 3;
3331 nr |= hex2nr(c);
3332 ++regparse;
3333 }
3334
3335 if (i == 0)
3336 return -1;
3337 return nr;
3338}
3339
3340/*
3341 * Get a number after a backslash that is inside [].
3342 * When nothing is recognized return a backslash.
3343 */
3344 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003345coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003346{
3347 int nr = -1;
3348
3349 switch (*regparse++)
3350 {
3351 case 'd': nr = getdecchrs(); break;
3352 case 'o': nr = getoctchrs(); break;
3353 case 'x': nr = gethexchrs(2); break;
3354 case 'u': nr = gethexchrs(4); break;
3355 case 'U': nr = gethexchrs(8); break;
3356 }
3357 if (nr < 0)
3358 {
3359 /* If getting the number fails be backwards compatible: the character
3360 * is a backslash. */
3361 --regparse;
3362 nr = '\\';
3363 }
3364 return nr;
3365}
3366
3367/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003368 * read_limits - Read two integers to be taken as a minimum and maximum.
3369 * If the first character is '-', then the range is reversed.
3370 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3371 * missing, a very big number is the default.
3372 */
3373 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003374read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003375{
3376 int reverse = FALSE;
3377 char_u *first_char;
3378 long tmp;
3379
3380 if (*regparse == '-')
3381 {
3382 /* Starts with '-', so reverse the range later */
3383 regparse++;
3384 reverse = TRUE;
3385 }
3386 first_char = regparse;
3387 *minval = getdigits(&regparse);
3388 if (*regparse == ',') /* There is a comma */
3389 {
3390 if (vim_isdigit(*++regparse))
3391 *maxval = getdigits(&regparse);
3392 else
3393 *maxval = MAX_LIMIT;
3394 }
3395 else if (VIM_ISDIGIT(*first_char))
3396 *maxval = *minval; /* It was \{n} or \{-n} */
3397 else
3398 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3399 if (*regparse == '\\')
3400 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003401 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003402 {
3403 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3404 reg_magic == MAGIC_ALL ? "" : "\\");
3405 EMSG_RET_FAIL(IObuff);
3406 }
3407
3408 /*
3409 * Reverse the range if there was a '-', or make sure it is in the right
3410 * order otherwise.
3411 */
3412 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3413 {
3414 tmp = *minval;
3415 *minval = *maxval;
3416 *maxval = tmp;
3417 }
3418 skipchr(); /* let's be friends with the lexer again */
3419 return OK;
3420}
3421
3422/*
3423 * vim_regexec and friends
3424 */
3425
3426/*
3427 * Global work variables for vim_regexec().
3428 */
3429
3430/* The current match-position is remembered with these variables: */
3431static linenr_T reglnum; /* line number, relative to first line */
3432static char_u *regline; /* start of current line */
3433static char_u *reginput; /* current input, points into "regline" */
3434
3435static int need_clear_subexpr; /* subexpressions still need to be
3436 * cleared */
3437#ifdef FEAT_SYN_HL
3438static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3439 * still need to be cleared */
3440#endif
3441
Bram Moolenaar071d4272004-06-13 20:20:40 +00003442/*
3443 * Structure used to save the current input state, when it needs to be
3444 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003445 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003446 */
3447typedef struct
3448{
3449 union
3450 {
3451 char_u *ptr; /* reginput pointer, for single-line regexp */
3452 lpos_T pos; /* reginput pos, for multi-line regexp */
3453 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003454 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003455} regsave_T;
3456
3457/* struct to save start/end pointer/position in for \(\) */
3458typedef struct
3459{
3460 union
3461 {
3462 char_u *ptr;
3463 lpos_T pos;
3464 } se_u;
3465} save_se_T;
3466
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003467/* used for BEHIND and NOBEHIND matching */
3468typedef struct regbehind_S
3469{
3470 regsave_T save_after;
3471 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003472 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003473 save_se_T save_start[NSUBEXP];
3474 save_se_T save_end[NSUBEXP];
3475} regbehind_T;
3476
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003477static char_u *reg_getline(linenr_T lnum);
3478static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm);
3479static long regtry(bt_regprog_T *prog, colnr_T col);
3480static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003481#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003482static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003483#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003484static void save_subexpr(regbehind_T *bp);
3485static void restore_subexpr(regbehind_T *bp);
3486static void reg_nextline(void);
3487static void reg_save(regsave_T *save, garray_T *gap);
3488static void reg_restore(regsave_T *save, garray_T *gap);
3489static int reg_save_equal(regsave_T *save);
3490static void save_se_multi(save_se_T *savep, lpos_T *posp);
3491static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003492
/* Save a sub-expression before attempting a match: the position "posp" for a
 * multi-line match (REG_MULTI), the pointer "pp" otherwise. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* Put a saved sub-expression back after a failed match: into "posp" for a
 * multi-line match (REG_MULTI), into "pp" otherwise. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3503
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003504static int re_num_cmp(long_u val, char_u *scan);
3505static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
3506static int regmatch(char_u *prog);
3507static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003508
#ifdef DEBUG
/* only exists in a DEBUG build, starts out as zero */
int             regnarrate = 0;
#endif
3512
3513/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003514 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3515 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003516 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003517 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003518static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003519static unsigned reg_tofreelen;
3520
3521/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02003522 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003523 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003524 * done:
3525 * single-line multi-line
3526 * reg_match &regmatch_T NULL
3527 * reg_mmatch NULL &regmmatch_T
3528 * reg_startp reg_match->startp <invalid>
3529 * reg_endp reg_match->endp <invalid>
3530 * reg_startpos <invalid> reg_mmatch->startpos
3531 * reg_endpos <invalid> reg_mmatch->endpos
3532 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003533 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003534 * reg_firstlnum <invalid> first line in which to search
3535 * reg_maxline 0 last line nr
3536 * reg_line_lbr FALSE or TRUE FALSE
3537 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003538typedef struct {
3539 regmatch_T *reg_match;
3540 regmmatch_T *reg_mmatch;
3541 char_u **reg_startp;
3542 char_u **reg_endp;
3543 lpos_T *reg_startpos;
3544 lpos_T *reg_endpos;
3545 win_T *reg_win;
3546 buf_T *reg_buf;
3547 linenr_T reg_firstlnum;
3548 linenr_T reg_maxline;
3549 int reg_line_lbr; /* "\n" in string is line break */
3550
3551 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3552 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3553 * contains '\c' or '\C' the value is overruled. */
3554 int reg_ic;
3555
3556#ifdef FEAT_MBYTE
3557 /* Similar to rex.reg_ic, but only for 'combining' characters. Set with \Z
3558 * flag in the regexp. Defaults to false, always. */
3559 int reg_icombine;
3560#endif
3561
3562 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3563 * there is no maximum. */
3564 colnr_T reg_maxcol;
3565} regexec_T;
3566
3567static regexec_T rex;
3568static int rex_in_use = FALSE;
3569
Bram Moolenaar071d4272004-06-13 20:20:40 +00003570
/* The values that "rs_state" in regitem_T can have. */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3591
3592/*
3593 * When there are alternatives a regstate_T is put on the regstack to remember
3594 * what we are doing.
3595 * Before it may be another type of item, depending on rs_state, to remember
3596 * more things.
3597 */
3598typedef struct regitem_S
3599{
3600 regstate_T rs_state; /* what we are doing, one of RS_ above */
3601 char_u *rs_scan; /* current node in program */
3602 union
3603 {
3604 save_se_T sesave;
3605 regsave_T regsave;
3606 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003607 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003608} regitem_T;
3609
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003610static regitem_T *regstack_push(regstate_T state, char_u *scan);
3611static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003612
/*
 * Bookkeeping for matching STAR, PLUS and BRACE_SIMPLE items.
 */
typedef struct regstar_S
{
    int     nextb;      /* next byte */
    int     nextb_ic;   /* next byte reverse case */
    long    count;
    long    minval;
    long    maxval;
} regstar_T;
3622
3623/* used to store input position when a BACK was encountered, so that we now if
3624 * we made any progress since the last time. */
3625typedef struct backpos_S
3626{
3627 char_u *bp_scan; /* "scan" where BACK was encountered */
3628 regsave_T bp_pos; /* last input position */
3629} backpos_T;
3630
3631/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003632 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3633 * to avoid invoking malloc() and free() often.
3634 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3635 * or regbehind_T.
3636 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003637 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003638static garray_T regstack = {0, 0, 0, 0, NULL};
3639static garray_T backpos = {0, 0, 0, 0, NULL};
3640
/*
 * Allocation strategy shared by the regstack and backpos tables, meant to
 * keep the number of malloc/free calls low:
 * - Start out fairly small.
 * - Grow when needed: eight times bigger the first time, doubling after
 *   that.
 * - After a match, release the memory only when the array has grown.  At its
 *   initial size the array stays allocated.
 * The result is fast without holding on to a lot of memory: a three times
 * speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL        2048
#define BACKPOS_INITIAL         64
3654
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between matches:
 * "reg_tofree", "reg_prev_sub" and the regstack and backpos tables.
 */
    void
free_regexp_stuff(void)
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003665
Bram Moolenaar071d4272004-06-13 20:20:40 +00003666/*
3667 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3668 */
3669 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003670reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003671{
3672 /* when looking behind for a match/no-match lnum is negative. But we
3673 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003674 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003675 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003676 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003677 /* Must have matched the "\n" in the last line. */
3678 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003679 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003680}
3681
3682static regsave_T behind_pos;
3683
3684#ifdef FEAT_SYN_HL
3685static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3686static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3687static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3688static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3689#endif
3690
3691/* TRUE if using multi-line regexp. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003692#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003693
Bram Moolenaar071d4272004-06-13 20:20:40 +00003694/*
3695 * Match a regexp against a string.
3696 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3697 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003698 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003699 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003700 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003701 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003702 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003703bt_regexec_nl(
3704 regmatch_T *rmp,
3705 char_u *line, /* string to match against */
3706 colnr_T col, /* column to start looking for match */
3707 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003708{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003709 rex.reg_match = rmp;
3710 rex.reg_mmatch = NULL;
3711 rex.reg_maxline = 0;
3712 rex.reg_line_lbr = line_lbr;
3713 rex.reg_buf = curbuf;
3714 rex.reg_win = NULL;
3715 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003716#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003717 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003718#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003719 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003720
3721 return bt_regexec_both(line, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003722}
3723
Bram Moolenaar071d4272004-06-13 20:20:40 +00003724/*
3725 * Match a regexp against multiple lines.
3726 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3727 * Uses curbuf for line count and 'iskeyword'.
3728 *
3729 * Return zero if there is no match. Return number of lines contained in the
3730 * match otherwise.
3731 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003732 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003733bt_regexec_multi(
3734 regmmatch_T *rmp,
3735 win_T *win, /* window in which to search or NULL */
3736 buf_T *buf, /* buffer in which to search */
3737 linenr_T lnum, /* nr of line to start looking for match */
3738 colnr_T col, /* column to start looking for match */
3739 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003740{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003741 rex.reg_match = NULL;
3742 rex.reg_mmatch = rmp;
3743 rex.reg_buf = buf;
3744 rex.reg_win = win;
3745 rex.reg_firstlnum = lnum;
3746 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3747 rex.reg_line_lbr = FALSE;
3748 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003749#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003750 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003751#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003752 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003753
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003754 return bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003755}
3756
3757/*
3758 * Match a regexp against a string ("line" points to the string) or multiple
3759 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003760 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003761 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003762 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003763bt_regexec_both(
3764 char_u *line,
3765 colnr_T col, /* column to start looking for match */
3766 proftime_T *tm UNUSED) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003767{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003768 bt_regprog_T *prog;
3769 char_u *s;
3770 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003771
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003772 /* Create "regstack" and "backpos" if they are not allocated yet.
3773 * We allocate *_INITIAL amount of bytes first and then set the grow size
3774 * to much bigger value to avoid many malloc calls in case of deep regular
3775 * expressions. */
3776 if (regstack.ga_data == NULL)
3777 {
3778 /* Use an item size of 1 byte, since we push different things
3779 * onto the regstack. */
3780 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003781 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003782 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3783 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003784
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003785 if (backpos.ga_data == NULL)
3786 {
3787 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003788 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003789 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3790 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003791
Bram Moolenaar071d4272004-06-13 20:20:40 +00003792 if (REG_MULTI)
3793 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003794 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003795 line = reg_getline((linenr_T)0);
Bram Moolenaar6100d022016-10-02 16:51:57 +02003796 rex.reg_startpos = rex.reg_mmatch->startpos;
3797 rex.reg_endpos = rex.reg_mmatch->endpos;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003798 }
3799 else
3800 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003801 prog = (bt_regprog_T *)rex.reg_match->regprog;
3802 rex.reg_startp = rex.reg_match->startp;
3803 rex.reg_endp = rex.reg_match->endp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003804 }
3805
3806 /* Be paranoid... */
3807 if (prog == NULL || line == NULL)
3808 {
3809 EMSG(_(e_null));
3810 goto theend;
3811 }
3812
3813 /* Check validity of program. */
3814 if (prog_magic_wrong())
3815 goto theend;
3816
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003817 /* If the start column is past the maximum column: no need to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003818 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003819 goto theend;
3820
Bram Moolenaar6100d022016-10-02 16:51:57 +02003821 /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003822 if (prog->regflags & RF_ICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003823 rex.reg_ic = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003824 else if (prog->regflags & RF_NOICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003825 rex.reg_ic = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003826
3827#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003828 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003829 if (prog->regflags & RF_ICOMBINE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003830 rex.reg_icombine = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003831#endif
3832
3833 /* If there is a "must appear" string, look for it. */
3834 if (prog->regmust != NULL)
3835 {
3836 int c;
3837
3838#ifdef FEAT_MBYTE
3839 if (has_mbyte)
3840 c = (*mb_ptr2char)(prog->regmust);
3841 else
3842#endif
3843 c = *prog->regmust;
3844 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003845
3846 /*
3847 * This is used very often, esp. for ":global". Use three versions of
3848 * the loop to avoid overhead of conditions.
3849 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003850 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003851#ifdef FEAT_MBYTE
3852 && !has_mbyte
3853#endif
3854 )
3855 while ((s = vim_strbyte(s, c)) != NULL)
3856 {
3857 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3858 break; /* Found it. */
3859 ++s;
3860 }
3861#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003862 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar05159a02005-02-26 23:04:13 +00003863 while ((s = vim_strchr(s, c)) != NULL)
3864 {
3865 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3866 break; /* Found it. */
3867 mb_ptr_adv(s);
3868 }
3869#endif
3870 else
3871 while ((s = cstrchr(s, c)) != NULL)
3872 {
3873 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3874 break; /* Found it. */
3875 mb_ptr_adv(s);
3876 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003877 if (s == NULL) /* Not present. */
3878 goto theend;
3879 }
3880
3881 regline = line;
3882 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003883 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003884
3885 /* Simplest case: Anchored match need be tried only once. */
3886 if (prog->reganch)
3887 {
3888 int c;
3889
3890#ifdef FEAT_MBYTE
3891 if (has_mbyte)
3892 c = (*mb_ptr2char)(regline + col);
3893 else
3894#endif
3895 c = regline[col];
3896 if (prog->regstart == NUL
3897 || prog->regstart == c
Bram Moolenaar6100d022016-10-02 16:51:57 +02003898 || (rex.reg_ic && ((
Bram Moolenaar071d4272004-06-13 20:20:40 +00003899#ifdef FEAT_MBYTE
3900 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3901 || (c < 255 && prog->regstart < 255 &&
3902#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003903 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003904 retval = regtry(prog, col);
3905 else
3906 retval = 0;
3907 }
3908 else
3909 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003910#ifdef FEAT_RELTIME
3911 int tm_count = 0;
3912#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003913 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003914 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003915 {
3916 if (prog->regstart != NUL)
3917 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003918 /* Skip until the char we know it must start with.
3919 * Used often, do some work to avoid call overhead. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003920 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003921#ifdef FEAT_MBYTE
3922 && !has_mbyte
3923#endif
3924 )
3925 s = vim_strbyte(regline + col, prog->regstart);
3926 else
3927 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003928 if (s == NULL)
3929 {
3930 retval = 0;
3931 break;
3932 }
3933 col = (int)(s - regline);
3934 }
3935
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003936 /* Check for maximum column to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003937 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003938 {
3939 retval = 0;
3940 break;
3941 }
3942
Bram Moolenaar071d4272004-06-13 20:20:40 +00003943 retval = regtry(prog, col);
3944 if (retval > 0)
3945 break;
3946
3947 /* if not currently on the first line, get it again */
3948 if (reglnum != 0)
3949 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003950 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003951 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003952 }
3953 if (regline[col] == NUL)
3954 break;
3955#ifdef FEAT_MBYTE
3956 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003957 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003958 else
3959#endif
3960 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003961#ifdef FEAT_RELTIME
3962 /* Check for timeout once in a twenty times to avoid overhead. */
3963 if (tm != NULL && ++tm_count == 20)
3964 {
3965 tm_count = 0;
3966 if (profile_passed_limit(tm))
3967 break;
3968 }
3969#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003970 }
3971 }
3972
Bram Moolenaar071d4272004-06-13 20:20:40 +00003973theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003974 /* Free "reg_tofree" when it's a bit big.
3975 * Free regstack and backpos if they are bigger than their initial size. */
3976 if (reg_tofreelen > 400)
3977 {
3978 vim_free(reg_tofree);
3979 reg_tofree = NULL;
3980 }
3981 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3982 ga_clear(&regstack);
3983 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3984 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003985
Bram Moolenaar071d4272004-06-13 20:20:40 +00003986 return retval;
3987}
3988
3989#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003990static reg_extmatch_T *make_extmatch(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003991
3992/*
3993 * Create a new extmatch and mark it as referenced once.
3994 */
3995 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003996make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997{
3998 reg_extmatch_T *em;
3999
4000 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4001 if (em != NULL)
4002 em->refcnt = 1;
4003 return em;
4004}
4005
4006/*
4007 * Add a reference to an extmatch.
4008 */
4009 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004010ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004011{
4012 if (em != NULL)
4013 em->refcnt++;
4014 return em;
4015}
4016
4017/*
4018 * Remove a reference to an extmatch. If there are no references left, free
4019 * the info.
4020 */
4021 void
Bram Moolenaar05540972016-01-30 20:31:25 +01004022unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004023{
4024 int i;
4025
4026 if (em != NULL && --em->refcnt <= 0)
4027 {
4028 for (i = 0; i < NSUBEXP; ++i)
4029 vim_free(em->matches[i]);
4030 vim_free(em);
4031 }
4032}
4033#endif
4034
4035/*
4036 * regtry - try match of "prog" with at regline["col"].
4037 * Returns 0 for failure, number of lines contained in the match otherwise.
4038 */
4039 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01004040regtry(bt_regprog_T *prog, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004041{
4042 reginput = regline + col;
4043 need_clear_subexpr = TRUE;
4044#ifdef FEAT_SYN_HL
4045 /* Clear the external match subpointers if necessary. */
4046 if (prog->reghasz == REX_SET)
4047 need_clear_zsubexpr = TRUE;
4048#endif
4049
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004050 if (regmatch(prog->program + 1) == 0)
4051 return 0;
4052
4053 cleanup_subexpr();
4054 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004055 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004056 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004057 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004058 rex.reg_startpos[0].lnum = 0;
4059 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004060 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004061 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004062 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004063 rex.reg_endpos[0].lnum = reglnum;
4064 rex.reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004065 }
4066 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004067 /* Use line number of "\ze". */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004068 reglnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004069 }
4070 else
4071 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004072 if (rex.reg_startp[0] == NULL)
4073 rex.reg_startp[0] = regline + col;
4074 if (rex.reg_endp[0] == NULL)
4075 rex.reg_endp[0] = reginput;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004076 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004077#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004078 /* Package any found \z(...\) matches for export. Default is none. */
4079 unref_extmatch(re_extmatch_out);
4080 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004081
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004082 if (prog->reghasz == REX_SET)
4083 {
4084 int i;
4085
4086 cleanup_zsubexpr();
4087 re_extmatch_out = make_extmatch();
4088 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004089 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004090 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004091 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004092 /* Only accept single line matches. */
4093 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004094 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4095 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004096 re_extmatch_out->matches[i] =
4097 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004098 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004099 reg_endzpos[i].col - reg_startzpos[i].col);
4100 }
4101 else
4102 {
4103 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4104 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004105 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004106 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004107 }
4108 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004109 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004110#endif
4111 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004112}
4113
#ifdef FEAT_MBYTE
static int reg_prev_class(void);

/*
 * Return the class of the character before reginput, using rex.reg_buf.
 * Returns -1 when reginput is at the start of the line.
 */
    static int
reg_prev_class(void)
{
    char_u  *prev;

    if (reginput <= regline)
        return -1;

    /* Step back one byte, then on to the first byte of that character. */
    prev = reginput - 1;
    return mb_get_class_buf(prev - (*mb_head_off)(regline, prev),
                                                                 rex.reg_buf);
}
#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004129
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004130static int reg_match_visual(void);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004131
4132/*
4133 * Return TRUE if the current reginput position matches the Visual area.
4134 */
4135 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004136reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004137{
4138 pos_T top, bot;
4139 linenr_T lnum;
4140 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004141 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004142 int mode;
4143 colnr_T start, end;
4144 colnr_T start2, end2;
4145 colnr_T cols;
4146
4147 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004148 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004149 return FALSE;
4150
4151 if (VIsual_active)
4152 {
4153 if (lt(VIsual, wp->w_cursor))
4154 {
4155 top = VIsual;
4156 bot = wp->w_cursor;
4157 }
4158 else
4159 {
4160 top = wp->w_cursor;
4161 bot = VIsual;
4162 }
4163 mode = VIsual_mode;
4164 }
4165 else
4166 {
4167 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4168 {
4169 top = curbuf->b_visual.vi_start;
4170 bot = curbuf->b_visual.vi_end;
4171 }
4172 else
4173 {
4174 top = curbuf->b_visual.vi_end;
4175 bot = curbuf->b_visual.vi_start;
4176 }
4177 mode = curbuf->b_visual.vi_mode;
4178 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004179 lnum = reglnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004180 if (lnum < top.lnum || lnum > bot.lnum)
4181 return FALSE;
4182
4183 if (mode == 'v')
4184 {
4185 col = (colnr_T)(reginput - regline);
4186 if ((lnum == top.lnum && col < top.col)
4187 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4188 return FALSE;
4189 }
4190 else if (mode == Ctrl_V)
4191 {
4192 getvvcol(wp, &top, &start, NULL, &end);
4193 getvvcol(wp, &bot, &start2, NULL, &end2);
4194 if (start2 < start)
4195 start = start2;
4196 if (end2 > end)
4197 end = end2;
4198 if (top.col == MAXCOL || bot.col == MAXCOL)
4199 end = MAXCOL;
4200 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4201 if (cols < start || cols > end - (*p_sel == 'e'))
4202 return FALSE;
4203 }
4204 return TRUE;
4205}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004206
/* Move reginput forward with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  These are really local to
 * regmatch(), but are kept here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4216
4217/*
4218 * regmatch - main matching routine
4219 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004220 * Conceptually the strategy is simple: Check to see whether the current node
4221 * matches, push an item onto the regstack and loop to see whether the rest
4222 * matches, and then act accordingly. In practice we make some effort to
4223 * avoid using the regstack, in particular by going through "ordinary" nodes
4224 * (that don't need to know whether the rest of the match failed) by a nested
4225 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004226 *
4227 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4228 * the last matched character.
4229 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4230 * undefined state!
4231 */
4232 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004233regmatch(
4234 char_u *scan) /* Current node. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004235{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004236 char_u *next; /* Next node. */
4237 int op;
4238 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004239 regitem_T *rp;
4240 int no;
4241 int status; /* one of the RA_ values: */
4242#define RA_FAIL 1 /* something failed, abort */
4243#define RA_CONT 2 /* continue in inner loop */
4244#define RA_BREAK 3 /* break inner loop */
4245#define RA_MATCH 4 /* successful match */
4246#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004247
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004248 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004249 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004250 regstack.ga_len = 0;
4251 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004252
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004253 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004254 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004255 */
4256 for (;;)
4257 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004258 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4259 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004260 fast_breakcheck();
4261
4262#ifdef DEBUG
4263 if (scan != NULL && regnarrate)
4264 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004265 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004266 mch_errmsg("(\n");
4267 }
4268#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004269
4270 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004271 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004272 * regstack.
4273 */
4274 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004275 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004276 if (got_int || scan == NULL)
4277 {
4278 status = RA_FAIL;
4279 break;
4280 }
4281 status = RA_CONT;
4282
Bram Moolenaar071d4272004-06-13 20:20:40 +00004283#ifdef DEBUG
4284 if (regnarrate)
4285 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004286 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004287 mch_errmsg("...\n");
4288# ifdef FEAT_SYN_HL
4289 if (re_extmatch_in != NULL)
4290 {
4291 int i;
4292
4293 mch_errmsg(_("External submatches:\n"));
4294 for (i = 0; i < NSUBEXP; i++)
4295 {
4296 mch_errmsg(" \"");
4297 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004298 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004299 mch_errmsg("\"\n");
4300 }
4301 }
4302# endif
4303 }
4304#endif
4305 next = regnext(scan);
4306
4307 op = OP(scan);
4308 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004309 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
4310 && *reginput == NUL && reglnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004311 {
4312 reg_nextline();
4313 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004314 else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004315 {
4316 ADVANCE_REGINPUT();
4317 }
4318 else
4319 {
4320 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004321 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004322#ifdef FEAT_MBYTE
4323 if (has_mbyte)
4324 c = (*mb_ptr2char)(reginput);
4325 else
4326#endif
4327 c = *reginput;
4328 switch (op)
4329 {
4330 case BOL:
4331 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004332 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004333 break;
4334
4335 case EOL:
4336 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004337 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004338 break;
4339
4340 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004341 /* We're not at the beginning of the file when below the first
4342 * line where we started, not at the start of the line or we
4343 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004344 if (reglnum != 0 || reginput != regline
Bram Moolenaar6100d022016-10-02 16:51:57 +02004345 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004346 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004347 break;
4348
4349 case RE_EOF:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004350 if (reglnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004351 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004352 break;
4353
4354 case CURSOR:
4355 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004356 * rex.reg_win->w_cursor position to the match position. */
4357 if (rex.reg_win == NULL
4358 || (reglnum + rex.reg_firstlnum
4359 != rex.reg_win->w_cursor.lnum)
4360 || ((colnr_T)(reginput - regline)
4361 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004362 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004363 break;
4364
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004365 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004366 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004367 {
4368 int mark = OPERAND(scan)[0];
4369 int cmp = OPERAND(scan)[1];
4370 pos_T *pos;
4371
Bram Moolenaar6100d022016-10-02 16:51:57 +02004372 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004373 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004374 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004375 || (pos->lnum == reglnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004376 ? (pos->col == (colnr_T)(reginput - regline)
4377 ? (cmp == '<' || cmp == '>')
4378 : (pos->col < (colnr_T)(reginput - regline)
4379 ? cmp != '>'
4380 : cmp != '<'))
Bram Moolenaar6100d022016-10-02 16:51:57 +02004381 : (pos->lnum < reglnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004382 ? cmp != '>'
4383 : cmp != '<')))
4384 status = RA_NOMATCH;
4385 }
4386 break;
4387
4388 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004389 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004390 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004391 break;
4392
Bram Moolenaar071d4272004-06-13 20:20:40 +00004393 case RE_LNUM:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004394 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004395 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004396 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004397 break;
4398
4399 case RE_COL:
4400 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004401 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004402 break;
4403
4404 case RE_VCOL:
4405 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004406 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar071d4272004-06-13 20:20:40 +00004407 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004408 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004409 break;
4410
4411 case BOW: /* \<word; reginput points to w */
4412 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004413 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004414#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004415 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004416 {
4417 int this_class;
4418
4419 /* Get class of current and previous char (if it exists). */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004420 this_class = mb_get_class_buf(reginput, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004421 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004422 status = RA_NOMATCH; /* not on a word at all */
4423 else if (reg_prev_class() == this_class)
4424 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004425 }
4426#endif
4427 else
4428 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004429 if (!vim_iswordc_buf(c, rex.reg_buf) || (reginput > regline
4430 && vim_iswordc_buf(reginput[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004431 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004432 }
4433 break;
4434
4435 case EOW: /* word\>; reginput points after d */
4436 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004437 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004438#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004439 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004440 {
4441 int this_class, prev_class;
4442
4443 /* Get class of current and previous char (if it exists). */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004444 this_class = mb_get_class_buf(reginput, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004445 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004446 if (this_class == prev_class
4447 || prev_class == 0 || prev_class == 1)
4448 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004449 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004450#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004451 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004452 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004453 if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)
4454 || (reginput[0] != NUL
4455 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004456 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004457 }
4458 break; /* Matched with EOW */
4459
4460 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004461 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004462 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004463 status = RA_NOMATCH;
4464 else
4465 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004466 break;
4467
4468 case IDENT:
4469 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004470 status = RA_NOMATCH;
4471 else
4472 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004473 break;
4474
4475 case SIDENT:
4476 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004477 status = RA_NOMATCH;
4478 else
4479 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004480 break;
4481
4482 case KWORD:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004483 if (!vim_iswordp_buf(reginput, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004484 status = RA_NOMATCH;
4485 else
4486 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004487 break;
4488
4489 case SKWORD:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004490 if (VIM_ISDIGIT(*reginput)
4491 || !vim_iswordp_buf(reginput, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004492 status = RA_NOMATCH;
4493 else
4494 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004495 break;
4496
4497 case FNAME:
4498 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004499 status = RA_NOMATCH;
4500 else
4501 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004502 break;
4503
4504 case SFNAME:
4505 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004506 status = RA_NOMATCH;
4507 else
4508 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004509 break;
4510
4511 case PRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004512 if (!vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004513 status = RA_NOMATCH;
4514 else
4515 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004516 break;
4517
4518 case SPRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004519 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004520 status = RA_NOMATCH;
4521 else
4522 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004523 break;
4524
4525 case WHITE:
4526 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004527 status = RA_NOMATCH;
4528 else
4529 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004530 break;
4531
4532 case NWHITE:
4533 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004534 status = RA_NOMATCH;
4535 else
4536 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004537 break;
4538
4539 case DIGIT:
4540 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004541 status = RA_NOMATCH;
4542 else
4543 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004544 break;
4545
4546 case NDIGIT:
4547 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004548 status = RA_NOMATCH;
4549 else
4550 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004551 break;
4552
4553 case HEX:
4554 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004555 status = RA_NOMATCH;
4556 else
4557 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004558 break;
4559
4560 case NHEX:
4561 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004562 status = RA_NOMATCH;
4563 else
4564 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 break;
4566
4567 case OCTAL:
4568 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004569 status = RA_NOMATCH;
4570 else
4571 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004572 break;
4573
4574 case NOCTAL:
4575 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004576 status = RA_NOMATCH;
4577 else
4578 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004579 break;
4580
4581 case WORD:
4582 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004583 status = RA_NOMATCH;
4584 else
4585 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004586 break;
4587
4588 case NWORD:
4589 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004590 status = RA_NOMATCH;
4591 else
4592 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004593 break;
4594
4595 case HEAD:
4596 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004597 status = RA_NOMATCH;
4598 else
4599 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004600 break;
4601
4602 case NHEAD:
4603 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004604 status = RA_NOMATCH;
4605 else
4606 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004607 break;
4608
4609 case ALPHA:
4610 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004611 status = RA_NOMATCH;
4612 else
4613 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004614 break;
4615
4616 case NALPHA:
4617 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004618 status = RA_NOMATCH;
4619 else
4620 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004621 break;
4622
4623 case LOWER:
4624 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004625 status = RA_NOMATCH;
4626 else
4627 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004628 break;
4629
4630 case NLOWER:
4631 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004632 status = RA_NOMATCH;
4633 else
4634 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004635 break;
4636
4637 case UPPER:
4638 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004639 status = RA_NOMATCH;
4640 else
4641 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004642 break;
4643
4644 case NUPPER:
4645 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004646 status = RA_NOMATCH;
4647 else
4648 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004649 break;
4650
4651 case EXACTLY:
4652 {
4653 int len;
4654 char_u *opnd;
4655
4656 opnd = OPERAND(scan);
4657 /* Inline the first byte, for speed. */
4658 if (*opnd != *reginput
Bram Moolenaar6100d022016-10-02 16:51:57 +02004659 && (!rex.reg_ic || (
Bram Moolenaar071d4272004-06-13 20:20:40 +00004660#ifdef FEAT_MBYTE
4661 !enc_utf8 &&
4662#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004663 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004664 status = RA_NOMATCH;
4665 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004666 {
4667 /* match empty string always works; happens when "~" is
4668 * empty. */
4669 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004670 else
4671 {
4672 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004673#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02004674 && !(enc_utf8 && rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004675#endif
4676 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004677 {
4678 len = 1; /* matched a single byte above */
4679 }
4680 else
4681 {
4682 /* Need to match first byte again for multi-byte. */
4683 len = (int)STRLEN(opnd);
4684 if (cstrncmp(opnd, reginput, &len) != 0)
4685 status = RA_NOMATCH;
4686 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004687#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004688 /* Check for following composing character, unless %C
4689 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004690 if (status != RA_NOMATCH
4691 && enc_utf8
4692 && UTF_COMPOSINGLIKE(reginput, reginput + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004693 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004694 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004695 {
4696 /* raaron: This code makes a composing character get
4697 * ignored, which is the correct behavior (sometimes)
4698 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004699 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004700 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004701#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004702 if (status != RA_NOMATCH)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004703 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004704 }
4705 }
4706 break;
4707
4708 case ANYOF:
4709 case ANYBUT:
4710 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004711 status = RA_NOMATCH;
4712 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4713 status = RA_NOMATCH;
4714 else
4715 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004716 break;
4717
4718#ifdef FEAT_MBYTE
4719 case MULTIBYTECODE:
4720 if (has_mbyte)
4721 {
4722 int i, len;
4723 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004724 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004725
4726 opnd = OPERAND(scan);
4727 /* Safety check (just in case 'encoding' was changed since
4728 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004729 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004730 {
4731 status = RA_NOMATCH;
4732 break;
4733 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004734 if (enc_utf8)
4735 opndc = mb_ptr2char(opnd);
4736 if (enc_utf8 && utf_iscomposing(opndc))
4737 {
4738 /* When only a composing char is given match at any
4739 * position where that composing char appears. */
4740 status = RA_NOMATCH;
Bram Moolenaar0e462412015-03-31 14:17:31 +02004741 for (i = 0; reginput[i] != NUL;
4742 i += utf_ptr2len(reginput + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004743 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004744 inpc = mb_ptr2char(reginput + i);
4745 if (!utf_iscomposing(inpc))
4746 {
4747 if (i > 0)
4748 break;
4749 }
4750 else if (opndc == inpc)
4751 {
4752 /* Include all following composing chars. */
4753 len = i + mb_ptr2len(reginput + i);
4754 status = RA_MATCH;
4755 break;
4756 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004757 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004758 }
4759 else
4760 for (i = 0; i < len; ++i)
4761 if (opnd[i] != reginput[i])
4762 {
4763 status = RA_NOMATCH;
4764 break;
4765 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004766 reginput += len;
4767 }
4768 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004769 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004770 break;
4771#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004772 case RE_COMPOSING:
4773#ifdef FEAT_MBYTE
4774 if (enc_utf8)
4775 {
4776 /* Skip composing characters. */
4777 while (utf_iscomposing(utf_ptr2char(reginput)))
4778 mb_cptr_adv(reginput);
4779 }
4780#endif
4781 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004782
4783 case NOTHING:
4784 break;
4785
4786 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004787 {
4788 int i;
4789 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004790
Bram Moolenaar582fd852005-03-28 20:58:01 +00004791 /*
4792 * When we run into BACK we need to check if we don't keep
4793 * looping without matching any input. The second and later
4794 * times a BACK is encountered it fails if the input is still
4795 * at the same position as the previous time.
4796 * The positions are stored in "backpos" and found by the
4797 * current value of "scan", the position in the RE program.
4798 */
4799 bp = (backpos_T *)backpos.ga_data;
4800 for (i = 0; i < backpos.ga_len; ++i)
4801 if (bp[i].bp_scan == scan)
4802 break;
4803 if (i == backpos.ga_len)
4804 {
4805 /* First time at this BACK, make room to store the pos. */
4806 if (ga_grow(&backpos, 1) == FAIL)
4807 status = RA_FAIL;
4808 else
4809 {
4810 /* get "ga_data" again, it may have changed */
4811 bp = (backpos_T *)backpos.ga_data;
4812 bp[i].bp_scan = scan;
4813 ++backpos.ga_len;
4814 }
4815 }
4816 else if (reg_save_equal(&bp[i].bp_pos))
4817 /* Still at same position as last time, fail. */
4818 status = RA_NOMATCH;
4819
4820 if (status != RA_FAIL && status != RA_NOMATCH)
4821 reg_save(&bp[i].bp_pos, &backpos);
4822 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004823 break;
4824
Bram Moolenaar071d4272004-06-13 20:20:40 +00004825 case MOPEN + 0: /* Match start: \zs */
4826 case MOPEN + 1: /* \( */
4827 case MOPEN + 2:
4828 case MOPEN + 3:
4829 case MOPEN + 4:
4830 case MOPEN + 5:
4831 case MOPEN + 6:
4832 case MOPEN + 7:
4833 case MOPEN + 8:
4834 case MOPEN + 9:
4835 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004836 no = op - MOPEN;
4837 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004838 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004839 if (rp == NULL)
4840 status = RA_FAIL;
4841 else
4842 {
4843 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004844 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4845 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004846 /* We simply continue and handle the result when done. */
4847 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004848 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004849 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004850
4851 case NOPEN: /* \%( */
4852 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004853 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004854 status = RA_FAIL;
4855 /* We simply continue and handle the result when done. */
4856 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004857
4858#ifdef FEAT_SYN_HL
4859 case ZOPEN + 1:
4860 case ZOPEN + 2:
4861 case ZOPEN + 3:
4862 case ZOPEN + 4:
4863 case ZOPEN + 5:
4864 case ZOPEN + 6:
4865 case ZOPEN + 7:
4866 case ZOPEN + 8:
4867 case ZOPEN + 9:
4868 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004869 no = op - ZOPEN;
4870 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004871 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004872 if (rp == NULL)
4873 status = RA_FAIL;
4874 else
4875 {
4876 rp->rs_no = no;
4877 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4878 &reg_startzp[no]);
4879 /* We simply continue and handle the result when done. */
4880 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004881 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004882 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004883#endif
4884
4885 case MCLOSE + 0: /* Match end: \ze */
4886 case MCLOSE + 1: /* \) */
4887 case MCLOSE + 2:
4888 case MCLOSE + 3:
4889 case MCLOSE + 4:
4890 case MCLOSE + 5:
4891 case MCLOSE + 6:
4892 case MCLOSE + 7:
4893 case MCLOSE + 8:
4894 case MCLOSE + 9:
4895 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004896 no = op - MCLOSE;
4897 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004898 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004899 if (rp == NULL)
4900 status = RA_FAIL;
4901 else
4902 {
4903 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004904 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4905 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004906 /* We simply continue and handle the result when done. */
4907 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004908 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004909 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004910
4911#ifdef FEAT_SYN_HL
4912 case ZCLOSE + 1: /* \) after \z( */
4913 case ZCLOSE + 2:
4914 case ZCLOSE + 3:
4915 case ZCLOSE + 4:
4916 case ZCLOSE + 5:
4917 case ZCLOSE + 6:
4918 case ZCLOSE + 7:
4919 case ZCLOSE + 8:
4920 case ZCLOSE + 9:
4921 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004922 no = op - ZCLOSE;
4923 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004924 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004925 if (rp == NULL)
4926 status = RA_FAIL;
4927 else
4928 {
4929 rp->rs_no = no;
4930 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4931 &reg_endzp[no]);
4932 /* We simply continue and handle the result when done. */
4933 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004934 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004935 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004936#endif
4937
4938 case BACKREF + 1:
4939 case BACKREF + 2:
4940 case BACKREF + 3:
4941 case BACKREF + 4:
4942 case BACKREF + 5:
4943 case BACKREF + 6:
4944 case BACKREF + 7:
4945 case BACKREF + 8:
4946 case BACKREF + 9:
4947 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004948 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004949
4950 no = op - BACKREF;
4951 cleanup_subexpr();
4952 if (!REG_MULTI) /* Single-line regexp */
4953 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004954 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004955 {
4956 /* Backref was not set: Match an empty string. */
4957 len = 0;
4958 }
4959 else
4960 {
4961 /* Compare current input with back-ref in the same
4962 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004963 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
4964 if (cstrncmp(rex.reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004965 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004966 }
4967 }
4968 else /* Multi-line regexp */
4969 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004970 if (rex.reg_startpos[no].lnum < 0
4971 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004972 {
4973 /* Backref was not set: Match an empty string. */
4974 len = 0;
4975 }
4976 else
4977 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004978 if (rex.reg_startpos[no].lnum == reglnum
4979 && rex.reg_endpos[no].lnum == reglnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004980 {
4981 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004982 len = rex.reg_endpos[no].col
4983 - rex.reg_startpos[no].col;
4984 if (cstrncmp(regline + rex.reg_startpos[no].col,
Bram Moolenaar071d4272004-06-13 20:20:40 +00004985 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004986 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004987 }
4988 else
4989 {
4990 /* Messy situation: Need to compare between two
4991 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004992 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004993 rex.reg_startpos[no].lnum,
4994 rex.reg_startpos[no].col,
4995 rex.reg_endpos[no].lnum,
4996 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02004997 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004998
4999 if (r != RA_MATCH)
5000 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005001 }
5002 }
5003 }
5004
5005 /* Matched the backref, skip over it. */
5006 reginput += len;
5007 }
5008 break;
5009
5010#ifdef FEAT_SYN_HL
5011 case ZREF + 1:
5012 case ZREF + 2:
5013 case ZREF + 3:
5014 case ZREF + 4:
5015 case ZREF + 5:
5016 case ZREF + 6:
5017 case ZREF + 7:
5018 case ZREF + 8:
5019 case ZREF + 9:
5020 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005021 int len;
5022
5023 cleanup_zsubexpr();
5024 no = op - ZREF;
5025 if (re_extmatch_in != NULL
5026 && re_extmatch_in->matches[no] != NULL)
5027 {
5028 len = (int)STRLEN(re_extmatch_in->matches[no]);
5029 if (cstrncmp(re_extmatch_in->matches[no],
5030 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005031 status = RA_NOMATCH;
5032 else
5033 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005034 }
5035 else
5036 {
5037 /* Backref was not set: Match an empty string. */
5038 }
5039 }
5040 break;
5041#endif
5042
5043 case BRANCH:
5044 {
5045 if (OP(next) != BRANCH) /* No choice. */
5046 next = OPERAND(scan); /* Avoid recursion. */
5047 else
5048 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005049 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005050 if (rp == NULL)
5051 status = RA_FAIL;
5052 else
5053 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005054 }
5055 }
5056 break;
5057
5058 case BRACE_LIMITS:
5059 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005060 if (OP(next) == BRACE_SIMPLE)
5061 {
5062 bl_minval = OPERAND_MIN(scan);
5063 bl_maxval = OPERAND_MAX(scan);
5064 }
5065 else if (OP(next) >= BRACE_COMPLEX
5066 && OP(next) < BRACE_COMPLEX + 10)
5067 {
5068 no = OP(next) - BRACE_COMPLEX;
5069 brace_min[no] = OPERAND_MIN(scan);
5070 brace_max[no] = OPERAND_MAX(scan);
5071 brace_count[no] = 0;
5072 }
5073 else
5074 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01005075 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005076 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005077 }
5078 }
5079 break;
5080
5081 case BRACE_COMPLEX + 0:
5082 case BRACE_COMPLEX + 1:
5083 case BRACE_COMPLEX + 2:
5084 case BRACE_COMPLEX + 3:
5085 case BRACE_COMPLEX + 4:
5086 case BRACE_COMPLEX + 5:
5087 case BRACE_COMPLEX + 6:
5088 case BRACE_COMPLEX + 7:
5089 case BRACE_COMPLEX + 8:
5090 case BRACE_COMPLEX + 9:
5091 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005092 no = op - BRACE_COMPLEX;
5093 ++brace_count[no];
5094
5095 /* If not matched enough times yet, try one more */
5096 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005097 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005098 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005099 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005100 if (rp == NULL)
5101 status = RA_FAIL;
5102 else
5103 {
5104 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005105 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005106 next = OPERAND(scan);
5107 /* We continue and handle the result when done. */
5108 }
5109 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005110 }
5111
5112 /* If matched enough times, may try matching some more */
5113 if (brace_min[no] <= brace_max[no])
5114 {
5115 /* Range is the normal way around, use longest match */
5116 if (brace_count[no] <= brace_max[no])
5117 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005118 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005119 if (rp == NULL)
5120 status = RA_FAIL;
5121 else
5122 {
5123 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005124 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005125 next = OPERAND(scan);
5126 /* We continue and handle the result when done. */
5127 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005128 }
5129 }
5130 else
5131 {
5132 /* Range is backwards, use shortest match first */
5133 if (brace_count[no] <= brace_min[no])
5134 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005135 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005136 if (rp == NULL)
5137 status = RA_FAIL;
5138 else
5139 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005140 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005141 /* We continue and handle the result when done. */
5142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005143 }
5144 }
5145 }
5146 break;
5147
5148 case BRACE_SIMPLE:
5149 case STAR:
5150 case PLUS:
5151 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005152 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005153
5154 /*
5155 * Lookahead to avoid useless match attempts when we know
5156 * what character comes next.
5157 */
5158 if (OP(next) == EXACTLY)
5159 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005160 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005161 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005162 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005163 if (MB_ISUPPER(rst.nextb))
5164 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005165 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005166 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005167 }
5168 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005169 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005170 }
5171 else
5172 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005173 rst.nextb = NUL;
5174 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005175 }
5176 if (op != BRACE_SIMPLE)
5177 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005178 rst.minval = (op == STAR) ? 0 : 1;
5179 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005180 }
5181 else
5182 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005183 rst.minval = bl_minval;
5184 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005185 }
5186
5187 /*
5188 * When maxval > minval, try matching as much as possible, up
5189 * to maxval. When maxval < minval, try matching at least the
5190 * minimal number (since the range is backwards, that's also
5191 * maxval!).
5192 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005193 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005194 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005196 status = RA_FAIL;
5197 break;
5198 }
5199 if (rst.minval <= rst.maxval
5200 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5201 {
5202 /* It could match. Prepare for trying to match what
5203 * follows. The code is below. Parameters are stored in
5204 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005205 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005206 {
5207 EMSG(_(e_maxmempat));
5208 status = RA_FAIL;
5209 }
5210 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005211 status = RA_FAIL;
5212 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005213 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005214 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005215 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005216 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005217 if (rp == NULL)
5218 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005219 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005220 {
5221 *(((regstar_T *)rp) - 1) = rst;
5222 status = RA_BREAK; /* skip the restore bits */
5223 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005224 }
5225 }
5226 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005227 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005228
Bram Moolenaar071d4272004-06-13 20:20:40 +00005229 }
5230 break;
5231
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005232 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005233 case MATCH:
5234 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005235 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005236 if (rp == NULL)
5237 status = RA_FAIL;
5238 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005239 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005240 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005241 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005242 next = OPERAND(scan);
5243 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005244 }
5245 break;
5246
5247 case BEHIND:
5248 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005249 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005250 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005251 {
5252 EMSG(_(e_maxmempat));
5253 status = RA_FAIL;
5254 }
5255 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005256 status = RA_FAIL;
5257 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005258 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005259 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005260 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005261 if (rp == NULL)
5262 status = RA_FAIL;
5263 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005264 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005265 /* Need to save the subexpr to be able to restore them
5266 * when there is a match but we don't use it. */
5267 save_subexpr(((regbehind_T *)rp) - 1);
5268
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005269 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005270 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005271 /* First try if what follows matches. If it does then we
5272 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005273 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005274 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005275 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005276
5277 case BHPOS:
5278 if (REG_MULTI)
5279 {
5280 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5281 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005282 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005283 }
5284 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005285 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005286 break;
5287
5288 case NEWL:
Bram Moolenaar6100d022016-10-02 16:51:57 +02005289 if ((c != NUL || !REG_MULTI || reglnum > rex.reg_maxline
5290 || rex.reg_line_lbr)
5291 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005293 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005294 ADVANCE_REGINPUT();
5295 else
5296 reg_nextline();
5297 break;
5298
5299 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005300 status = RA_MATCH; /* Success! */
5301 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005302
5303 default:
5304 EMSG(_(e_re_corr));
5305#ifdef DEBUG
5306 printf("Illegal op code %d\n", op);
5307#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005308 status = RA_FAIL;
5309 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005310 }
5311 }
5312
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005313 /* If we can't continue sequentially, break the inner loop. */
5314 if (status != RA_CONT)
5315 break;
5316
5317 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005318 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005319
5320 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005321
5322 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005323 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005324 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005325 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005326 while (regstack.ga_len > 0 && status != RA_FAIL)
5327 {
5328 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5329 switch (rp->rs_state)
5330 {
5331 case RS_NOPEN:
5332 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005333 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005334 break;
5335
5336 case RS_MOPEN:
5337 /* Pop the state. Restore pointers when there is no match. */
5338 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005339 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5340 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005341 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005342 break;
5343
5344#ifdef FEAT_SYN_HL
5345 case RS_ZOPEN:
5346 /* Pop the state. Restore pointers when there is no match. */
5347 if (status == RA_NOMATCH)
5348 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5349 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005350 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005351 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005352#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005353
5354 case RS_MCLOSE:
5355 /* Pop the state. Restore pointers when there is no match. */
5356 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005357 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5358 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005359 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005360 break;
5361
5362#ifdef FEAT_SYN_HL
5363 case RS_ZCLOSE:
5364 /* Pop the state. Restore pointers when there is no match. */
5365 if (status == RA_NOMATCH)
5366 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5367 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005368 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005369 break;
5370#endif
5371
5372 case RS_BRANCH:
5373 if (status == RA_MATCH)
5374 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005375 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005376 else
5377 {
5378 if (status != RA_BREAK)
5379 {
5380 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005381 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005382 scan = rp->rs_scan;
5383 }
5384 if (scan == NULL || OP(scan) != BRANCH)
5385 {
5386 /* no more branches, didn't find a match */
5387 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005388 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005389 }
5390 else
5391 {
5392 /* Prepare to try a branch. */
5393 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005394 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005395 scan = OPERAND(scan);
5396 }
5397 }
5398 break;
5399
5400 case RS_BRCPLX_MORE:
5401 /* Pop the state. Restore pointers when there is no match. */
5402 if (status == RA_NOMATCH)
5403 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005404 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005405 --brace_count[rp->rs_no]; /* decrement match count */
5406 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005407 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005408 break;
5409
5410 case RS_BRCPLX_LONG:
5411 /* Pop the state. Restore pointers when there is no match. */
5412 if (status == RA_NOMATCH)
5413 {
5414 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005415 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005416 --brace_count[rp->rs_no];
5417 /* continue with the items after "\{}" */
5418 status = RA_CONT;
5419 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005420 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005421 if (status == RA_CONT)
5422 scan = regnext(scan);
5423 break;
5424
5425 case RS_BRCPLX_SHORT:
5426 /* Pop the state. Restore pointers when there is no match. */
5427 if (status == RA_NOMATCH)
5428 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005429 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005430 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005431 if (status == RA_NOMATCH)
5432 {
5433 scan = OPERAND(scan);
5434 status = RA_CONT;
5435 }
5436 break;
5437
5438 case RS_NOMATCH:
5439 /* Pop the state. If the operand matches for NOMATCH or
5440 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5441 * except for SUBPAT, and continue with the next item. */
5442 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5443 status = RA_NOMATCH;
5444 else
5445 {
5446 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005447 if (rp->rs_no != SUBPAT) /* zero-width */
5448 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005449 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005450 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005451 if (status == RA_CONT)
5452 scan = regnext(scan);
5453 break;
5454
5455 case RS_BEHIND1:
5456 if (status == RA_NOMATCH)
5457 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005458 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005459 regstack.ga_len -= sizeof(regbehind_T);
5460 }
5461 else
5462 {
5463 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5464 * the behind part does (not) match before the current
5465 * position in the input. This must be done at every
5466 * position in the input and checking if the match ends at
5467 * the current position. */
5468
5469 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005470 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005471
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005472 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005473 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005474 * result, hitting the start of the line or the previous
5475 * line (for multi-line matching).
5476 * Set behind_pos to where the match should end, BHPOS
5477 * will match it. Save the current value. */
5478 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5479 behind_pos = rp->rs_un.regsave;
5480
5481 rp->rs_state = RS_BEHIND2;
5482
Bram Moolenaar582fd852005-03-28 20:58:01 +00005483 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005484 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005485 }
5486 break;
5487
5488 case RS_BEHIND2:
5489 /*
5490 * Looping for BEHIND / NOBEHIND match.
5491 */
5492 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5493 {
5494 /* found a match that ends where "next" started */
5495 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5496 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005497 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5498 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005499 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005500 {
5501 /* But we didn't want a match. Need to restore the
5502 * subexpr, because what follows matched, so they have
5503 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005504 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005505 restore_subexpr(((regbehind_T *)rp) - 1);
5506 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005507 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005508 regstack.ga_len -= sizeof(regbehind_T);
5509 }
5510 else
5511 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005512 long limit;
5513
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005514 /* No match or a match that doesn't end where we want it: Go
5515 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005516 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005517 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005518 if (REG_MULTI)
5519 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005520 if (limit > 0
5521 && ((rp->rs_un.regsave.rs_u.pos.lnum
5522 < behind_pos.rs_u.pos.lnum
5523 ? (colnr_T)STRLEN(regline)
5524 : behind_pos.rs_u.pos.col)
5525 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5526 no = FAIL;
5527 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005528 {
5529 if (rp->rs_un.regsave.rs_u.pos.lnum
5530 < behind_pos.rs_u.pos.lnum
5531 || reg_getline(
5532 --rp->rs_un.regsave.rs_u.pos.lnum)
5533 == NULL)
5534 no = FAIL;
5535 else
5536 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005537 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005538 rp->rs_un.regsave.rs_u.pos.col =
5539 (colnr_T)STRLEN(regline);
5540 }
5541 }
5542 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005543 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005544#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005545 if (has_mbyte)
5546 rp->rs_un.regsave.rs_u.pos.col -=
5547 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005548 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005549 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005550#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005551 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005552 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005553 }
5554 else
5555 {
5556 if (rp->rs_un.regsave.rs_u.ptr == regline)
5557 no = FAIL;
5558 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005559 {
5560 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5561 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5562 - rp->rs_un.regsave.rs_u.ptr) > limit)
5563 no = FAIL;
5564 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005565 }
5566 if (no == OK)
5567 {
5568 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005569 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005570 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005571 if (status == RA_MATCH)
5572 {
5573 /* We did match, so subexpr may have been changed,
5574 * need to restore them for the next try. */
5575 status = RA_NOMATCH;
5576 restore_subexpr(((regbehind_T *)rp) - 1);
5577 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005578 }
5579 else
5580 {
5581 /* Can't advance. For NOBEHIND that's a match. */
5582 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5583 if (rp->rs_no == NOBEHIND)
5584 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005585 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5586 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005587 status = RA_MATCH;
5588 }
5589 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005590 {
5591 /* We do want a proper match. Need to restore the
5592 * subexpr if we had a match, because they may have
5593 * been set. */
5594 if (status == RA_MATCH)
5595 {
5596 status = RA_NOMATCH;
5597 restore_subexpr(((regbehind_T *)rp) - 1);
5598 }
5599 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005600 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005601 regstack.ga_len -= sizeof(regbehind_T);
5602 }
5603 }
5604 break;
5605
5606 case RS_STAR_LONG:
5607 case RS_STAR_SHORT:
5608 {
5609 regstar_T *rst = ((regstar_T *)rp) - 1;
5610
5611 if (status == RA_MATCH)
5612 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005613 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005614 regstack.ga_len -= sizeof(regstar_T);
5615 break;
5616 }
5617
5618 /* Tried once already, restore input pointers. */
5619 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005620 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005621
5622 /* Repeat until we found a position where it could match. */
5623 for (;;)
5624 {
5625 if (status != RA_BREAK)
5626 {
5627 /* Tried first position already, advance. */
5628 if (rp->rs_state == RS_STAR_LONG)
5629 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005630 /* Trying for longest match, but couldn't or
5631 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005632 if (--rst->count < rst->minval)
5633 break;
5634 if (reginput == regline)
5635 {
5636 /* backup to last char of previous line */
5637 --reglnum;
5638 regline = reg_getline(reglnum);
5639 /* Just in case regrepeat() didn't count
5640 * right. */
5641 if (regline == NULL)
5642 break;
5643 reginput = regline + STRLEN(regline);
5644 fast_breakcheck();
5645 }
5646 else
5647 mb_ptr_back(regline, reginput);
5648 }
5649 else
5650 {
5651 /* Range is backwards, use shortest match first.
5652 * Careful: maxval and minval are exchanged!
5653 * Couldn't or didn't match: try advancing one
5654 * char. */
5655 if (rst->count == rst->minval
5656 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5657 break;
5658 ++rst->count;
5659 }
5660 if (got_int)
5661 break;
5662 }
5663 else
5664 status = RA_NOMATCH;
5665
5666 /* If it could match, try it. */
5667 if (rst->nextb == NUL || *reginput == rst->nextb
5668 || *reginput == rst->nextb_ic)
5669 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005670 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005671 scan = regnext(rp->rs_scan);
5672 status = RA_CONT;
5673 break;
5674 }
5675 }
5676 if (status != RA_CONT)
5677 {
5678 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005679 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005680 regstack.ga_len -= sizeof(regstar_T);
5681 status = RA_NOMATCH;
5682 }
5683 }
5684 break;
5685 }
5686
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005687 /* If we want to continue the inner loop or didn't pop a state
5688 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005689 if (status == RA_CONT || rp == (regitem_T *)
5690 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5691 break;
5692 }
5693
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005694 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005695 if (status == RA_CONT)
5696 continue;
5697
5698 /*
5699 * If the regstack is empty or something failed we are done.
5700 */
5701 if (regstack.ga_len == 0 || status == RA_FAIL)
5702 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005703 if (scan == NULL)
5704 {
5705 /*
5706 * We get here only if there's trouble -- normally "case END" is
5707 * the terminating point.
5708 */
5709 EMSG(_(e_re_corr));
5710#ifdef DEBUG
5711 printf("Premature EOL\n");
5712#endif
5713 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005714 if (status == RA_FAIL)
5715 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005716 return (status == RA_MATCH);
5717 }
5718
5719 } /* End of loop until the regstack is empty. */
5720
5721 /* NOTREACHED */
5722}
5723
5724/*
5725 * Push an item onto the regstack.
5726 * Returns pointer to new item. Returns NULL when out of memory.
5727 */
5728 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005729regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005730{
5731 regitem_T *rp;
5732
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005733 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005734 {
5735 EMSG(_(e_maxmempat));
5736 return NULL;
5737 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005738 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005739 return NULL;
5740
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005741 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005742 rp->rs_state = state;
5743 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005744
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005745 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005746 return rp;
5747}
5748
5749/*
5750 * Pop an item from the regstack.
5751 */
5752 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005753regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005754{
5755 regitem_T *rp;
5756
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005757 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005758 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005759
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005760 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005761}
5762
Bram Moolenaar071d4272004-06-13 20:20:40 +00005763/*
5764 * regrepeat - repeatedly match something simple, return how many.
5765 * Advances reginput (and reglnum) to just after the matched chars.
5766 */
5767 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005768regrepeat(
5769 char_u *p,
5770 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005771{
5772 long count = 0;
5773 char_u *scan;
5774 char_u *opnd;
5775 int mask;
5776 int testval = 0;
5777
5778 scan = reginput; /* Make local copy of reginput for speed. */
5779 opnd = OPERAND(p);
5780 switch (OP(p))
5781 {
5782 case ANY:
5783 case ANY + ADD_NL:
5784 while (count < maxcount)
5785 {
5786 /* Matching anything means we continue until end-of-line (or
5787 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5788 while (*scan != NUL && count < maxcount)
5789 {
5790 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005791 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005792 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005793 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5794 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005795 break;
5796 ++count; /* count the line-break */
5797 reg_nextline();
5798 scan = reginput;
5799 if (got_int)
5800 break;
5801 }
5802 break;
5803
5804 case IDENT:
5805 case IDENT + ADD_NL:
5806 testval = TRUE;
5807 /*FALLTHROUGH*/
5808 case SIDENT:
5809 case SIDENT + ADD_NL:
5810 while (count < maxcount)
5811 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005812 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005813 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005814 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005815 }
5816 else if (*scan == NUL)
5817 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005818 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5819 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005820 break;
5821 reg_nextline();
5822 scan = reginput;
5823 if (got_int)
5824 break;
5825 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005826 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005827 ++scan;
5828 else
5829 break;
5830 ++count;
5831 }
5832 break;
5833
5834 case KWORD:
5835 case KWORD + ADD_NL:
5836 testval = TRUE;
5837 /*FALLTHROUGH*/
5838 case SKWORD:
5839 case SKWORD + ADD_NL:
5840 while (count < maxcount)
5841 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005842 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005843 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005844 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005845 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005846 }
5847 else if (*scan == NUL)
5848 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005849 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5850 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005851 break;
5852 reg_nextline();
5853 scan = reginput;
5854 if (got_int)
5855 break;
5856 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005857 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005858 ++scan;
5859 else
5860 break;
5861 ++count;
5862 }
5863 break;
5864
5865 case FNAME:
5866 case FNAME + ADD_NL:
5867 testval = TRUE;
5868 /*FALLTHROUGH*/
5869 case SFNAME:
5870 case SFNAME + ADD_NL:
5871 while (count < maxcount)
5872 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005873 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005874 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005875 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005876 }
5877 else if (*scan == NUL)
5878 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005879 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5880 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005881 break;
5882 reg_nextline();
5883 scan = reginput;
5884 if (got_int)
5885 break;
5886 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005887 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005888 ++scan;
5889 else
5890 break;
5891 ++count;
5892 }
5893 break;
5894
5895 case PRINT:
5896 case PRINT + ADD_NL:
5897 testval = TRUE;
5898 /*FALLTHROUGH*/
5899 case SPRINT:
5900 case SPRINT + ADD_NL:
5901 while (count < maxcount)
5902 {
5903 if (*scan == NUL)
5904 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005905 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5906 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005907 break;
5908 reg_nextline();
5909 scan = reginput;
5910 if (got_int)
5911 break;
5912 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005913 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5914 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005915 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005916 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005917 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005918 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005919 ++scan;
5920 else
5921 break;
5922 ++count;
5923 }
5924 break;
5925
5926 case WHITE:
5927 case WHITE + ADD_NL:
5928 testval = mask = RI_WHITE;
5929do_class:
5930 while (count < maxcount)
5931 {
5932#ifdef FEAT_MBYTE
5933 int l;
5934#endif
5935 if (*scan == NUL)
5936 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005937 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5938 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005939 break;
5940 reg_nextline();
5941 scan = reginput;
5942 if (got_int)
5943 break;
5944 }
5945#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005946 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005947 {
5948 if (testval != 0)
5949 break;
5950 scan += l;
5951 }
5952#endif
5953 else if ((class_tab[*scan] & mask) == testval)
5954 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005955 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005956 ++scan;
5957 else
5958 break;
5959 ++count;
5960 }
5961 break;
5962
5963 case NWHITE:
5964 case NWHITE + ADD_NL:
5965 mask = RI_WHITE;
5966 goto do_class;
5967 case DIGIT:
5968 case DIGIT + ADD_NL:
5969 testval = mask = RI_DIGIT;
5970 goto do_class;
5971 case NDIGIT:
5972 case NDIGIT + ADD_NL:
5973 mask = RI_DIGIT;
5974 goto do_class;
5975 case HEX:
5976 case HEX + ADD_NL:
5977 testval = mask = RI_HEX;
5978 goto do_class;
5979 case NHEX:
5980 case NHEX + ADD_NL:
5981 mask = RI_HEX;
5982 goto do_class;
5983 case OCTAL:
5984 case OCTAL + ADD_NL:
5985 testval = mask = RI_OCTAL;
5986 goto do_class;
5987 case NOCTAL:
5988 case NOCTAL + ADD_NL:
5989 mask = RI_OCTAL;
5990 goto do_class;
5991 case WORD:
5992 case WORD + ADD_NL:
5993 testval = mask = RI_WORD;
5994 goto do_class;
5995 case NWORD:
5996 case NWORD + ADD_NL:
5997 mask = RI_WORD;
5998 goto do_class;
5999 case HEAD:
6000 case HEAD + ADD_NL:
6001 testval = mask = RI_HEAD;
6002 goto do_class;
6003 case NHEAD:
6004 case NHEAD + ADD_NL:
6005 mask = RI_HEAD;
6006 goto do_class;
6007 case ALPHA:
6008 case ALPHA + ADD_NL:
6009 testval = mask = RI_ALPHA;
6010 goto do_class;
6011 case NALPHA:
6012 case NALPHA + ADD_NL:
6013 mask = RI_ALPHA;
6014 goto do_class;
6015 case LOWER:
6016 case LOWER + ADD_NL:
6017 testval = mask = RI_LOWER;
6018 goto do_class;
6019 case NLOWER:
6020 case NLOWER + ADD_NL:
6021 mask = RI_LOWER;
6022 goto do_class;
6023 case UPPER:
6024 case UPPER + ADD_NL:
6025 testval = mask = RI_UPPER;
6026 goto do_class;
6027 case NUPPER:
6028 case NUPPER + ADD_NL:
6029 mask = RI_UPPER;
6030 goto do_class;
6031
6032 case EXACTLY:
6033 {
6034 int cu, cl;
6035
6036 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006037 * would have been used for it. It does handle single-byte
6038 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006039 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006040 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006041 cu = MB_TOUPPER(*opnd);
6042 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006043 while (count < maxcount && (*scan == cu || *scan == cl))
6044 {
6045 count++;
6046 scan++;
6047 }
6048 }
6049 else
6050 {
6051 cu = *opnd;
6052 while (count < maxcount && *scan == cu)
6053 {
6054 count++;
6055 scan++;
6056 }
6057 }
6058 break;
6059 }
6060
6061#ifdef FEAT_MBYTE
6062 case MULTIBYTECODE:
6063 {
6064 int i, len, cf = 0;
6065
6066 /* Safety check (just in case 'encoding' was changed since
6067 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006068 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006069 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006070 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006071 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006072 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006073 {
6074 for (i = 0; i < len; ++i)
6075 if (opnd[i] != scan[i])
6076 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006077 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00006078 || utf_fold(utf_ptr2char(scan)) != cf))
6079 break;
6080 scan += len;
6081 ++count;
6082 }
6083 }
6084 }
6085 break;
6086#endif
6087
6088 case ANYOF:
6089 case ANYOF + ADD_NL:
6090 testval = TRUE;
6091 /*FALLTHROUGH*/
6092
6093 case ANYBUT:
6094 case ANYBUT + ADD_NL:
6095 while (count < maxcount)
6096 {
6097#ifdef FEAT_MBYTE
6098 int len;
6099#endif
6100 if (*scan == NUL)
6101 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006102 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
6103 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006104 break;
6105 reg_nextline();
6106 scan = reginput;
6107 if (got_int)
6108 break;
6109 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006110 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006111 ++scan;
6112#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006113 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006114 {
6115 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6116 break;
6117 scan += len;
6118 }
6119#endif
6120 else
6121 {
6122 if ((cstrchr(opnd, *scan) == NULL) == testval)
6123 break;
6124 ++scan;
6125 }
6126 ++count;
6127 }
6128 break;
6129
6130 case NEWL:
6131 while (count < maxcount
Bram Moolenaar6100d022016-10-02 16:51:57 +02006132 && ((*scan == NUL && reglnum <= rex.reg_maxline
6133 && !rex.reg_line_lbr && REG_MULTI)
6134 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006135 {
6136 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006137 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006138 ADVANCE_REGINPUT();
6139 else
6140 reg_nextline();
6141 scan = reginput;
6142 if (got_int)
6143 break;
6144 }
6145 break;
6146
6147 default: /* Oh dear. Called inappropriately. */
6148 EMSG(_(e_re_corr));
6149#ifdef DEBUG
6150 printf("Called regrepeat with op code %d\n", OP(p));
6151#endif
6152 break;
6153 }
6154
6155 reginput = scan;
6156
6157 return (int)count;
6158}
6159
6160/*
6161 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006162 * Returns NULL when calculating size, when there is no next item and when
6163 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006164 */
6165 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006166regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006167{
6168 int offset;
6169
Bram Moolenaard3005802009-11-25 17:21:32 +00006170 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006171 return NULL;
6172
6173 offset = NEXT(p);
6174 if (offset == 0)
6175 return NULL;
6176
Bram Moolenaar582fd852005-03-28 20:58:01 +00006177 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006178 return p - offset;
6179 else
6180 return p + offset;
6181}
6182
6183/*
6184 * Check the regexp program for its magic number.
6185 * Return TRUE if it's wrong.
6186 */
6187 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006188prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006189{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006190 regprog_T *prog;
6191
Bram Moolenaar6100d022016-10-02 16:51:57 +02006192 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006193 if (prog->engine == &nfa_regengine)
6194 /* For NFA matcher we don't check the magic */
6195 return FALSE;
6196
6197 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006198 {
6199 EMSG(_(e_re_corr));
6200 return TRUE;
6201 }
6202 return FALSE;
6203}
6204
6205/*
6206 * Cleanup the subexpressions, if this wasn't done yet.
6207 * This construction is used to clear the subexpressions only when they are
6208 * used (to increase speed).
6209 */
6210 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006211cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006212{
6213 if (need_clear_subexpr)
6214 {
6215 if (REG_MULTI)
6216 {
6217 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006218 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6219 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006220 }
6221 else
6222 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006223 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6224 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006225 }
6226 need_clear_subexpr = FALSE;
6227 }
6228}
6229
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end arrays when "need_clear_zsubexpr"
 * is set, then reset that flag.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets every lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6251
6252/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006253 * Save the current subexpr to "bp", so that they can be restored
6254 * later by restore_subexpr().
6255 */
6256 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006257save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006258{
6259 int i;
6260
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006261 /* When "need_clear_subexpr" is set we don't need to save the values, only
6262 * remember that this flag needs to be set again when restoring. */
6263 bp->save_need_clear_subexpr = need_clear_subexpr;
6264 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006265 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006266 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006267 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006268 if (REG_MULTI)
6269 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006270 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6271 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006272 }
6273 else
6274 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006275 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6276 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006277 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006278 }
6279 }
6280}
6281
6282/*
6283 * Restore the subexpr from "bp".
6284 */
6285 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006286restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006287{
6288 int i;
6289
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006290 /* Only need to restore saved values when they are not to be cleared. */
6291 need_clear_subexpr = bp->save_need_clear_subexpr;
6292 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006293 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006294 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006295 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006296 if (REG_MULTI)
6297 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006298 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6299 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006300 }
6301 else
6302 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006303 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6304 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006305 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006306 }
6307 }
6308}
6309
6310/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006311 * Advance reglnum, regline and reginput to the next line.
6312 */
6313 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006314reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006315{
6316 regline = reg_getline(++reglnum);
6317 reginput = regline;
6318 fast_breakcheck();
6319}
6320
6321/*
6322 * Save the input line and position in a regsave_T.
6323 */
6324 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006325reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006326{
6327 if (REG_MULTI)
6328 {
6329 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6330 save->rs_u.pos.lnum = reglnum;
6331 }
6332 else
6333 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006334 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006335}
6336
6337/*
6338 * Restore the input line and position from a regsave_T.
6339 */
6340 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006341reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006342{
6343 if (REG_MULTI)
6344 {
6345 if (reglnum != save->rs_u.pos.lnum)
6346 {
6347 /* only call reg_getline() when the line number changed to save
6348 * a bit of time */
6349 reglnum = save->rs_u.pos.lnum;
6350 regline = reg_getline(reglnum);
6351 }
6352 reginput = regline + save->rs_u.pos.col;
6353 }
6354 else
6355 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006356 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006357}
6358
6359/*
6360 * Return TRUE if current position is equal to saved position.
6361 */
6362 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006363reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006364{
6365 if (REG_MULTI)
6366 return reglnum == save->rs_u.pos.lnum
6367 && reginput == regline + save->rs_u.pos.col;
6368 return reginput == save->rs_u.ptr;
6369}
6370
6371/*
6372 * Tentatively set the sub-expression start to the current position (after
6373 * calling regmatch() they will have changed). Need to save the existing
6374 * values for when there is no match.
6375 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6376 * depending on REG_MULTI.
6377 */
6378 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006379save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006380{
6381 savep->se_u.pos = *posp;
6382 posp->lnum = reglnum;
6383 posp->col = (colnr_T)(reginput - regline);
6384}
6385
6386 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006387save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006388{
6389 savep->se_u.ptr = *pp;
6390 *pp = reginput;
6391}
6392
6393/*
6394 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6395 */
6396 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006397re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006398{
6399 long_u n = OPERAND_MIN(scan);
6400
6401 if (OPERAND_CMP(scan) == '>')
6402 return val > n;
6403 if (OPERAND_CMP(scan) == '<')
6404 return val < n;
6405 return val == n;
6406}
6407
Bram Moolenaar580abea2013-06-14 20:31:28 +02006408/*
6409 * Check whether a backreference matches.
6410 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006411 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6412 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006413 */
6414 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006415match_with_backref(
6416 linenr_T start_lnum,
6417 colnr_T start_col,
6418 linenr_T end_lnum,
6419 colnr_T end_col,
6420 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006421{
6422 linenr_T clnum = start_lnum;
6423 colnr_T ccol = start_col;
6424 int len;
6425 char_u *p;
6426
6427 if (bytelen != NULL)
6428 *bytelen = 0;
6429 for (;;)
6430 {
6431 /* Since getting one line may invalidate the other, need to make copy.
6432 * Slow! */
6433 if (regline != reg_tofree)
6434 {
6435 len = (int)STRLEN(regline);
6436 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6437 {
6438 len += 50; /* get some extra */
6439 vim_free(reg_tofree);
6440 reg_tofree = alloc(len);
6441 if (reg_tofree == NULL)
6442 return RA_FAIL; /* out of memory!*/
6443 reg_tofreelen = len;
6444 }
6445 STRCPY(reg_tofree, regline);
6446 reginput = reg_tofree + (reginput - regline);
6447 regline = reg_tofree;
6448 }
6449
6450 /* Get the line to compare with. */
6451 p = reg_getline(clnum);
6452 if (clnum == end_lnum)
6453 len = end_col - ccol;
6454 else
6455 len = (int)STRLEN(p + ccol);
6456
6457 if (cstrncmp(p + ccol, reginput, &len) != 0)
6458 return RA_NOMATCH; /* doesn't match */
6459 if (bytelen != NULL)
6460 *bytelen += len;
6461 if (clnum == end_lnum)
6462 break; /* match and at end! */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006463 if (reglnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006464 return RA_NOMATCH; /* text too short */
6465
6466 /* Advance to next line. */
6467 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006468 if (bytelen != NULL)
6469 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006470 ++clnum;
6471 ccol = 0;
6472 if (got_int)
6473 return RA_FAIL;
6474 }
6475
6476 /* found a match! Note that regline may now point to a copy of the line,
6477 * that should not matter. */
6478 return RA_MATCH;
6479}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006480
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write the compiled program "r" for "pattern" in a vaguely
 * comprehensible form.  Output is appended to "bt_regexp_log.log" when
 * BT_REGEXP_LOG is defined, otherwise it goes to stdout.
 */
    static void
regdump(char_u *pattern, bt_regprog_T *r)
{
    char_u  *node;
    int     code = EXACTLY;     /* any op other than END will do */
    char_u  *follow;
    char_u  *last = NULL;       /* furthest "next" referred to so far */
    FILE    *out;

#ifdef BT_REGEXP_LOG
    out = fopen("bt_regexp_log.log", "a");
#else
    out = stdout;
#endif
    if (out == NULL)
        return;
    fprintf(out, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Stop at the END that does not come before a referred next node (an END
     * can also appear in a NOMATCH operand).
     */
    node = r->program + 1;
    while (code != END || node <= last)
    {
        code = OP(node);

        /* Offset of the node and its name, then the offset of its next. */
        fprintf(out, "%2d%s", (int)(node - r->program), regprop(node));
        follow = regnext(node);
        if (follow == NULL)
            fprintf(out, "(0)");
        else
            fprintf(out, "(%d)", (int)((node - r->program) + (follow - node)));
        if (last < follow)
            last = follow;

        /* Extra operands that some nodes carry. */
        switch (code)
        {
            case BRACE_LIMITS:
                /* two ints */
                fprintf(out, " minval %ld, maxval %ld", OPERAND_MIN(node), OPERAND_MAX(node));
                node += 8;
                break;

            case BEHIND:
            case NOBEHIND:
                /* one int */
                fprintf(out, " count %ld", OPERAND_MIN(node));
                node += 4;
                break;

            case RE_LNUM:
            case RE_COL:
            case RE_VCOL:
                /* one int plus comparator */
                fprintf(out, " count %ld", OPERAND_MIN(node));
                node += 5;
                break;

            default:
                break;
        }
        node += 3;

        if (code == ANYOF || code == ANYOF + ADD_NL
                || code == ANYBUT || code == ANYBUT + ADD_NL
                || code == EXACTLY)
        {
            /* These nodes are followed by a NUL-terminated literal string. */
            fprintf(out, "\nxxxxxxxxx\n");
            for ( ; *node != NUL; ++node)
                fprintf(out, "%c", *node);
            fprintf(out, "\nxxxxxxxxx\n");
            node++;
        }
        fprintf(out, "\r\n");
    }

    /* The interesting header fields. */
    if (r->regstart != NUL)
        fprintf(out, "start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        fprintf(out, "anchored; ");
    if (r->regmust != NULL)
        fprintf(out, "must have \"%s\"", r->regmust);
    fprintf(out, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(out);
#endif
}
#endif /* BT_REGEXP_DUMP */
6569
#ifdef DEBUG
/*
 * regprop - printable representation of the opcode of node "op".
 * Returns a pointer to a static buffer holding ":" followed by the name;
 * an unknown opcode gives ":corrupt <number>".  The buffer is overwritten
 * by the next call.
 */
    static char_u *
regprop(char_u *op)
{
    /* Opcodes with a fixed name. */
    static const struct
    {
        int         code;
        const char  *name;
    } fixed_names[] =
    {
        {BOL, "BOL"},
        {EOL, "EOL"},
        {RE_BOF, "BOF"},
        {RE_EOF, "EOF"},
        {CURSOR, "CURSOR"},
        {RE_VISUAL, "RE_VISUAL"},
        {RE_LNUM, "RE_LNUM"},
        {RE_MARK, "RE_MARK"},
        {RE_COL, "RE_COL"},
        {RE_VCOL, "RE_VCOL"},
        {BOW, "BOW"},
        {EOW, "EOW"},
        {ANY, "ANY"},           {ANY + ADD_NL, "ANY+NL"},
        {ANYOF, "ANYOF"},       {ANYOF + ADD_NL, "ANYOF+NL"},
        {ANYBUT, "ANYBUT"},     {ANYBUT + ADD_NL, "ANYBUT+NL"},
        {IDENT, "IDENT"},       {IDENT + ADD_NL, "IDENT+NL"},
        {SIDENT, "SIDENT"},     {SIDENT + ADD_NL, "SIDENT+NL"},
        {KWORD, "KWORD"},       {KWORD + ADD_NL, "KWORD+NL"},
        {SKWORD, "SKWORD"},     {SKWORD + ADD_NL, "SKWORD+NL"},
        {FNAME, "FNAME"},       {FNAME + ADD_NL, "FNAME+NL"},
        {SFNAME, "SFNAME"},     {SFNAME + ADD_NL, "SFNAME+NL"},
        {PRINT, "PRINT"},       {PRINT + ADD_NL, "PRINT+NL"},
        {SPRINT, "SPRINT"},     {SPRINT + ADD_NL, "SPRINT+NL"},
        {WHITE, "WHITE"},       {WHITE + ADD_NL, "WHITE+NL"},
        {NWHITE, "NWHITE"},     {NWHITE + ADD_NL, "NWHITE+NL"},
        {DIGIT, "DIGIT"},       {DIGIT + ADD_NL, "DIGIT+NL"},
        {NDIGIT, "NDIGIT"},     {NDIGIT + ADD_NL, "NDIGIT+NL"},
        {HEX, "HEX"},           {HEX + ADD_NL, "HEX+NL"},
        {NHEX, "NHEX"},         {NHEX + ADD_NL, "NHEX+NL"},
        {OCTAL, "OCTAL"},       {OCTAL + ADD_NL, "OCTAL+NL"},
        {NOCTAL, "NOCTAL"},     {NOCTAL + ADD_NL, "NOCTAL+NL"},
        {WORD, "WORD"},         {WORD + ADD_NL, "WORD+NL"},
        {NWORD, "NWORD"},       {NWORD + ADD_NL, "NWORD+NL"},
        {HEAD, "HEAD"},         {HEAD + ADD_NL, "HEAD+NL"},
        {NHEAD, "NHEAD"},       {NHEAD + ADD_NL, "NHEAD+NL"},
        {ALPHA, "ALPHA"},       {ALPHA + ADD_NL, "ALPHA+NL"},
        {NALPHA, "NALPHA"},     {NALPHA + ADD_NL, "NALPHA+NL"},
        {LOWER, "LOWER"},       {LOWER + ADD_NL, "LOWER+NL"},
        {NLOWER, "NLOWER"},     {NLOWER + ADD_NL, "NLOWER+NL"},
        {UPPER, "UPPER"},       {UPPER + ADD_NL, "UPPER+NL"},
        {NUPPER, "NUPPER"},     {NUPPER + ADD_NL, "NUPPER+NL"},
        {BRANCH, "BRANCH"},
        {EXACTLY, "EXACTLY"},
        {NOTHING, "NOTHING"},
        {BACK, "BACK"},
        {END, "END"},
        {MOPEN + 0, "MATCH START"},
        {MCLOSE + 0, "MATCH END"},
        {NOPEN, "NOPEN"},
        {NCLOSE, "NCLOSE"},
        {STAR, "STAR"},
        {PLUS, "PLUS"},
        {NOMATCH, "NOMATCH"},
        {MATCH, "MATCH"},
        {BEHIND, "BEHIND"},
        {NOBEHIND, "NOBEHIND"},
        {SUBPAT, "SUBPAT"},
        {BRACE_LIMITS, "BRACE_LIMITS"},
        {BRACE_SIMPLE, "BRACE_SIMPLE"},
#ifdef FEAT_MBYTE
        {MULTIBYTECODE, "MULTIBYTECODE"},
#endif
        {NEWL, "NEWL"}
    };

    /* Opcode families: "base + first" up to "base + last" are shown as the
     * prefix followed by the index within the family. */
    static const struct
    {
        int         base;
        int         first;
        int         last;
        const char  *prefix;
    } families[] =
    {
        {MOPEN, 1, 9, "MOPEN"},
        {MCLOSE, 1, 9, "MCLOSE"},
        {BACKREF, 1, 9, "BACKREF"},
#ifdef FEAT_SYN_HL
        {ZOPEN, 1, 9, "ZOPEN"},
        {ZCLOSE, 1, 9, "ZCLOSE"},
        {ZREF, 1, 9, "ZREF"},
#endif
        {BRACE_COMPLEX, 0, 9, "BRACE_COMPLEX"}
    };

    static char buf[50];
    int         code = (int)OP(op);
    int         i;

    STRCPY(buf, ":");

    for (i = 0; i < (int)(sizeof(fixed_names) / sizeof(fixed_names[0])); ++i)
        if (fixed_names[i].code == code)
        {
            STRCAT(buf, fixed_names[i].name);
            return (char_u *)buf;
        }

    for (i = 0; i < (int)(sizeof(families) / sizeof(families[0])); ++i)
        if (code >= families[i].base + families[i].first
                && code <= families[i].base + families[i].last)
        {
            sprintf(buf + STRLEN(buf), "%s%d", families[i].prefix,
                                                   code - families[i].base);
            return (char_u *)buf;
        }

    /* Not an opcode that is known here. */
    sprintf(buf + STRLEN(buf), "corrupt %d", code);
    return (char_u *)buf;
}
#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006953
Bram Moolenaarfb031402014-09-09 17:18:49 +02006954/*
6955 * Used in a place where no * or \+ can follow.
6956 */
6957 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006958re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006959{
6960 if (re_multi_type(peekchr()) == MULTI_MULT)
6961 EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
6962 return OK;
6963}
6964
Bram Moolenaar071d4272004-06-13 20:20:40 +00006965#ifdef FEAT_MBYTE
static void mb_decompose(int c, int *c1, int *c2, int *c3);

/* Up to three code points that one character decomposes into; unused
 * trailing entries are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f, indexed by
 * "c - 0xfb20".  Entries marked UNUSED map to the character itself.
 * The table is only read, hence it is const.
 */
static const decomp_T decomp_table[0xfb4f - 0xfb20 + 1] =
{
    {0x5e2, 0, 0},              /* 0xfb20 alt ayin */
    {0x5d0, 0, 0},              /* 0xfb21 alt alef */
    {0x5d3, 0, 0},              /* 0xfb22 alt dalet */
    {0x5d4, 0, 0},              /* 0xfb23 alt he */
    {0x5db, 0, 0},              /* 0xfb24 alt kaf */
    {0x5dc, 0, 0},              /* 0xfb25 alt lamed */
    {0x5dd, 0, 0},              /* 0xfb26 alt mem-sofit */
    {0x5e8, 0, 0},              /* 0xfb27 alt resh */
    {0x5ea, 0, 0},              /* 0xfb28 alt tav */
    {'+', 0, 0},                /* 0xfb29 alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30 alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31 bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32 gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33 dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34 he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35 vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36 zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38 tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39 yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40 nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41 samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43 pe sofit+dagesh */
    {0x5e4, 0x5bc, 0},          /* 0xfb44 pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46 tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47 qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48 resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49 shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f alef-lamed */
};

/*
 * Decompose character "c" into up to three code points, stored in "*c1",
 * "*c2" and "*c3".  Only 0xfb20 - 0xfb4f are looked up in decomp_table[];
 * any other character is returned unchanged in "*c1" with "*c2" and "*c3"
 * set to zero.
 * The output pointers may refer to the same variable; the value written
 * last ("*c3") is then what remains.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    const decomp_T  *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        *c1 = c;
        *c2 = *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
7045#endif
7046
7047/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02007048 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00007049 * Return 0 if strings match, non-zero otherwise.
7050 * Correct the length "*n" when composing characters are ignored.
7051 */
7052 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007053cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007054{
7055 int result;
7056
Bram Moolenaar6100d022016-10-02 16:51:57 +02007057 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007058 result = STRNCMP(s1, s2, *n);
7059 else
7060 result = MB_STRNICMP(s1, s2, *n);
7061
7062#ifdef FEAT_MBYTE
7063 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007064 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007065 {
7066 char_u *str1, *str2;
7067 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007068 int junk;
7069
7070 /* we have to handle the strcmp ourselves, since it is necessary to
7071 * deal with the composing characters by ignoring them: */
7072 str1 = s1;
7073 str2 = s2;
7074 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007075 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007076 {
7077 c1 = mb_ptr2char_adv(&str1);
7078 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007079
7080 /* decompose the character if necessary, into 'base' characters
7081 * because I don't care about Arabic, I will hard-code the Hebrew
7082 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007083 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007084 {
7085 /* decomposition necessary? */
7086 mb_decompose(c1, &c11, &junk, &junk);
7087 mb_decompose(c2, &c12, &junk, &junk);
7088 c1 = c11;
7089 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007090 if (c11 != c12
7091 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007092 break;
7093 }
7094 }
7095 result = c2 - c1;
7096 if (result == 0)
7097 *n = (int)(str2 - s2);
7098 }
7099#endif
7100
7101 return result;
7102}
7103
7104/*
7105 * cstrchr: This function is used a lot for simple searches, keep it fast!
7106 */
7107 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007108cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007109{
7110 char_u *p;
7111 int cc;
7112
Bram Moolenaar6100d022016-10-02 16:51:57 +02007113 if (!rex.reg_ic
Bram Moolenaar071d4272004-06-13 20:20:40 +00007114#ifdef FEAT_MBYTE
7115 || (!enc_utf8 && mb_char2len(c) > 1)
7116#endif
7117 )
7118 return vim_strchr(s, c);
7119
7120 /* tolower() and toupper() can be slow, comparing twice should be a lot
7121 * faster (esp. when using MS Visual C++!).
7122 * For UTF-8 need to use folded case. */
7123#ifdef FEAT_MBYTE
7124 if (enc_utf8 && c > 0x80)
7125 cc = utf_fold(c);
7126 else
7127#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007128 if (MB_ISUPPER(c))
7129 cc = MB_TOLOWER(c);
7130 else if (MB_ISLOWER(c))
7131 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007132 else
7133 return vim_strchr(s, c);
7134
7135#ifdef FEAT_MBYTE
7136 if (has_mbyte)
7137 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007138 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007139 {
7140 if (enc_utf8 && c > 0x80)
7141 {
7142 if (utf_fold(utf_ptr2char(p)) == cc)
7143 return p;
7144 }
7145 else if (*p == c || *p == cc)
7146 return p;
7147 }
7148 }
7149 else
7150#endif
7151 /* Faster version for when there are no multi-byte characters. */
7152 for (p = s; *p != NUL; ++p)
7153 if (*p == c || *p == cc)
7154 return p;
7155
7156 return NULL;
7157}
7158
7159/***************************************************************
7160 * regsub stuff *
7161 ***************************************************************/
7162
Bram Moolenaar071d4272004-06-13 20:20:40 +00007163/*
7164 * We should define fptr as a pointer to a function returning a pointer to
7165 * a function returning a pointer to a function ...
7166 * This is impossible, so we declare a pointer to a function returning a
7167 * pointer to a function returning void. This should work for all compilers.
7168 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007169typedef void (*(*fptr_T)(int *, int))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007170
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007171static fptr_T do_upper(int *, int);
7172static fptr_T do_Upper(int *, int);
7173static fptr_T do_lower(int *, int);
7174static fptr_T do_Lower(int *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007175
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007176static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007177
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007178 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007179do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007180{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007181 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007182
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007183 return (fptr_T)NULL;
7184}
7185
7186 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007187do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007188{
7189 *d = MB_TOUPPER(c);
7190
7191 return (fptr_T)do_Upper;
7192}
7193
7194 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007195do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007196{
7197 *d = MB_TOLOWER(c);
7198
7199 return (fptr_T)NULL;
7200}
7201
7202 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007203do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007204{
7205 *d = MB_TOLOWER(c);
7206
7207 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007208}
7209
7210/*
7211 * regtilde(): Replace tildes in the pattern by the old pattern.
7212 *
7213 * Short explanation of the tilde: It stands for the previous replacement
7214 * pattern. If that previous pattern also contains a ~ we should go back a
7215 * step further... But we insert the previous pattern into the current one
7216 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007217 * This still does not handle the case where "magic" changes. So require the
7218 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007219 *
7220 * The tildes are parsed once before the first call to vim_regsub().
7221 */
7222 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007223regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007224{
7225 char_u *newsub = source;
7226 char_u *tmpsub;
7227 char_u *p;
7228 int len;
7229 int prevlen;
7230
7231 for (p = newsub; *p; ++p)
7232 {
7233 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7234 {
7235 if (reg_prev_sub != NULL)
7236 {
7237 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7238 prevlen = (int)STRLEN(reg_prev_sub);
7239 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7240 if (tmpsub != NULL)
7241 {
7242 /* copy prefix */
7243 len = (int)(p - newsub); /* not including ~ */
7244 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007245 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007246 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7247 /* copy postfix */
7248 if (!magic)
7249 ++p; /* back off \ */
7250 STRCPY(tmpsub + len + prevlen, p + 1);
7251
7252 if (newsub != source) /* already allocated newsub */
7253 vim_free(newsub);
7254 newsub = tmpsub;
7255 p = newsub + len + prevlen;
7256 }
7257 }
7258 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007259 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007260 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007261 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007262 --p;
7263 }
7264 else
7265 {
7266 if (*p == '\\' && p[1]) /* skip escaped characters */
7267 ++p;
7268#ifdef FEAT_MBYTE
7269 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007270 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007271#endif
7272 }
7273 }
7274
7275 vim_free(reg_prev_sub);
7276 if (newsub != source) /* newsub was allocated, just keep it */
7277 reg_prev_sub = newsub;
7278 else /* no ~ found, need to save newsub */
7279 reg_prev_sub = vim_strsave(newsub);
7280 return newsub;
7281}
7282
7283#ifdef FEAT_EVAL
7284static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7285
Bram Moolenaar6100d022016-10-02 16:51:57 +02007286/* These pointers are used for reg_submatch(). Needed for when the
7287 * substitution string is an expression that contains a call to substitute()
7288 * and submatch().
 * The fields are a snapshot of the matching state, copied from "rex" by
 * vim_regsub_both() before it evaluates an expression. */
7289typedef struct {
7290 regmatch_T *sm_match; /* copy of rex.reg_match; NULL selects the sm_mmatch path */
7291 regmmatch_T *sm_mmatch; /* copy of rex.reg_mmatch */
7292 linenr_T sm_firstlnum; /* copy of rex.reg_firstlnum */
7293 linenr_T sm_maxline; /* copy of rex.reg_maxline */
7294 int sm_line_lbr; /* copy of rex.reg_line_lbr */
7295} regsubmatch_T;
7296
7297static regsubmatch_T rsm; /* can only be used when can_f_submatch is TRUE */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007298#endif
7299
7300#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007301
7302/*
7303 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7304 * vim_regsub_both().
7305 */
7306 static int
7307fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7308{
7309 listitem_T *li;
7310 int i;
7311 char_u *s;
7312
7313 if (argcount == 0)
7314 /* called function doesn't take an argument */
7315 return 0;
7316
7317 /* Relies on sl_list to be the first item in staticList10_T. */
7318 init_static_list((staticList10_T *)(argv->vval.v_list));
7319
7320 /* There are always 10 list items in staticList10_T. */
7321 li = argv->vval.v_list->lv_first;
7322 for (i = 0; i < 10; ++i)
7323 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007324 s = rsm.sm_match->startp[i];
7325 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007326 s = NULL;
7327 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007328 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007329 li->li_tv.v_type = VAR_STRING;
7330 li->li_tv.vval.v_string = s;
7331 li = li->li_next;
7332 }
7333 return 1;
7334}
7335
7336 static void
7337clear_submatch_list(staticList10_T *sl)
7338{
7339 int i;
7340
7341 for (i = 0; i < 10; ++i)
7342 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7343}
7344
Bram Moolenaar071d4272004-06-13 20:20:40 +00007345/*
7346 * vim_regsub() - perform substitutions after a vim_regexec() or
7347 * vim_regexec_multi() match.
7348 *
7349 * If "copy" is TRUE really copy into "dest".
7350 * If "copy" is FALSE nothing is copied, this is just to find out the length
7351 * of the result.
7352 *
7353 * If "backslash" is TRUE, a backslash will be removed later, need to double
7354 * them to keep them, and insert a backslash before a CR to avoid it being
7355 * replaced with a line break later.
7356 *
7357 * Note: The matched text must not change between the call of
7358 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7359 * references invalid!
7360 *
7361 * Returns the size of the replacement, including terminating NUL.
7362 */
7363 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007364vim_regsub(
7365 regmatch_T *rmp,
7366 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007367 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007368 char_u *dest,
7369 int copy,
7370 int magic,
7371 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007372{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007373 int result;
7374 regexec_T rex_save;
7375 int rex_in_use_save = rex_in_use;
7376
7377 if (rex_in_use)
7378 /* Being called recursively, save the state. */
7379 rex_save = rex;
7380 rex_in_use = TRUE;
7381
7382 rex.reg_match = rmp;
7383 rex.reg_mmatch = NULL;
7384 rex.reg_maxline = 0;
7385 rex.reg_buf = curbuf;
7386 rex.reg_line_lbr = TRUE;
7387 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7388
7389 rex_in_use = rex_in_use_save;
7390 if (rex_in_use)
7391 rex = rex_save;
7392
7393 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007394}
7395#endif
7396
7397 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007398vim_regsub_multi(
7399 regmmatch_T *rmp,
7400 linenr_T lnum,
7401 char_u *source,
7402 char_u *dest,
7403 int copy,
7404 int magic,
7405 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007406{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007407 int result;
7408 regexec_T rex_save;
7409 int rex_in_use_save = rex_in_use;
7410
7411 if (rex_in_use)
7412 /* Being called recursively, save the state. */
7413 rex_save = rex;
7414 rex_in_use = TRUE;
7415
7416 rex.reg_match = NULL;
7417 rex.reg_mmatch = rmp;
7418 rex.reg_buf = curbuf; /* always works on the current buffer! */
7419 rex.reg_firstlnum = lnum;
7420 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7421 rex.reg_line_lbr = FALSE;
7422 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7423
7424 rex_in_use = rex_in_use_save;
7425 if (rex_in_use)
7426 rex = rex_save;
7427
7428 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007429}
7430
7431 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007432vim_regsub_both(
7433 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007434 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007435 char_u *dest,
7436 int copy,
7437 int magic,
7438 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007439{
7440 char_u *src;
7441 char_u *dst;
7442 char_u *s;
7443 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007444 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007445 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007446 fptr_T func_all = (fptr_T)NULL;
7447 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007448 linenr_T clnum = 0; /* init for GCC */
7449 int len = 0; /* init for GCC */
7450#ifdef FEAT_EVAL
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007451 static char_u *eval_result = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007452#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007453
7454 /* Be paranoid... */
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007455 if ((source == NULL && expr == NULL) || dest == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007456 {
7457 EMSG(_(e_null));
7458 return 0;
7459 }
7460 if (prog_magic_wrong())
7461 return 0;
7462 src = source;
7463 dst = dest;
7464
7465 /*
7466 * When the substitute part starts with "\=" evaluate it as an expression.
7467 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007468 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007469 {
7470#ifdef FEAT_EVAL
7471 /* To make sure that the length doesn't change between checking the
7472 * length and copying the string, and to speed up things, the
7473 * resulting string is saved from the call with "copy" == FALSE to the
7474 * call with "copy" == TRUE. */
7475 if (copy)
7476 {
7477 if (eval_result != NULL)
7478 {
7479 STRCPY(dest, eval_result);
7480 dst += STRLEN(eval_result);
7481 vim_free(eval_result);
7482 eval_result = NULL;
7483 }
7484 }
7485 else
7486 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007487 int prev_can_f_submatch = can_f_submatch;
7488 regsubmatch_T rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007489
7490 vim_free(eval_result);
7491
7492 /* The expression may contain substitute(), which calls us
7493 * recursively. Make sure submatch() gets the text from the first
Bram Moolenaar6100d022016-10-02 16:51:57 +02007494 * level. */
7495 if (can_f_submatch)
7496 rsm_save = rsm;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007497 can_f_submatch = TRUE;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007498 rsm.sm_match = rex.reg_match;
7499 rsm.sm_mmatch = rex.reg_mmatch;
7500 rsm.sm_firstlnum = rex.reg_firstlnum;
7501 rsm.sm_maxline = rex.reg_maxline;
7502 rsm.sm_line_lbr = rex.reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007503
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007504 if (expr != NULL)
7505 {
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007506 typval_T argv[2];
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007507 int dummy;
7508 char_u buf[NUMBUFLEN];
7509 typval_T rettv;
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007510 staticList10_T matchList;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007511
7512 rettv.v_type = VAR_STRING;
7513 rettv.vval.v_string = NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007514 argv[0].v_type = VAR_LIST;
7515 argv[0].vval.v_list = &matchList.sl_list;
7516 matchList.sl_list.lv_len = 0;
7517 if (expr->v_type == VAR_FUNC)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007518 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007519 s = expr->vval.v_string;
7520 call_func(s, (int)STRLEN(s), &rettv,
7521 1, argv, fill_submatch_list,
7522 0L, 0L, &dummy, TRUE, NULL, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007523 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007524 else if (expr->v_type == VAR_PARTIAL)
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007525 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007526 partial_T *partial = expr->vval.v_partial;
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007527
Bram Moolenaar6100d022016-10-02 16:51:57 +02007528 s = partial_name(partial);
7529 call_func(s, (int)STRLEN(s), &rettv,
7530 1, argv, fill_submatch_list,
7531 0L, 0L, &dummy, TRUE, partial, NULL);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007532 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02007533 if (matchList.sl_list.lv_len > 0)
7534 /* fill_submatch_list() was called */
7535 clear_submatch_list(&matchList);
7536
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007537 eval_result = get_tv_string_buf_chk(&rettv, buf);
7538 if (eval_result != NULL)
7539 eval_result = vim_strsave(eval_result);
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007540 clear_tv(&rettv);
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007541 }
7542 else
7543 eval_result = eval_to_string(source + 2, NULL, TRUE);
7544
Bram Moolenaar071d4272004-06-13 20:20:40 +00007545 if (eval_result != NULL)
7546 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007547 int had_backslash = FALSE;
7548
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007549 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007550 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007551 /* Change NL to CR, so that it becomes a line break,
7552 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007553 * Skip over a backslashed character. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007554 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007555 *s = CAR;
7556 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007557 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007558 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007559 /* Change NL to CR here too, so that this works:
7560 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7561 * abc\
7562 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007563 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007564 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007565 if (*s == NL && !rsm.sm_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007566 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007567 had_backslash = TRUE;
7568 }
7569 }
7570 if (had_backslash && backslash)
7571 {
7572 /* Backslashes will be consumed, need to double them. */
7573 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7574 if (s != NULL)
7575 {
7576 vim_free(eval_result);
7577 eval_result = s;
7578 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007579 }
7580
7581 dst += STRLEN(eval_result);
7582 }
7583
Bram Moolenaar6100d022016-10-02 16:51:57 +02007584 can_f_submatch = prev_can_f_submatch;
7585 if (can_f_submatch)
7586 rsm = rsm_save;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007587 }
7588#endif
7589 }
7590 else
7591 while ((c = *src++) != NUL)
7592 {
7593 if (c == '&' && magic)
7594 no = 0;
7595 else if (c == '\\' && *src != NUL)
7596 {
7597 if (*src == '&' && !magic)
7598 {
7599 ++src;
7600 no = 0;
7601 }
7602 else if ('0' <= *src && *src <= '9')
7603 {
7604 no = *src++ - '0';
7605 }
7606 else if (vim_strchr((char_u *)"uUlLeE", *src))
7607 {
7608 switch (*src++)
7609 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007610 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007611 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007612 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007613 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007614 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007615 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007616 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007617 continue;
7618 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007619 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007620 continue;
7621 }
7622 }
7623 }
7624 if (no < 0) /* Ordinary character. */
7625 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007626 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7627 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007628 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007629 if (copy)
7630 {
7631 *dst++ = c;
7632 *dst++ = *src++;
7633 *dst++ = *src++;
7634 }
7635 else
7636 {
7637 dst += 3;
7638 src += 2;
7639 }
7640 continue;
7641 }
7642
Bram Moolenaar071d4272004-06-13 20:20:40 +00007643 if (c == '\\' && *src != NUL)
7644 {
7645 /* Check for abbreviations -- webb */
7646 switch (*src)
7647 {
7648 case 'r': c = CAR; ++src; break;
7649 case 'n': c = NL; ++src; break;
7650 case 't': c = TAB; ++src; break;
7651 /* Oh no! \e already has meaning in subst pat :-( */
7652 /* case 'e': c = ESC; ++src; break; */
7653 case 'b': c = Ctrl_H; ++src; break;
7654
7655 /* If "backslash" is TRUE the backslash will be removed
7656 * later. Used to insert a literal CR. */
7657 default: if (backslash)
7658 {
7659 if (copy)
7660 *dst = '\\';
7661 ++dst;
7662 }
7663 c = *src++;
7664 }
7665 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007666#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007667 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007668 c = mb_ptr2char(src - 1);
7669#endif
7670
Bram Moolenaardb552d602006-03-23 22:59:57 +00007671 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007672 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007673 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007674 func_one = (fptr_T)(func_one(&cc, c));
7675 else if (func_all != (fptr_T)NULL)
7676 /* Turbo C complains without the typecast */
7677 func_all = (fptr_T)(func_all(&cc, c));
7678 else /* just copy */
7679 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007680
7681#ifdef FEAT_MBYTE
7682 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007683 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007684 int totlen = mb_ptr2len(src - 1);
7685
Bram Moolenaar071d4272004-06-13 20:20:40 +00007686 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007687 mb_char2bytes(cc, dst);
7688 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007689 if (enc_utf8)
7690 {
7691 int clen = utf_ptr2len(src - 1);
7692
7693 /* If the character length is shorter than "totlen", there
7694 * are composing characters; copy them as-is. */
7695 if (clen < totlen)
7696 {
7697 if (copy)
7698 mch_memmove(dst + 1, src - 1 + clen,
7699 (size_t)(totlen - clen));
7700 dst += totlen - clen;
7701 }
7702 }
7703 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007704 }
7705 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007706#endif
7707 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007708 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007709 dst++;
7710 }
7711 else
7712 {
7713 if (REG_MULTI)
7714 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007715 clnum = rex.reg_mmatch->startpos[no].lnum;
7716 if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007717 s = NULL;
7718 else
7719 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007720 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
7721 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7722 len = rex.reg_mmatch->endpos[no].col
7723 - rex.reg_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007724 else
7725 len = (int)STRLEN(s);
7726 }
7727 }
7728 else
7729 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007730 s = rex.reg_match->startp[no];
7731 if (rex.reg_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007732 s = NULL;
7733 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007734 len = (int)(rex.reg_match->endp[no] - s);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007735 }
7736 if (s != NULL)
7737 {
7738 for (;;)
7739 {
7740 if (len == 0)
7741 {
7742 if (REG_MULTI)
7743 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007744 if (rex.reg_mmatch->endpos[no].lnum == clnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007745 break;
7746 if (copy)
7747 *dst = CAR;
7748 ++dst;
7749 s = reg_getline(++clnum);
Bram Moolenaar6100d022016-10-02 16:51:57 +02007750 if (rex.reg_mmatch->endpos[no].lnum == clnum)
7751 len = rex.reg_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007752 else
7753 len = (int)STRLEN(s);
7754 }
7755 else
7756 break;
7757 }
7758 else if (*s == NUL) /* we hit NUL. */
7759 {
7760 if (copy)
7761 EMSG(_(e_re_damg));
7762 goto exit;
7763 }
7764 else
7765 {
7766 if (backslash && (*s == CAR || *s == '\\'))
7767 {
7768 /*
7769 * Insert a backslash in front of a CR, otherwise
7770 * it will be replaced by a line break.
7771 * Number of backslashes will be halved later,
7772 * double them here.
7773 */
7774 if (copy)
7775 {
7776 dst[0] = '\\';
7777 dst[1] = *s;
7778 }
7779 dst += 2;
7780 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007781 else
7782 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007783#ifdef FEAT_MBYTE
7784 if (has_mbyte)
7785 c = mb_ptr2char(s);
7786 else
7787#endif
7788 c = *s;
7789
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007790 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007791 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007792 func_one = (fptr_T)(func_one(&cc, c));
7793 else if (func_all != (fptr_T)NULL)
7794 /* Turbo C complains without the typecast */
7795 func_all = (fptr_T)(func_all(&cc, c));
7796 else /* just copy */
7797 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007798
7799#ifdef FEAT_MBYTE
7800 if (has_mbyte)
7801 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007802 int l;
7803
7804 /* Copy composing characters separately, one
7805 * at a time. */
7806 if (enc_utf8)
7807 l = utf_ptr2len(s) - 1;
7808 else
7809 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007810
7811 s += l;
7812 len -= l;
7813 if (copy)
7814 mb_char2bytes(cc, dst);
7815 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007816 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007817 else
7818#endif
7819 if (copy)
7820 *dst = cc;
7821 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007822 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007823
Bram Moolenaar071d4272004-06-13 20:20:40 +00007824 ++s;
7825 --len;
7826 }
7827 }
7828 }
7829 no = -1;
7830 }
7831 }
7832 if (copy)
7833 *dst = NUL;
7834
7835exit:
7836 return (int)((dst - dest) + 1);
7837}
7838
7839#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007840static char_u *reg_getline_submatch(linenr_T lnum);
Bram Moolenaard32a3192009-11-26 19:40:49 +00007841
Bram Moolenaar071d4272004-06-13 20:20:40 +00007842/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007843 * Call reg_getline() with the line numbers from the submatch. If a
7844 * substitute() was used the reg_maxline and other values have been
7845 * overwritten.
7846 */
7847 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007848reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007849{
7850 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007851 linenr_T save_first = rex.reg_firstlnum;
7852 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007853
Bram Moolenaar6100d022016-10-02 16:51:57 +02007854 rex.reg_firstlnum = rsm.sm_firstlnum;
7855 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007856
7857 s = reg_getline(lnum);
7858
Bram Moolenaar6100d022016-10-02 16:51:57 +02007859 rex.reg_firstlnum = save_first;
7860 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007861 return s;
7862}
7863
7864/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007865 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007866 * allocated memory.
7867 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7868 */
7869 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007870reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007871{
7872 char_u *retval = NULL;
7873 char_u *s;
7874 int len;
7875 int round;
7876 linenr_T lnum;
7877
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007878 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007879 return NULL;
7880
Bram Moolenaar6100d022016-10-02 16:51:57 +02007881 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007882 {
7883 /*
7884 * First round: compute the length and allocate memory.
7885 * Second round: copy the text.
7886 */
7887 for (round = 1; round <= 2; ++round)
7888 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007889 lnum = rsm.sm_mmatch->startpos[no].lnum;
7890 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007891 return NULL;
7892
Bram Moolenaar6100d022016-10-02 16:51:57 +02007893 s = reg_getline_submatch(lnum) + rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007894 if (s == NULL) /* anti-crash check, cannot happen? */
7895 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007896 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007897 {
7898 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007899 len = rsm.sm_mmatch->endpos[no].col
7900 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007901 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007902 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007903 ++len;
7904 }
7905 else
7906 {
7907 /* Multiple lines: take start line from start col, middle
7908 * lines completely and end line up to end col. */
7909 len = (int)STRLEN(s);
7910 if (round == 2)
7911 {
7912 STRCPY(retval, s);
7913 retval[len] = '\n';
7914 }
7915 ++len;
7916 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007917 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007918 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007919 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007920 if (round == 2)
7921 STRCPY(retval + len, s);
7922 len += (int)STRLEN(s);
7923 if (round == 2)
7924 retval[len] = '\n';
7925 ++len;
7926 }
7927 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007928 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007929 rsm.sm_mmatch->endpos[no].col);
7930 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007931 if (round == 2)
7932 retval[len] = NUL;
7933 ++len;
7934 }
7935
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007936 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007937 {
7938 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007939 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007940 return NULL;
7941 }
7942 }
7943 }
7944 else
7945 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007946 s = rsm.sm_match->startp[no];
7947 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007948 retval = NULL;
7949 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007950 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007951 }
7952
7953 return retval;
7954}
Bram Moolenaar41571762014-04-02 19:00:58 +02007955
7956/*
7957 * Used for the submatch() function with the optional non-zero argument: get
7958 * the list of strings from the n'th submatch in allocated memory with NULs
7959 * represented in NLs.
7960 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7961 * command, for a non-existing submatch and for any error.
7962 */
7963 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007964reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007965{
7966 char_u *s;
7967 linenr_T slnum;
7968 linenr_T elnum;
7969 colnr_T scol;
7970 colnr_T ecol;
7971 int i;
7972 list_T *list;
7973 int error = FALSE;
7974
7975 if (!can_f_submatch || no < 0)
7976 return NULL;
7977
Bram Moolenaar6100d022016-10-02 16:51:57 +02007978 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007979 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007980 slnum = rsm.sm_mmatch->startpos[no].lnum;
7981 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02007982 if (slnum < 0 || elnum < 0)
7983 return NULL;
7984
Bram Moolenaar6100d022016-10-02 16:51:57 +02007985 scol = rsm.sm_mmatch->startpos[no].col;
7986 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02007987
7988 list = list_alloc();
7989 if (list == NULL)
7990 return NULL;
7991
7992 s = reg_getline_submatch(slnum) + scol;
7993 if (slnum == elnum)
7994 {
7995 if (list_append_string(list, s, ecol - scol) == FAIL)
7996 error = TRUE;
7997 }
7998 else
7999 {
8000 if (list_append_string(list, s, -1) == FAIL)
8001 error = TRUE;
8002 for (i = 1; i < elnum - slnum; i++)
8003 {
8004 s = reg_getline_submatch(slnum + i);
8005 if (list_append_string(list, s, -1) == FAIL)
8006 error = TRUE;
8007 }
8008 s = reg_getline_submatch(elnum);
8009 if (list_append_string(list, s, ecol) == FAIL)
8010 error = TRUE;
8011 }
8012 }
8013 else
8014 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008015 s = rsm.sm_match->startp[no];
8016 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008017 return NULL;
8018 list = list_alloc();
8019 if (list == NULL)
8020 return NULL;
8021 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02008022 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008023 error = TRUE;
8024 }
8025
8026 if (error)
8027 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02008028 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02008029 return NULL;
8030 }
8031 return list;
8032}
Bram Moolenaar071d4272004-06-13 20:20:40 +00008033#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008034
8035static regengine_T bt_regengine =
8036{
8037 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008038 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008039 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008040 bt_regexec_multi,
8041 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008042};
8043
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008044#include "regexp_nfa.c"
8045
8046static regengine_T nfa_regengine =
8047{
8048 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008049 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008050 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008051 nfa_regexec_multi,
8052 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008053};
8054
/*
 * The regexp engine that vim_regcomp() is using.  It is set from
 * 'regexpengine' (p_re) at the start of every vim_regcomp() call and may then
 * be changed by a "\%#=" prefix in the pattern or by the automatic fallback
 * to the backtracking engine.
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008058
#ifdef DEBUG
/*
 * Engine names for the debug message in vim_regcomp(), indexed by the engine
 * number given after "\%#=".
 */
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8066
8067/*
8068 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008069 * Returns the program in allocated memory.
8070 * Use vim_regfree() to free the memory.
8071 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008072 */
8073 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01008074vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008075{
8076 regprog_T *prog = NULL;
8077 char_u *expr = expr_arg;
8078
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008079 regexp_engine = p_re;
8080
8081 /* Check for prefix "\%#=", that sets the regexp engine */
8082 if (STRNCMP(expr, "\\%#=", 4) == 0)
8083 {
8084 int newengine = expr[4] - '0';
8085
8086 if (newengine == AUTOMATIC_ENGINE
8087 || newengine == BACKTRACKING_ENGINE
8088 || newengine == NFA_ENGINE)
8089 {
8090 regexp_engine = expr[4] - '0';
8091 expr += 5;
8092#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02008093 smsg((char_u *)"New regexp mode selected (%d): %s",
8094 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008095#endif
8096 }
8097 else
8098 {
8099 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8100 regexp_engine = AUTOMATIC_ENGINE;
8101 }
8102 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008103 bt_regengine.expr = expr;
8104 nfa_regengine.expr = expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008105
8106 /*
8107 * First try the NFA engine, unless backtracking was requested.
8108 */
8109 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008110 prog = nfa_regengine.regcomp(expr,
8111 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008112 else
8113 prog = bt_regengine.regcomp(expr, re_flags);
8114
Bram Moolenaarfda37292014-11-05 14:27:36 +01008115 /* Check for error compiling regexp with initial engine. */
8116 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008117 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008118#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008119 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8120 {
8121 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008122 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008123 if (f)
8124 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008125 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008126 fclose(f);
8127 }
8128 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008129 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8130 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008131 }
8132#endif
8133 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008134 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008135 * The NFA engine also fails for patterns that it can't handle well
8136 * but are still valid patterns, thus a retry should work.
8137 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008138 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008139 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008140 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008141 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008142 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008143 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008144
Bram Moolenaarfda37292014-11-05 14:27:36 +01008145 if (prog != NULL)
8146 {
8147 /* Store the info needed to call regcomp() again when the engine turns
8148 * out to be very slow when executing it. */
8149 prog->re_engine = regexp_engine;
8150 prog->re_flags = re_flags;
8151 }
8152
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008153 return prog;
8154}
8155
8156/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008157 * Free a compiled regexp program, returned by vim_regcomp().
8158 */
8159 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008160vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008161{
8162 if (prog != NULL)
8163 prog->engine->regfree(prog);
8164}
8165
#ifdef FEAT_EVAL
static void report_re_switch(char_u *pat);

/*
 * When 'verbose' is above zero, give a message that the backtracking engine
 * is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
    MSG_PUTS(pat);
    verbose_leave();
}
#endif
8181
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01008182static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, int nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008183
Bram Moolenaar473de612013-06-08 18:19:48 +02008184/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008185 * Match a regexp against a string.
8186 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008187 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008188 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008189 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008190 *
8191 * Return TRUE if there is a match, FALSE if not.
8192 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008193 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01008194vim_regexec_both(
8195 regmatch_T *rmp,
8196 char_u *line, /* string to match against */
8197 colnr_T col, /* column to start looking for match */
8198 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008199{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008200 int result;
8201 regexec_T rex_save;
8202 int rex_in_use_save = rex_in_use;
8203
8204 if (rex_in_use)
8205 /* Being called recursively, save the state. */
8206 rex_save = rex;
8207 rex_in_use = TRUE;
8208 rex.reg_startp = NULL;
8209 rex.reg_endp = NULL;
8210 rex.reg_startpos = NULL;
8211 rex.reg_endpos = NULL;
8212
8213 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008214
8215 /* NFA engine aborted because it's very slow. */
8216 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8217 && result == NFA_TOO_EXPENSIVE)
8218 {
8219 int save_p_re = p_re;
8220 int re_flags = rmp->regprog->re_flags;
8221 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8222
8223 p_re = BACKTRACKING_ENGINE;
8224 vim_regfree(rmp->regprog);
8225 if (pat != NULL)
8226 {
8227#ifdef FEAT_EVAL
8228 report_re_switch(pat);
8229#endif
8230 rmp->regprog = vim_regcomp(pat, re_flags);
8231 if (rmp->regprog != NULL)
8232 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8233 vim_free(pat);
8234 }
8235
8236 p_re = save_p_re;
8237 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008238
8239 rex_in_use = rex_in_use_save;
8240 if (rex_in_use)
8241 rex = rex_save;
8242
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008243 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008244}
8245
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008246/*
8247 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008248 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008249 */
8250 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008251vim_regexec_prog(
8252 regprog_T **prog,
8253 int ignore_case,
8254 char_u *line,
8255 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008256{
8257 int r;
8258 regmatch_T regmatch;
8259
8260 regmatch.regprog = *prog;
8261 regmatch.rm_ic = ignore_case;
8262 r = vim_regexec_both(&regmatch, line, col, FALSE);
8263 *prog = regmatch.regprog;
8264 return r;
8265}
8266
8267/*
8268 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008269 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008270 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008271 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008272vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008273{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008274 return vim_regexec_both(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008275}
8276
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Match the regexp in "rmp" against "line", starting in column "col".
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int matched = vim_regexec_both(rmp, line, col, TRUE);

    return matched;
}
#endif
8290
8291/*
8292 * Match a regexp against multiple lines.
8293 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008294 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008295 * Uses curbuf for line count and 'iskeyword'.
8296 *
8297 * Return zero if there is no match. Return number of lines contained in the
8298 * match otherwise.
8299 */
8300 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008301vim_regexec_multi(
8302 regmmatch_T *rmp,
8303 win_T *win, /* window in which to search or NULL */
8304 buf_T *buf, /* buffer in which to search */
8305 linenr_T lnum, /* nr of line to start looking for match */
8306 colnr_T col, /* column to start looking for match */
8307 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008308{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008309 int result;
8310 regexec_T rex_save;
8311 int rex_in_use_save = rex_in_use;
8312
8313 if (rex_in_use)
8314 /* Being called recursively, save the state. */
8315 rex_save = rex;
8316 rex_in_use = TRUE;
8317
8318 result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008319
8320 /* NFA engine aborted because it's very slow. */
8321 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8322 && result == NFA_TOO_EXPENSIVE)
8323 {
8324 int save_p_re = p_re;
8325 int re_flags = rmp->regprog->re_flags;
8326 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8327
8328 p_re = BACKTRACKING_ENGINE;
8329 vim_regfree(rmp->regprog);
8330 if (pat != NULL)
8331 {
8332#ifdef FEAT_EVAL
8333 report_re_switch(pat);
8334#endif
8335 rmp->regprog = vim_regcomp(pat, re_flags);
8336 if (rmp->regprog != NULL)
8337 result = rmp->regprog->engine->regexec_multi(
8338 rmp, win, buf, lnum, col, tm);
8339 vim_free(pat);
8340 }
8341 p_re = save_p_re;
8342 }
8343
Bram Moolenaar6100d022016-10-02 16:51:57 +02008344 rex_in_use = rex_in_use_save;
8345 if (rex_in_use)
8346 rex = rex_save;
8347
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008348 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008349}