blob: de066a1015c8dfaf6096d3ca854ff711266e927d [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
85 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
86 * pointer with a BRANCH on both ends of it is connecting two alternatives.
87 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
88 * (as opposed to a collection of them) is never concatenated with anything
89 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: opcodes 20-48 are the normal classes; adding ADD_NL
 * gives opcodes 50-78, the same classes that also include a line-break.
 */
#define ADD_NL          30
/* FIRST_NL and LAST_NL are parenthesized so that they remain a single
 * operand when used inside a larger expression (e.g. "FIRST_NL * 2"). */
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20      /*      Match any one character. */
#define ANYOF           21      /* str  Match any character in this string. */
#define ANYBUT          22      /* str  Match any character not in this
                                 *      string. */
#define IDENT           23      /*      Match identifier char */
#define SIDENT          24      /*      Match identifier char but no digit */
#define KWORD           25      /*      Match keyword char */
#define SKWORD          26      /*      Match word char but no digit */
#define FNAME           27      /*      Match file name char */
#define SFNAME          28      /*      Match file name char but no digit */
#define PRINT           29      /*      Match printable char */
#define SPRINT          30      /*      Match printable char but no digit */
#define WHITE           31      /*      Match whitespace char */
#define NWHITE          32      /*      Match non-whitespace char */
#define DIGIT           33      /*      Match digit char */
#define NDIGIT          34      /*      Match non-digit char */
#define HEX             35      /*      Match hex char */
#define NHEX            36      /*      Match non-hex char */
#define OCTAL           37      /*      Match octal char */
#define NOCTAL          38      /*      Match non-octal char */
#define WORD            39      /*      Match word char */
#define NWORD           40      /*      Match non-word char */
#define HEAD            41      /*      Match head char */
#define NHEAD           42      /*      Match non-head char */
#define ALPHA           43      /*      Match alpha char */
#define NALPHA          44      /*      Match non-alpha char */
#define LOWER           45      /*      Match lowercase char */
#define NLOWER          46      /*      Match non-lowercase char */
#define UPPER           47      /*      Match uppercase char */
#define NUPPER          48      /*      Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* Non-zero when opcode "op" lies in the FIRST_NL..LAST_NL range, i.e. it is
 * one of the character classes that include a line-break. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249/*
250 * Magic characters have a special meaning, they don't match literally.
251 * Magic characters are negative. This separates them from literal characters
252 * (possibly multi-byte). Only ASCII characters can be Magic.
253 */
254#define Magic(x) ((int)(x) - 256)
255#define un_Magic(x) ((x) + 256)
256#define is_Magic(x) ((x) < 0)
257
/*
 * Remove the magic-ness from "x": when "x" is a Magic character the result
 * of un_Magic() is returned, any other value is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
265
/*
 * Flip the magic-ness of "x": a Magic character is passed through
 * un_Magic(), any other value is passed through Magic().
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
273
274/*
275 * The first byte of the regexp internal "program" is actually this magic
276 * number; the start node begins in the second byte. It's used to catch the
277 * most severe mutilation of the program by the caller.
278 */
279
280#define REGMAGIC 0234
281
282/*
283 * Opcode notes:
284 *
285 * BRANCH The set of branches constituting a single choice are hooked
286 * together with their "next" pointers, since precedence prevents
287 * anything being concatenated to any individual branch. The
288 * "next" pointer of the last BRANCH in a choice points to the
289 * thing following the whole choice. This is also where the
290 * final "next" pointer of each individual branch points; each
291 * branch starts with the operand node of a BRANCH node.
292 *
293 * BACK Normal "next" pointers all implicitly point forward; BACK
294 * exists to make loop structures possible.
295 *
296 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
297 * BRANCH structures using BACK. Simple cases (one character
298 * per match) are implemented with STAR and PLUS for speed
299 * and to minimize recursive plunges.
300 *
301 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
302 * node, and defines the min and max limits to be used for that
303 * node.
304 *
305 * MOPEN,MCLOSE ...are numbered at compile time.
306 * ZOPEN,ZCLOSE ...ditto
307 */
308
309/*
310 * A node is one char of opcode followed by two chars of "next" pointer.
311 * "Next" pointers are stored as two 8-bit bytes, high order first. The
312 * value is a positive offset from the opcode of the node containing it.
313 * An operand, if any, simply follows the node. (Note that much of the
314 * code generation knows about this implicit relationship.)
315 *
316 * Using two bytes for the "next" pointer is vast overkill for most things,
317 * but allows patterns to get big without disasters.
318 */
319#define OP(p) ((int)*(p))
320#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
321#define OPERAND(p) ((p) + 3)
322/* Obtain an operand that was stored as four bytes, MSB first. */
323#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
324 + ((long)(p)[5] << 8) + (long)(p)[6])
325/* Obtain a second operand stored as four bytes. */
326#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
327/* Obtain a second single-byte operand stored after a four bytes operand. */
328#define OPERAND_CMP(p) (p)[7]
329
330/*
331 * Utility definitions.
332 */
333#define UCHARAT(p) ((int)*(char_u *)(p))
334
335/* Used for an error (down from) vim_regcomp(): give the error message, set
336 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000337#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar95f09602016-11-10 20:01:45 +0100338#define IEMSG_RET_NULL(m) return (IEMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000339#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200340#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
341#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
342#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000343
Bram Moolenaar95f09602016-11-10 20:01:45 +0100344
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345#define MAX_LIMIT (32767L << 16L)
346
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100347static int re_multi_type(int);
348static int cstrncmp(char_u *s1, char_u *s2, int *n);
349static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000350
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200351#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100352static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100355static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#endif
357
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100358static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200361static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
362static char_u e_large_class[] = N_("E945: Range too large in character class");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200363static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
364static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
365static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200366#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200367static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
368static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200369#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200370static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200371static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000372#define NOT_MULTI 0
373#define MULTI_ONE 1
374#define MULTI_MULT 2
375/*
376 * Return NOT_MULTI if c is not a "multi" operator.
377 * Return MULTI_ONE if c is a single "multi" operator.
378 * Return MULTI_MULT if c is a multi "multi" operator.
379 */
380 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100381re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000382{
383 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
384 return MULTI_ONE;
385 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
386 return MULTI_MULT;
387 return NOT_MULTI;
388}
389
390/*
391 * Flags to be passed up and down.
392 */
393#define HASWIDTH 0x1 /* Known never to match null string. */
394#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
395#define SPSTART 0x4 /* Starts with * or +. */
396#define HASNL 0x8 /* Contains some \n. */
397#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
398#define WORST 0 /* Worst case. */
399
400/*
401 * When regcode is set to this value, code is not emitted and size is computed
402 * instead.
403 */
404#define JUST_CALC_SIZE ((char_u *) -1)
405
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000406static char_u *reg_prev_sub = NULL;
407
Bram Moolenaar071d4272004-06-13 20:20:40 +0000408/*
409 * REGEXP_INRANGE contains all characters which are always special in a []
410 * range after '\'.
411 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
412 * These are:
413 * \n - New line (NL).
414 * \r - Carriage Return (CR).
415 * \t - Tab (TAB).
416 * \e - Escape (ESC).
417 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000418 * \d - Character code in decimal, eg \d123
419 * \o - Character code in octal, eg \o40
420 * \x - Character code in hex, eg \x4a
421 * \u - Multibyte character code, eg \u20ac
422 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000423 */
424static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000425static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100427static int backslash_trans(int c);
428static int get_char_class(char_u **pp);
429static int get_equi_class(char_u **pp);
430static void reg_equi_class(int c);
431static int get_coll_element(char_u **pp);
432static char_u *skip_anyof(char_u *p);
433static void init_class_tab(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434
435/*
436 * Translate '\x' to its control character, except "\n", which is Magic.
437 */
438 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100439backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000440{
441 switch (c)
442 {
443 case 'r': return CAR;
444 case 't': return TAB;
445 case 'e': return ESC;
446 case 'b': return BS;
447 }
448 return c;
449}
450
451/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000452 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
454 * recognized. Otherwise "pp" is advanced to after the item.
455 */
456 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100457get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458{
459 static const char *(class_names[]) =
460 {
461 "alnum:]",
462#define CLASS_ALNUM 0
463 "alpha:]",
464#define CLASS_ALPHA 1
465 "blank:]",
466#define CLASS_BLANK 2
467 "cntrl:]",
468#define CLASS_CNTRL 3
469 "digit:]",
470#define CLASS_DIGIT 4
471 "graph:]",
472#define CLASS_GRAPH 5
473 "lower:]",
474#define CLASS_LOWER 6
475 "print:]",
476#define CLASS_PRINT 7
477 "punct:]",
478#define CLASS_PUNCT 8
479 "space:]",
480#define CLASS_SPACE 9
481 "upper:]",
482#define CLASS_UPPER 10
483 "xdigit:]",
484#define CLASS_XDIGIT 11
485 "tab:]",
486#define CLASS_TAB 12
487 "return:]",
488#define CLASS_RETURN 13
489 "backspace:]",
490#define CLASS_BACKSPACE 14
491 "escape:]",
492#define CLASS_ESCAPE 15
493 };
494#define CLASS_NONE 99
495 int i;
496
497 if ((*pp)[1] == ':')
498 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000499 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000500 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
501 {
502 *pp += STRLEN(class_names[i]) + 2;
503 return i;
504 }
505 }
506 return CLASS_NONE;
507}
508
509/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000510 * Specific version of character class functions.
511 * Using a table to keep this fast.
512 */
513static short class_tab[256];
514
515#define RI_DIGIT 0x01
516#define RI_HEX 0x02
517#define RI_OCTAL 0x04
518#define RI_WORD 0x08
519#define RI_HEAD 0x10
520#define RI_ALPHA 0x20
521#define RI_LOWER 0x40
522#define RI_UPPER 0x80
523#define RI_WHITE 0x100
524
525 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100526init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000527{
528 int i;
529 static int done = FALSE;
530
531 if (done)
532 return;
533
534 for (i = 0; i < 256; ++i)
535 {
536 if (i >= '0' && i <= '7')
537 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
538 else if (i >= '8' && i <= '9')
539 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
540 else if (i >= 'a' && i <= 'f')
541 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
542#ifdef EBCDIC
543 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
544 || (i >= 's' && i <= 'z'))
545#else
546 else if (i >= 'g' && i <= 'z')
547#endif
548 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
549 else if (i >= 'A' && i <= 'F')
550 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
551#ifdef EBCDIC
552 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
553 || (i >= 'S' && i <= 'Z'))
554#else
555 else if (i >= 'G' && i <= 'Z')
556#endif
557 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
558 else if (i == '_')
559 class_tab[i] = RI_WORD + RI_HEAD;
560 else
561 class_tab[i] = 0;
562 }
563 class_tab[' '] |= RI_WHITE;
564 class_tab['\t'] |= RI_WHITE;
565 done = TRUE;
566}
567
/*
 * Character class tests that look up the RI_ flags in class_tab[].
 * Each macro is non-zero when "c" has the corresponding flag set.
 * With FEAT_MBYTE a value of 0x100 or more never matches, because
 * class_tab[] is only indexed for values below 0x100.
 * The macro parameter is parenthesized so that an argument containing a
 * lower-precedence operator (e.g. "x ? a : b") is compared as a whole.
 * Note that the FEAT_MBYTE variants evaluate "c" twice: do not pass an
 * expression with side effects.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
589
590/* flags for regflags */
591#define RF_ICASE 1 /* ignore case */
592#define RF_NOICASE 2 /* don't ignore case */
593#define RF_HASNL 4 /* can match a NL */
594#define RF_ICOMBINE 8 /* ignore combining characters */
595#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
596
597/*
598 * Global work variables for vim_regcomp().
599 */
600
601static char_u *regparse; /* Input-scan pointer. */
602static int prevchr_len; /* byte length of previous char */
603static int num_complex_braces; /* Complex \{...} count */
604static int regnpar; /* () count. */
605#ifdef FEAT_SYN_HL
606static int regnzpar; /* \z() count. */
607static int re_has_z; /* \z item detected */
608#endif
609static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
610static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000611static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000612static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
613static unsigned regflags; /* RF_ flags for prog */
614static long brace_min[10]; /* Minimums for complex brace repeats */
615static long brace_max[10]; /* Maximums for complex brace repeats */
616static int brace_count[10]; /* Current counts for complex brace repeats */
617#if defined(FEAT_SYN_HL) || defined(PROTO)
618static int had_eol; /* TRUE when EOL found by vim_regcomp() */
619#endif
620static int one_exactly = FALSE; /* only do one char for EXACTLY */
621
622static int reg_magic; /* magicness of the pattern: */
623#define MAGIC_NONE 1 /* "\V" very unmagic */
624#define MAGIC_OFF 2 /* "\M" or 'magic' off */
625#define MAGIC_ON 3 /* "\m" or 'magic' */
626#define MAGIC_ALL 4 /* "\v" very magic */
627
628static int reg_string; /* matching with a string instead of a buffer
629 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000630static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000631
632/*
633 * META contains all characters that may be magic, except '^' and '$'.
634 */
635
636#ifdef EBCDIC
637static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
638#else
639/* META[] is used often enough to justify turning it into a table. */
640static char_u META_flags[] = {
641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
643/* % & ( ) * + . */
644 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
645/* 1 2 3 4 5 6 7 8 9 < = > ? */
646 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
647/* @ A C D F H I K L M O */
648 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
649/* P S U V W X Z [ _ */
650 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
651/* a c d f h i k l m n o */
652 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
653/* p s u v w x z { | ~ */
654 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
655};
656#endif
657
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200658static int curchr; /* currently parsed character */
659/* Previous character. Note: prevchr is sometimes -1 when we are not at the
660 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
661 * because ^ was taken to be magic -- webb */
662static int prevchr;
663static int prevprevchr; /* previous-previous character */
664static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000665
666/* arguments for reg() */
667#define REG_NOPAREN 0 /* toplevel reg() */
668#define REG_PAREN 1 /* \(\) */
669#define REG_ZPAREN 2 /* \z(\) */
670#define REG_NPAREN 3 /* \%(\) */
671
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200672typedef struct
673{
674 char_u *regparse;
675 int prevchr_len;
676 int curchr;
677 int prevchr;
678 int prevprevchr;
679 int nextchr;
680 int at_start;
681 int prev_at_start;
682 int regnpar;
683} parse_state_T;
684
Bram Moolenaar071d4272004-06-13 20:20:40 +0000685/*
686 * Forward declarations for vim_regcomp()'s friends.
687 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100688static void initchr(char_u *);
689static void save_parse_state(parse_state_T *ps);
690static void restore_parse_state(parse_state_T *ps);
691static int getchr(void);
692static void skipchr_keepstart(void);
693static int peekchr(void);
694static void skipchr(void);
695static void ungetchr(void);
696static int gethexchrs(int maxinputlen);
697static int getoctchrs(void);
698static int getdecchrs(void);
699static int coll_get_char(void);
700static void regcomp_start(char_u *expr, int flags);
701static char_u *reg(int, int *);
702static char_u *regbranch(int *flagp);
703static char_u *regconcat(int *flagp);
704static char_u *regpiece(int *);
705static char_u *regatom(int *);
706static char_u *regnode(int);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000707#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100708static int use_multibytecode(int c);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000709#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100710static int prog_magic_wrong(void);
711static char_u *regnext(char_u *);
712static void regc(int b);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000713#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100714static void regmbc(int c);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200715# define REGMBC(x) regmbc(x);
716# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000717#else
718# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200719# define REGMBC(x)
720# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000721#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100722static void reginsert(int, char_u *);
723static void reginsert_nr(int op, long val, char_u *opnd);
724static void reginsert_limits(int, long, long, char_u *);
725static char_u *re_put_long(char_u *pr, long_u val);
726static int read_limits(long *, long *);
727static void regtail(char_u *, char_u *);
728static void regoptail(char_u *, char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200730static regengine_T bt_regengine;
731static regengine_T nfa_regengine;
732
Bram Moolenaar071d4272004-06-13 20:20:40 +0000733/*
734 * Return TRUE if compiled regular expression "prog" can match a line break.
735 */
736 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100737re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000738{
739 return (prog->regflags & RF_HASNL);
740}
741
742/*
743 * Return TRUE if compiled regular expression "prog" looks before the start
744 * position (pattern contains "\@<=" or "\@<!").
745 */
746 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100747re_lookbehind(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000748{
749 return (prog->regflags & RF_LOOKBH);
750}
751
752/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000753 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
754 * Returns a character representing the class. Zero means that no item was
755 * recognized. Otherwise "pp" is advanced to after the item.
756 */
757 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100758get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000759{
760 int c;
761 int l = 1;
762 char_u *p = *pp;
763
764 if (p[1] == '=')
765 {
766#ifdef FEAT_MBYTE
767 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000768 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000769#endif
770 if (p[l + 2] == '=' && p[l + 3] == ']')
771 {
772#ifdef FEAT_MBYTE
773 if (has_mbyte)
774 c = mb_ptr2char(p + 2);
775 else
776#endif
777 c = p[2];
778 *pp += l + 4;
779 return c;
780 }
781 }
782 return 0;
783}
784
#ifdef EBCDIC
/*
 * Equivalence class table for EBCDIC (IBM-1047).
 * Every entry is one class: a NUL-terminated string that starts with the
 * plain letter, followed by the other bytes belonging to the same class.
 * reg_equi_class() looks for the entry containing "c" and emits all bytes
 * of that entry.
 */
char *EQUIVAL_CLASS_C[16] = {
    /* upper case */
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF\x80",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    /* lower case */
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF\x70",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
808
Bram Moolenaardf177f62005-02-22 08:39:57 +0000809/*
810 * Produce the bytes for equivalence class "c".
811 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200812 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000813 */
814 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100815reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000816{
817#ifdef FEAT_MBYTE
818 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000819 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000820#endif
821 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200822#ifdef EBCDIC
823 int i;
824
825 /* This might be slower than switch/case below. */
826 for (i = 0; i < 16; i++)
827 {
828 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
829 {
830 char *p = EQUIVAL_CLASS_C[i];
831
832 while (*p != 0)
833 regmbc(*p++);
834 return;
835 }
836 }
837#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000838 switch (c)
839 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200840 /* Do not use '\300' style, it results in a negative number. */
841 case 'A': case 0xc0: case 0xc1: case 0xc2:
842 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200843 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
844 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200845 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
846 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
847 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200848 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
849 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
850 REGMBC(0x1ea2)
851 return;
852 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
853 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000854 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200855 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200856 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200857 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200858 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
859 REGMBC(0x10c)
860 return;
861 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
862 CASEMBC(0x1e0e) CASEMBC(0x1e10)
863 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
864 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000865 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200866 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200867 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
868 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200869 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
870 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200871 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
872 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
873 REGMBC(0x1ebc)
874 return;
875 case 'F': CASEMBC(0x1e1e)
876 regmbc('F'); REGMBC(0x1e1e)
877 return;
878 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
879 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
880 CASEMBC(0x1e20)
881 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
882 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
883 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
884 return;
885 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
886 CASEMBC(0x1e26) CASEMBC(0x1e28)
887 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
888 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000889 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200890 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200891 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
892 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200893 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
894 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200895 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
896 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
897 REGMBC(0x1ec8)
898 return;
899 case 'J': CASEMBC(0x134)
900 regmbc('J'); REGMBC(0x134)
901 return;
902 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
903 CASEMBC(0x1e34)
904 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
905 REGMBC(0x1e30) REGMBC(0x1e34)
906 return;
907 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
908 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
909 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
910 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
911 REGMBC(0x1e3a)
912 return;
913 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
914 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000915 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200916 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200917 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
918 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200919 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200920 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
921 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000922 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200923 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
924 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200925 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
926 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200927 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
928 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
929 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200930 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
931 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
932 REGMBC(0x1ec) REGMBC(0x1ece)
933 return;
934 case 'P': case 0x1e54: case 0x1e56:
935 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
936 return;
937 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
938 CASEMBC(0x1e58) CASEMBC(0x1e5e)
939 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
940 REGMBC(0x1e58) REGMBC(0x1e5e)
941 return;
942 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
943 CASEMBC(0x160) CASEMBC(0x1e60)
944 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
945 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
946 return;
947 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
948 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
949 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
950 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000951 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200952 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200953 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
954 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
955 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200956 regmbc('U'); regmbc(0xd9); regmbc(0xda);
957 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200958 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
959 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
960 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
961 return;
962 case 'V': CASEMBC(0x1e7c)
963 regmbc('V'); REGMBC(0x1e7c)
964 return;
965 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
966 CASEMBC(0x1e84) CASEMBC(0x1e86)
967 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
968 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
969 return;
970 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
971 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000972 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200973 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200974 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
975 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200976 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200977 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
978 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
979 return;
980 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
981 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
982 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
983 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
984 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000985 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200986 case 'a': case 0xe0: case 0xe1: case 0xe2:
987 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200988 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
989 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200990 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
991 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
992 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200993 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
994 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
995 REGMBC(0x1ea3)
996 return;
997 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
998 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000999 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001000 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001001 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001002 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001003 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1004 REGMBC(0x10d)
1005 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001006 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1007 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001008 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001009 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001010 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001011 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001012 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1013 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001014 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1015 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001016 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1017 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1018 REGMBC(0x1ebd)
1019 return;
1020 case 'f': CASEMBC(0x1e1f)
1021 regmbc('f'); REGMBC(0x1e1f)
1022 return;
1023 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1024 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1025 CASEMBC(0x1e21)
1026 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1027 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1028 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1029 return;
1030 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1031 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1032 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1033 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1034 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001035 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001036 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001037 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1038 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001039 regmbc('i'); regmbc(0xec); regmbc(0xed);
1040 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001041 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1042 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1043 return;
1044 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1045 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1046 return;
1047 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1048 CASEMBC(0x1e35)
1049 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1050 REGMBC(0x1e31) REGMBC(0x1e35)
1051 return;
1052 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1053 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1054 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1055 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1056 REGMBC(0x1e3b)
1057 return;
1058 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1059 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001060 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001061 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001062 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1063 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001064 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001065 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1066 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001067 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001068 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1069 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001070 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1071 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001072 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1073 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1074 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001075 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1076 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1077 REGMBC(0x1ed) REGMBC(0x1ecf)
1078 return;
1079 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1080 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1081 return;
1082 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1083 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1084 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1085 REGMBC(0x1e59) REGMBC(0x1e5f)
1086 return;
1087 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1088 CASEMBC(0x161) CASEMBC(0x1e61)
1089 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1090 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1091 return;
1092 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1093 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1094 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1095 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001096 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001097 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001098 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1099 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1100 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001101 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1102 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001103 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1104 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1105 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1106 return;
1107 case 'v': CASEMBC(0x1e7d)
1108 regmbc('v'); REGMBC(0x1e7d)
1109 return;
1110 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1111 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1112 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1113 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1114 REGMBC(0x1e98)
1115 return;
1116 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1117 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001118 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001119 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001120 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1121 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001122 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001123 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1124 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1125 return;
1126 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1127 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1128 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1129 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1130 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001131 return;
1132 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001133#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001134 }
1135 regmbc(c);
1136}
1137
1138/*
1139 * Check for a collating element "[.a.]". "pp" points to the '['.
1140 * Returns a character. Zero means that no item was recognized. Otherwise
1141 * "pp" is advanced to after the item.
1142 * Currently only single characters are recognized!
1143 */
1144 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001145get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001146{
1147 int c;
1148 int l = 1;
1149 char_u *p = *pp;
1150
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001151 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001152 {
1153#ifdef FEAT_MBYTE
1154 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001155 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001156#endif
1157 if (p[l + 2] == '.' && p[l + 3] == ']')
1158 {
1159#ifdef FEAT_MBYTE
1160 if (has_mbyte)
1161 c = mb_ptr2char(p + 2);
1162 else
1163#endif
1164 c = p[2];
1165 *pp += l + 4;
1166 return c;
1167 }
1168 }
1169 return 0;
1170}
1171
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001172static void get_cpo_flags(void);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001173static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1174static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1175
1176 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001177get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001178{
1179 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1180 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1181}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001182
1183/*
1184 * Skip over a "[]" range.
1185 * "p" must point to the character after the '['.
1186 * The returned pointer is on the matching ']', or the terminating NUL.
1187 */
1188 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001189skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001190{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001191#ifdef FEAT_MBYTE
1192 int l;
1193#endif
1194
Bram Moolenaardf177f62005-02-22 08:39:57 +00001195 if (*p == '^') /* Complement of range. */
1196 ++p;
1197 if (*p == ']' || *p == '-')
1198 ++p;
1199 while (*p != NUL && *p != ']')
1200 {
1201#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001202 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001203 p += l;
1204 else
1205#endif
1206 if (*p == '-')
1207 {
1208 ++p;
1209 if (*p != ']' && *p != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001210 MB_PTR_ADV(p);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001211 }
1212 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001213 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001214 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001215 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001216 p += 2;
1217 else if (*p == '[')
1218 {
1219 if (get_char_class(&p) == CLASS_NONE
1220 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001221 && get_coll_element(&p) == 0
1222 && *p != NUL)
1223 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001224 }
1225 else
1226 ++p;
1227 }
1228
1229 return p;
1230}
1231
1232/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001233 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001234 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001235 * Take care of characters with a backslash in front of it.
1236 * Skip strings inside [ and ].
1237 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1238 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1239 * is changed in-place.
1240 */
1241 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001242skip_regexp(
1243 char_u *startp,
1244 int dirc,
1245 int magic,
1246 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001247{
1248 int mymagic;
1249 char_u *p = startp;
1250
1251 if (magic)
1252 mymagic = MAGIC_ON;
1253 else
1254 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001255 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001256
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001257 for (; p[0] != NUL; MB_PTR_ADV(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001258 {
1259 if (p[0] == dirc) /* found end of regexp */
1260 break;
1261 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1262 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1263 {
1264 p = skip_anyof(p + 1);
1265 if (p[0] == NUL)
1266 break;
1267 }
1268 else if (p[0] == '\\' && p[1] != NUL)
1269 {
1270 if (dirc == '?' && newp != NULL && p[1] == '?')
1271 {
1272 /* change "\?" to "?", make a copy first. */
1273 if (*newp == NULL)
1274 {
1275 *newp = vim_strsave(startp);
1276 if (*newp != NULL)
1277 p = *newp + (p - startp);
1278 }
1279 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001280 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001281 else
1282 ++p;
1283 }
1284 else
1285 ++p; /* skip next character */
1286 if (*p == 'v')
1287 mymagic = MAGIC_ALL;
1288 else if (*p == 'V')
1289 mymagic = MAGIC_NONE;
1290 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001291 }
1292 return p;
1293}
1294
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001295static regprog_T *bt_regcomp(char_u *expr, int re_flags);
1296static void bt_regfree(regprog_T *prog);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001297
Bram Moolenaar071d4272004-06-13 20:20:40 +00001298/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001299 * bt_regcomp() - compile a regular expression into internal code for the
1300 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001301 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001302 *
1303 * We can't allocate space until we know how big the compiled form will be,
1304 * but we can't compile it (and thus know how big it is) until we've got a
1305 * place to put the code. So we cheat: we compile it twice, once with code
1306 * generation turned off and size counting turned on, and once "for real".
1307 * This also means that we don't allocate space until we are sure that the
1308 * thing really will compile successfully, and we never have to move the
1309 * code and thus invalidate pointers into it. (Note that it has to be in
1310 * one piece because vim_free() must be able to free it all.)
1311 *
1312 * Whether upper/lower case is to be ignored is decided when executing the
1313 * program, it does not matter here.
1314 *
1315 * Beware that the optimization-preparation code in here knows about some
1316 * of the structure of the compiled regexp.
1317 * "re_flags": RE_MAGIC and/or RE_STRING.
1318 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001319 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001320bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001321{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001322 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001323 char_u *scan;
1324 char_u *longest;
1325 int len;
1326 int flags;
1327
1328 if (expr == NULL)
1329 EMSG_RET_NULL(_(e_null));
1330
1331 init_class_tab();
1332
1333 /*
1334 * First pass: determine size, legality.
1335 */
1336 regcomp_start(expr, re_flags);
1337 regcode = JUST_CALC_SIZE;
1338 regc(REGMAGIC);
1339 if (reg(REG_NOPAREN, &flags) == NULL)
1340 return NULL;
1341
Bram Moolenaar071d4272004-06-13 20:20:40 +00001342 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001343 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001344 if (r == NULL)
1345 return NULL;
1346
1347 /*
1348 * Second pass: emit code.
1349 */
1350 regcomp_start(expr, re_flags);
1351 regcode = r->program;
1352 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001353 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001354 {
1355 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001356 if (reg_toolong)
1357 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358 return NULL;
1359 }
1360
1361 /* Dig out information for optimizations. */
1362 r->regstart = NUL; /* Worst-case defaults. */
1363 r->reganch = 0;
1364 r->regmust = NULL;
1365 r->regmlen = 0;
1366 r->regflags = regflags;
1367 if (flags & HASNL)
1368 r->regflags |= RF_HASNL;
1369 if (flags & HASLOOKBH)
1370 r->regflags |= RF_LOOKBH;
1371#ifdef FEAT_SYN_HL
1372 /* Remember whether this pattern has any \z specials in it. */
1373 r->reghasz = re_has_z;
1374#endif
1375 scan = r->program + 1; /* First BRANCH. */
1376 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1377 {
1378 scan = OPERAND(scan);
1379
1380 /* Starting-point info. */
1381 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1382 {
1383 r->reganch++;
1384 scan = regnext(scan);
1385 }
1386
1387 if (OP(scan) == EXACTLY)
1388 {
1389#ifdef FEAT_MBYTE
1390 if (has_mbyte)
1391 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1392 else
1393#endif
1394 r->regstart = *OPERAND(scan);
1395 }
1396 else if ((OP(scan) == BOW
1397 || OP(scan) == EOW
1398 || OP(scan) == NOTHING
1399 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1400 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1401 && OP(regnext(scan)) == EXACTLY)
1402 {
1403#ifdef FEAT_MBYTE
1404 if (has_mbyte)
1405 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1406 else
1407#endif
1408 r->regstart = *OPERAND(regnext(scan));
1409 }
1410
1411 /*
1412 * If there's something expensive in the r.e., find the longest
1413 * literal string that must appear and make it the regmust. Resolve
1414 * ties in favor of later strings, since the regstart check works
1415 * with the beginning of the r.e. and avoiding duplication
1416 * strengthens checking. Not a strong reason, but sufficient in the
1417 * absence of others.
1418 */
1419 /*
1420 * When the r.e. starts with BOW, it is faster to look for a regmust
1421 * first. Used a lot for "#" and "*" commands. (Added by mool).
1422 */
1423 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1424 && !(flags & HASNL))
1425 {
1426 longest = NULL;
1427 len = 0;
1428 for (; scan != NULL; scan = regnext(scan))
1429 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1430 {
1431 longest = OPERAND(scan);
1432 len = (int)STRLEN(OPERAND(scan));
1433 }
1434 r->regmust = longest;
1435 r->regmlen = len;
1436 }
1437 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001438#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001439 regdump(expr, r);
1440#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001441 r->engine = &bt_regengine;
1442 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001443}
1444
1445/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001446 * Free a compiled regexp program, returned by bt_regcomp().
1447 */
1448 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001449bt_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02001450{
1451 vim_free(prog);
1452}
1453
1454/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001455 * Setup to parse the regexp. Used once to get the length and once to do it.
1456 */
1457 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001458regcomp_start(
1459 char_u *expr,
1460 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001461{
1462 initchr(expr);
1463 if (re_flags & RE_MAGIC)
1464 reg_magic = MAGIC_ON;
1465 else
1466 reg_magic = MAGIC_OFF;
1467 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001468 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001469 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001470
1471 num_complex_braces = 0;
1472 regnpar = 1;
1473 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1474#ifdef FEAT_SYN_HL
1475 regnzpar = 1;
1476 re_has_z = 0;
1477#endif
1478 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001479 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001480 regflags = 0;
1481#if defined(FEAT_SYN_HL) || defined(PROTO)
1482 had_eol = FALSE;
1483#endif
1484}
1485
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: whether the EOL item "$" was found during the
 * previous call to vim_regcomp().  The flag is cleared by regcomp_start().
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol(void)
{
    return (had_eol);
}
#endif
1497
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001498/* variables for parsing reginput */
1499static int at_start; /* True when on the first character */
1500static int prev_at_start; /* True when on the second character */
1501
Bram Moolenaar071d4272004-06-13 20:20:40 +00001502/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001503 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001504 *
1505 * Caller must absorb opening parenthesis.
1506 *
1507 * Combining parenthesis handling with the base level of regular expression
1508 * is a trifle forced, but the need to tie the tails of the branches to what
1509 * follows makes it hard to avoid.
1510 */
1511 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001512reg(
1513 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1514 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001515{
1516 char_u *ret;
1517 char_u *br;
1518 char_u *ender;
1519 int parno = 0;
1520 int flags;
1521
1522 *flagp = HASWIDTH; /* Tentatively. */
1523
1524#ifdef FEAT_SYN_HL
1525 if (paren == REG_ZPAREN)
1526 {
1527 /* Make a ZOPEN node. */
1528 if (regnzpar >= NSUBEXP)
1529 EMSG_RET_NULL(_("E50: Too many \\z("));
1530 parno = regnzpar;
1531 regnzpar++;
1532 ret = regnode(ZOPEN + parno);
1533 }
1534 else
1535#endif
1536 if (paren == REG_PAREN)
1537 {
1538 /* Make a MOPEN node. */
1539 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001540 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001541 parno = regnpar;
1542 ++regnpar;
1543 ret = regnode(MOPEN + parno);
1544 }
1545 else if (paren == REG_NPAREN)
1546 {
1547 /* Make a NOPEN node. */
1548 ret = regnode(NOPEN);
1549 }
1550 else
1551 ret = NULL;
1552
1553 /* Pick up the branches, linking them together. */
1554 br = regbranch(&flags);
1555 if (br == NULL)
1556 return NULL;
1557 if (ret != NULL)
1558 regtail(ret, br); /* [MZ]OPEN -> first. */
1559 else
1560 ret = br;
1561 /* If one of the branches can be zero-width, the whole thing can.
1562 * If one of the branches has * at start or matches a line-break, the
1563 * whole thing can. */
1564 if (!(flags & HASWIDTH))
1565 *flagp &= ~HASWIDTH;
1566 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1567 while (peekchr() == Magic('|'))
1568 {
1569 skipchr();
1570 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001571 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001572 return NULL;
1573 regtail(ret, br); /* BRANCH -> BRANCH. */
1574 if (!(flags & HASWIDTH))
1575 *flagp &= ~HASWIDTH;
1576 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1577 }
1578
1579 /* Make a closing node, and hook it on the end. */
1580 ender = regnode(
1581#ifdef FEAT_SYN_HL
1582 paren == REG_ZPAREN ? ZCLOSE + parno :
1583#endif
1584 paren == REG_PAREN ? MCLOSE + parno :
1585 paren == REG_NPAREN ? NCLOSE : END);
1586 regtail(ret, ender);
1587
1588 /* Hook the tails of the branches to the closing node. */
1589 for (br = ret; br != NULL; br = regnext(br))
1590 regoptail(br, ender);
1591
1592 /* Check for proper termination. */
1593 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1594 {
1595#ifdef FEAT_SYN_HL
1596 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001597 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001598 else
1599#endif
1600 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001601 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001602 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001603 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001604 }
1605 else if (paren == REG_NOPAREN && peekchr() != NUL)
1606 {
1607 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001608 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001610 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001611 /* NOTREACHED */
1612 }
1613 /*
1614 * Here we set the flag allowing back references to this set of
1615 * parentheses.
1616 */
1617 if (paren == REG_PAREN)
1618 had_endbrace[parno] = TRUE; /* have seen the close paren */
1619 return ret;
1620}
1621
1622/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001623 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001624 * Implements the & operator.
1625 */
1626 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001627regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001628{
1629 char_u *ret;
1630 char_u *chain = NULL;
1631 char_u *latest;
1632 int flags;
1633
1634 *flagp = WORST | HASNL; /* Tentatively. */
1635
1636 ret = regnode(BRANCH);
1637 for (;;)
1638 {
1639 latest = regconcat(&flags);
1640 if (latest == NULL)
1641 return NULL;
1642 /* If one of the branches has width, the whole thing has. If one of
1643 * the branches anchors at start-of-line, the whole thing does.
1644 * If one of the branches uses look-behind, the whole thing does. */
1645 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1646 /* If one of the branches doesn't match a line-break, the whole thing
1647 * doesn't. */
1648 *flagp &= ~HASNL | (flags & HASNL);
1649 if (chain != NULL)
1650 regtail(chain, latest);
1651 if (peekchr() != Magic('&'))
1652 break;
1653 skipchr();
1654 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001655 if (reg_toolong)
1656 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001657 reginsert(MATCH, latest);
1658 chain = latest;
1659 }
1660
1661 return ret;
1662}
1663
1664/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001665 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001666 * Implements the concatenation operator.
1667 */
1668 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001669regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001670{
1671 char_u *first = NULL;
1672 char_u *chain = NULL;
1673 char_u *latest;
1674 int flags;
1675 int cont = TRUE;
1676
1677 *flagp = WORST; /* Tentatively. */
1678
1679 while (cont)
1680 {
1681 switch (peekchr())
1682 {
1683 case NUL:
1684 case Magic('|'):
1685 case Magic('&'):
1686 case Magic(')'):
1687 cont = FALSE;
1688 break;
1689 case Magic('Z'):
1690#ifdef FEAT_MBYTE
1691 regflags |= RF_ICOMBINE;
1692#endif
1693 skipchr_keepstart();
1694 break;
1695 case Magic('c'):
1696 regflags |= RF_ICASE;
1697 skipchr_keepstart();
1698 break;
1699 case Magic('C'):
1700 regflags |= RF_NOICASE;
1701 skipchr_keepstart();
1702 break;
1703 case Magic('v'):
1704 reg_magic = MAGIC_ALL;
1705 skipchr_keepstart();
1706 curchr = -1;
1707 break;
1708 case Magic('m'):
1709 reg_magic = MAGIC_ON;
1710 skipchr_keepstart();
1711 curchr = -1;
1712 break;
1713 case Magic('M'):
1714 reg_magic = MAGIC_OFF;
1715 skipchr_keepstart();
1716 curchr = -1;
1717 break;
1718 case Magic('V'):
1719 reg_magic = MAGIC_NONE;
1720 skipchr_keepstart();
1721 curchr = -1;
1722 break;
1723 default:
1724 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001725 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001726 return NULL;
1727 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1728 if (chain == NULL) /* First piece. */
1729 *flagp |= flags & SPSTART;
1730 else
1731 regtail(chain, latest);
1732 chain = latest;
1733 if (first == NULL)
1734 first = latest;
1735 break;
1736 }
1737 }
1738 if (first == NULL) /* Loop ran zero times. */
1739 first = regnode(NOTHING);
1740 return first;
1741}
1742
/*
 * Parse something followed by possible [*+=].
 *
 * One atom is parsed with regatom().  When it is followed by a multi ("*",
 * "+", "@...", "=", "?" or "{...}") the nodes for that multi are added around
 * the atom.  A multi directly following another multi is an error (E61/E62).
 *
 * "*flagp" is set to the flags of the atom when no multi follows, otherwise
 * to the flags that apply to the repeated item.
 *
 * Returns the first node of the piece, or NULL on failure.
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
    static char_u *
regpiece(int *flagp)
{
    char_u	    *ret;
    int		    op;
    char_u	    *next;
    int		    flags;
    long	    minval;
    long	    maxval;

    ret = regatom(&flags);
    if (ret == NULL)
	return NULL;

    /* Without a multi the piece is just the atom. */
    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
	*flagp = flags;
	return ret;
    }
    /* default flags */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
	case Magic('*'):
	    /* A SIMPLE atom only needs a STAR operator, anything else is
	     * built with BRANCH and BACK nodes. */
	    if (flags & SIMPLE)
		reginsert(STAR, ret);
	    else
	    {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(BRANCH, ret); /* Either x */
		regoptail(ret, regnode(BACK));	/* and loop */
		regoptail(ret, ret);	/* back */
		regtail(ret, regnode(BRANCH));	/* or */
		regtail(ret, regnode(NOTHING)); /* null. */
	    }
	    break;

	case Magic('+'):
	    /* A SIMPLE atom only needs a PLUS operator. */
	    if (flags & SIMPLE)
		reginsert(PLUS, ret);
	    else
	    {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(BRANCH); /* Either */
		regtail(ret, next);
		regtail(regnode(BACK), ret);	/* loop back */
		regtail(next, regnode(BRANCH)); /* or */
		regtail(ret, regnode(NOTHING)); /* null. */
	    }
	    /* Unlike the default flags this sets HASWIDTH and not SPSTART. */
	    *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
	    break;

	case Magic('@'):
	    {
		int	lop = END;	/* END: no valid operator found */
		int	nr;

		/* A number may come between the "@" and the operator; it is
		 * only used for the look-behind operators below. */
		nr = getdecchrs();
		switch (no_Magic(getchr()))
		{
		    case '=': lop = MATCH; break;		  /* \@= */
		    case '!': lop = NOMATCH; break;		  /* \@! */
		    case '>': lop = SUBPAT; break;		  /* \@> */
		    case '<': switch (no_Magic(getchr()))
			      {
				  case '=': lop = BEHIND; break;   /* \@<= */
				  case '!': lop = NOBEHIND; break; /* \@<! */
			      }
		}
		if (lop == END)
		    EMSG2_RET_NULL(_("E59: invalid character after %s@"),
						      reg_magic == MAGIC_ALL);
		/* Look behind must match with behind_pos. */
		if (lop == BEHIND || lop == NOBEHIND)
		{
		    regtail(ret, regnode(BHPOS));
		    *flagp |= HASLOOKBH;
		}
		regtail(ret, regnode(END)); /* operand ends */
		/* Only the look-behind operators store the number. */
		if (lop == BEHIND || lop == NOBEHIND)
		{
		    if (nr < 0)
			nr = 0; /* no limit is same as zero limit */
		    reginsert_nr(lop, nr, ret);
		}
		else
		    reginsert(lop, ret);
		break;
	    }

	case Magic('?'):
	case Magic('='):
	    /* Emit x= as (x|) */
	    reginsert(BRANCH, ret);		/* Either x */
	    regtail(ret, regnode(BRANCH));	/* or */
	    next = regnode(NOTHING);		/* null. */
	    regtail(ret, next);
	    regoptail(ret, next);
	    break;

	case Magic('{'):
	    /* read_limits() stores the limits in minval and maxval. */
	    if (!read_limits(&minval, &maxval))
		return NULL;
	    if (flags & SIMPLE)
	    {
		reginsert(BRACE_SIMPLE, ret);
		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
	    }
	    else
	    {
		/* Each complex brace uses its own BRACE_COMPLEX opcode, at
		 * most ten of them are available. */
		if (num_complex_braces >= 10)
		    EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
						      reg_magic == MAGIC_ALL);
		reginsert(BRACE_COMPLEX + num_complex_braces, ret);
		regoptail(ret, regnode(BACK));
		regoptail(ret, ret);
		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
		++num_complex_braces;
	    }
	    /* With both limits above zero HASWIDTH is set; this replaces the
	     * default flags, thus SPSTART is not included. */
	    if (minval > 0 && maxval > 0)
		*flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
	    break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
	/* Can't have a multi follow a multi. */
	/* The "\\" is left out of the message when the multi is used without
	 * a backslash at the current magic-ness. */
	if (peekchr() == Magic('*'))
	    sprintf((char *)IObuff, _("E61: Nested %s*"),
					    reg_magic >= MAGIC_ON ? "" : "\\");
	else
	    sprintf((char *)IObuff, _("E62: Nested %s%c"),
		reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
	EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1892
/*
 * Character class items and their opcodes.  The two tables are parallel:
 * classcodes[i] is the opcode for the character classchars[i].  regatom()
 * finds the character in classchars and uses its offset as the index in
 * classcodes, thus the order and the number of entries must stay the same.
 *
 * When making changes to classchars also change nfa_classcodes.
 */
static char_u	*classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
static int	classcodes[] = {
    ANY, IDENT, SIDENT, KWORD, SKWORD,
    FNAME, SFNAME, PRINT, SPRINT,
    WHITE, NWHITE, DIGIT, NDIGIT,
    HEX, NHEX, OCTAL, NOCTAL,
    WORD, NWORD, HEAD, NHEAD,
    ALPHA, NALPHA, LOWER, NLOWER,
    UPPER, NUPPER
};
1904
Bram Moolenaar071d4272004-06-13 20:20:40 +00001905/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001906 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001907 *
1908 * Optimization: gobbles an entire sequence of ordinary characters so that
1909 * it can turn them into a single node, which is smaller to store and
1910 * faster to run. Don't do this when one_exactly is set.
1911 */
1912 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001913regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001914{
1915 char_u *ret;
1916 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001917 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001918 char_u *p;
1919 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001920 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921
1922 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923
1924 c = getchr();
1925 switch (c)
1926 {
1927 case Magic('^'):
1928 ret = regnode(BOL);
1929 break;
1930
1931 case Magic('$'):
1932 ret = regnode(EOL);
1933#if defined(FEAT_SYN_HL) || defined(PROTO)
1934 had_eol = TRUE;
1935#endif
1936 break;
1937
1938 case Magic('<'):
1939 ret = regnode(BOW);
1940 break;
1941
1942 case Magic('>'):
1943 ret = regnode(EOW);
1944 break;
1945
1946 case Magic('_'):
1947 c = no_Magic(getchr());
1948 if (c == '^') /* "\_^" is start-of-line */
1949 {
1950 ret = regnode(BOL);
1951 break;
1952 }
1953 if (c == '$') /* "\_$" is end-of-line */
1954 {
1955 ret = regnode(EOL);
1956#if defined(FEAT_SYN_HL) || defined(PROTO)
1957 had_eol = TRUE;
1958#endif
1959 break;
1960 }
1961
1962 extra = ADD_NL;
1963 *flagp |= HASNL;
1964
1965 /* "\_[" is character range plus newline */
1966 if (c == '[')
1967 goto collection;
1968
1969 /* "\_x" is character class plus newline */
1970 /*FALLTHROUGH*/
1971
1972 /*
1973 * Character classes.
1974 */
1975 case Magic('.'):
1976 case Magic('i'):
1977 case Magic('I'):
1978 case Magic('k'):
1979 case Magic('K'):
1980 case Magic('f'):
1981 case Magic('F'):
1982 case Magic('p'):
1983 case Magic('P'):
1984 case Magic('s'):
1985 case Magic('S'):
1986 case Magic('d'):
1987 case Magic('D'):
1988 case Magic('x'):
1989 case Magic('X'):
1990 case Magic('o'):
1991 case Magic('O'):
1992 case Magic('w'):
1993 case Magic('W'):
1994 case Magic('h'):
1995 case Magic('H'):
1996 case Magic('a'):
1997 case Magic('A'):
1998 case Magic('l'):
1999 case Magic('L'):
2000 case Magic('u'):
2001 case Magic('U'):
2002 p = vim_strchr(classchars, no_Magic(c));
2003 if (p == NULL)
2004 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002005#ifdef FEAT_MBYTE
2006 /* When '.' is followed by a composing char ignore the dot, so that
2007 * the composing char is matched here. */
2008 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2009 {
2010 c = getchr();
2011 goto do_multibyte;
2012 }
2013#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002014 ret = regnode(classcodes[p - classchars] + extra);
2015 *flagp |= HASWIDTH | SIMPLE;
2016 break;
2017
2018 case Magic('n'):
2019 if (reg_string)
2020 {
2021 /* In a string "\n" matches a newline character. */
2022 ret = regnode(EXACTLY);
2023 regc(NL);
2024 regc(NUL);
2025 *flagp |= HASWIDTH | SIMPLE;
2026 }
2027 else
2028 {
2029 /* In buffer text "\n" matches the end of a line. */
2030 ret = regnode(NEWL);
2031 *flagp |= HASWIDTH | HASNL;
2032 }
2033 break;
2034
2035 case Magic('('):
2036 if (one_exactly)
2037 EMSG_ONE_RET_NULL;
2038 ret = reg(REG_PAREN, &flags);
2039 if (ret == NULL)
2040 return NULL;
2041 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2042 break;
2043
2044 case NUL:
2045 case Magic('|'):
2046 case Magic('&'):
2047 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002048 if (one_exactly)
2049 EMSG_ONE_RET_NULL;
Bram Moolenaar95f09602016-11-10 20:01:45 +01002050 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002051 /* NOTREACHED */
2052
2053 case Magic('='):
2054 case Magic('?'):
2055 case Magic('+'):
2056 case Magic('@'):
2057 case Magic('{'):
2058 case Magic('*'):
2059 c = no_Magic(c);
2060 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2061 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2062 ? "" : "\\", c);
2063 EMSG_RET_NULL(IObuff);
2064 /* NOTREACHED */
2065
2066 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002067 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002068 {
2069 char_u *lp;
2070
2071 ret = regnode(EXACTLY);
2072 lp = reg_prev_sub;
2073 while (*lp != NUL)
2074 regc(*lp++);
2075 regc(NUL);
2076 if (*reg_prev_sub != NUL)
2077 {
2078 *flagp |= HASWIDTH;
2079 if ((lp - reg_prev_sub) == 1)
2080 *flagp |= SIMPLE;
2081 }
2082 }
2083 else
2084 EMSG_RET_NULL(_(e_nopresub));
2085 break;
2086
2087 case Magic('1'):
2088 case Magic('2'):
2089 case Magic('3'):
2090 case Magic('4'):
2091 case Magic('5'):
2092 case Magic('6'):
2093 case Magic('7'):
2094 case Magic('8'):
2095 case Magic('9'):
2096 {
2097 int refnum;
2098
2099 refnum = c - Magic('0');
2100 /*
2101 * Check if the back reference is legal. We must have seen the
2102 * close brace.
2103 * TODO: Should also check that we don't refer to something
2104 * that is repeated (+*=): what instance of the repetition
2105 * should we match?
2106 */
2107 if (!had_endbrace[refnum])
2108 {
2109 /* Trick: check if "@<=" or "@<!" follows, in which case
2110 * the \1 can appear before the referenced match. */
2111 for (p = regparse; *p != NUL; ++p)
2112 if (p[0] == '@' && p[1] == '<'
2113 && (p[2] == '!' || p[2] == '='))
2114 break;
2115 if (*p == NUL)
2116 EMSG_RET_NULL(_("E65: Illegal back reference"));
2117 }
2118 ret = regnode(BACKREF + refnum);
2119 }
2120 break;
2121
Bram Moolenaar071d4272004-06-13 20:20:40 +00002122 case Magic('z'):
2123 {
2124 c = no_Magic(getchr());
2125 switch (c)
2126 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002127#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002128 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002129 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002130 if (one_exactly)
2131 EMSG_ONE_RET_NULL;
2132 ret = reg(REG_ZPAREN, &flags);
2133 if (ret == NULL)
2134 return NULL;
2135 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2136 re_has_z = REX_SET;
2137 break;
2138
2139 case '1':
2140 case '2':
2141 case '3':
2142 case '4':
2143 case '5':
2144 case '6':
2145 case '7':
2146 case '8':
2147 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002148 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002149 ret = regnode(ZREF + c - '0');
2150 re_has_z = REX_USE;
2151 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002152#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002153
2154 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002155 if (re_mult_next("\\zs") == FAIL)
2156 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002157 break;
2158
2159 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002160 if (re_mult_next("\\ze") == FAIL)
2161 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002162 break;
2163
2164 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2165 }
2166 }
2167 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002168
2169 case Magic('%'):
2170 {
2171 c = no_Magic(getchr());
2172 switch (c)
2173 {
2174 /* () without a back reference */
2175 case '(':
2176 if (one_exactly)
2177 EMSG_ONE_RET_NULL;
2178 ret = reg(REG_NPAREN, &flags);
2179 if (ret == NULL)
2180 return NULL;
2181 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2182 break;
2183
2184 /* Catch \%^ and \%$ regardless of where they appear in the
2185 * pattern -- regardless of whether or not it makes sense. */
2186 case '^':
2187 ret = regnode(RE_BOF);
2188 break;
2189
2190 case '$':
2191 ret = regnode(RE_EOF);
2192 break;
2193
2194 case '#':
2195 ret = regnode(CURSOR);
2196 break;
2197
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002198 case 'V':
2199 ret = regnode(RE_VISUAL);
2200 break;
2201
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002202 case 'C':
2203 ret = regnode(RE_COMPOSING);
2204 break;
2205
Bram Moolenaar071d4272004-06-13 20:20:40 +00002206 /* \%[abc]: Emit as a list of branches, all ending at the last
2207 * branch which matches nothing. */
2208 case '[':
2209 if (one_exactly) /* doesn't nest */
2210 EMSG_ONE_RET_NULL;
2211 {
2212 char_u *lastbranch;
2213 char_u *lastnode = NULL;
2214 char_u *br;
2215
2216 ret = NULL;
2217 while ((c = getchr()) != ']')
2218 {
2219 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002220 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002221 reg_magic == MAGIC_ALL);
2222 br = regnode(BRANCH);
2223 if (ret == NULL)
2224 ret = br;
2225 else
2226 regtail(lastnode, br);
2227
2228 ungetchr();
2229 one_exactly = TRUE;
2230 lastnode = regatom(flagp);
2231 one_exactly = FALSE;
2232 if (lastnode == NULL)
2233 return NULL;
2234 }
2235 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002236 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002237 reg_magic == MAGIC_ALL);
2238 lastbranch = regnode(BRANCH);
2239 br = regnode(NOTHING);
2240 if (ret != JUST_CALC_SIZE)
2241 {
2242 regtail(lastnode, br);
2243 regtail(lastbranch, br);
2244 /* connect all branches to the NOTHING
2245 * branch at the end */
2246 for (br = ret; br != lastnode; )
2247 {
2248 if (OP(br) == BRANCH)
2249 {
2250 regtail(br, lastbranch);
2251 br = OPERAND(br);
2252 }
2253 else
2254 br = regnext(br);
2255 }
2256 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002257 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002258 break;
2259 }
2260
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002261 case 'd': /* %d123 decimal */
2262 case 'o': /* %o123 octal */
2263 case 'x': /* %xab hex 2 */
2264 case 'u': /* %uabcd hex 4 */
2265 case 'U': /* %U1234abcd hex 8 */
2266 {
2267 int i;
2268
2269 switch (c)
2270 {
2271 case 'd': i = getdecchrs(); break;
2272 case 'o': i = getoctchrs(); break;
2273 case 'x': i = gethexchrs(2); break;
2274 case 'u': i = gethexchrs(4); break;
2275 case 'U': i = gethexchrs(8); break;
2276 default: i = -1; break;
2277 }
2278
2279 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002280 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002281 _("E678: Invalid character after %s%%[dxouU]"),
2282 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002283#ifdef FEAT_MBYTE
2284 if (use_multibytecode(i))
2285 ret = regnode(MULTIBYTECODE);
2286 else
2287#endif
2288 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002289 if (i == 0)
2290 regc(0x0a);
2291 else
2292#ifdef FEAT_MBYTE
2293 regmbc(i);
2294#else
2295 regc(i);
2296#endif
2297 regc(NUL);
2298 *flagp |= HASWIDTH;
2299 break;
2300 }
2301
Bram Moolenaar071d4272004-06-13 20:20:40 +00002302 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002303 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2304 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002305 {
2306 long_u n = 0;
2307 int cmp;
2308
2309 cmp = c;
2310 if (cmp == '<' || cmp == '>')
2311 c = getchr();
2312 while (VIM_ISDIGIT(c))
2313 {
2314 n = n * 10 + (c - '0');
2315 c = getchr();
2316 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002317 if (c == '\'' && n == 0)
2318 {
2319 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2320 c = getchr();
2321 ret = regnode(RE_MARK);
2322 if (ret == JUST_CALC_SIZE)
2323 regsize += 2;
2324 else
2325 {
2326 *regcode++ = c;
2327 *regcode++ = cmp;
2328 }
2329 break;
2330 }
2331 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002332 {
2333 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002334 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002335 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002336 if (save_prev_at_start)
2337 at_start = TRUE;
2338 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002339 else if (c == 'c')
2340 ret = regnode(RE_COL);
2341 else
2342 ret = regnode(RE_VCOL);
2343 if (ret == JUST_CALC_SIZE)
2344 regsize += 5;
2345 else
2346 {
2347 /* put the number and the optional
2348 * comparator after the opcode */
2349 regcode = re_put_long(regcode, n);
2350 *regcode++ = cmp;
2351 }
2352 break;
2353 }
2354 }
2355
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002356 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002357 reg_magic == MAGIC_ALL);
2358 }
2359 }
2360 break;
2361
2362 case Magic('['):
2363collection:
2364 {
2365 char_u *lp;
2366
2367 /*
2368 * If there is no matching ']', we assume the '[' is a normal
2369 * character. This makes 'incsearch' and ":help [" work.
2370 */
2371 lp = skip_anyof(regparse);
2372 if (*lp == ']') /* there is a matching ']' */
2373 {
2374 int startc = -1; /* > 0 when next '-' is a range */
2375 int endc;
2376
2377 /*
2378 * In a character class, different parsing rules apply.
2379 * Not even \ is special anymore, nothing is.
2380 */
2381 if (*regparse == '^') /* Complement of range. */
2382 {
2383 ret = regnode(ANYBUT + extra);
2384 regparse++;
2385 }
2386 else
2387 ret = regnode(ANYOF + extra);
2388
2389 /* At the start ']' and '-' mean the literal character. */
2390 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002391 {
2392 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002393 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002394 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002395
2396 while (*regparse != NUL && *regparse != ']')
2397 {
2398 if (*regparse == '-')
2399 {
2400 ++regparse;
2401 /* The '-' is not used for a range at the end and
2402 * after or before a '\n'. */
2403 if (*regparse == ']' || *regparse == NUL
2404 || startc == -1
2405 || (regparse[0] == '\\' && regparse[1] == 'n'))
2406 {
2407 regc('-');
2408 startc = '-'; /* [--x] is a range */
2409 }
2410 else
2411 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002412 /* Also accept "a-[.z.]" */
2413 endc = 0;
2414 if (*regparse == '[')
2415 endc = get_coll_element(&regparse);
2416 if (endc == 0)
2417 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002418#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002419 if (has_mbyte)
2420 endc = mb_ptr2char_adv(&regparse);
2421 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002422#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002423 endc = *regparse++;
2424 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002425
2426 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002427 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002428 endc = coll_get_char();
2429
Bram Moolenaar071d4272004-06-13 20:20:40 +00002430 if (startc > endc)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002431 EMSG_RET_NULL(_(e_reverse_range));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002432#ifdef FEAT_MBYTE
2433 if (has_mbyte && ((*mb_char2len)(startc) > 1
2434 || (*mb_char2len)(endc) > 1))
2435 {
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002436 /* Limit to a range of 256 chars. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002437 if (endc > startc + 256)
Bram Moolenaar966e58e2017-06-05 16:54:08 +02002438 EMSG_RET_NULL(_(e_large_class));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002439 while (++startc <= endc)
2440 regmbc(startc);
2441 }
2442 else
2443#endif
2444 {
2445#ifdef EBCDIC
2446 int alpha_only = FALSE;
2447
2448 /* for alphabetical range skip the gaps
2449 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2450 if (isalpha(startc) && isalpha(endc))
2451 alpha_only = TRUE;
2452#endif
2453 while (++startc <= endc)
2454#ifdef EBCDIC
2455 if (!alpha_only || isalpha(startc))
2456#endif
2457 regc(startc);
2458 }
2459 startc = -1;
2460 }
2461 }
2462 /*
2463 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2464 * accepts "\t", "\e", etc., but only when the 'l' flag in
2465 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002466 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002467 */
2468 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002469 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002470 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002471 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002472 && vim_strchr(REGEXP_ABBR,
2473 regparse[1]) != NULL)))
2474 {
2475 regparse++;
2476 if (*regparse == 'n')
2477 {
2478 /* '\n' in range: also match NL */
2479 if (ret != JUST_CALC_SIZE)
2480 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002481 /* Using \n inside [^] does not change what
2482 * matches. "[^\n]" is the same as ".". */
2483 if (*ret == ANYOF)
2484 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002485 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002486 *flagp |= HASNL;
2487 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 /* else: must have had a \n already */
2489 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002490 regparse++;
2491 startc = -1;
2492 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002493 else if (*regparse == 'd'
2494 || *regparse == 'o'
2495 || *regparse == 'x'
2496 || *regparse == 'u'
2497 || *regparse == 'U')
2498 {
2499 startc = coll_get_char();
2500 if (startc == 0)
2501 regc(0x0a);
2502 else
2503#ifdef FEAT_MBYTE
2504 regmbc(startc);
2505#else
2506 regc(startc);
2507#endif
2508 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002509 else
2510 {
2511 startc = backslash_trans(*regparse++);
2512 regc(startc);
2513 }
2514 }
2515 else if (*regparse == '[')
2516 {
2517 int c_class;
2518 int cu;
2519
Bram Moolenaardf177f62005-02-22 08:39:57 +00002520 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002521 startc = -1;
2522 /* Characters assumed to be 8 bits! */
2523 switch (c_class)
2524 {
2525 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002526 c_class = get_equi_class(&regparse);
2527 if (c_class != 0)
2528 {
2529 /* produce equivalence class */
2530 reg_equi_class(c_class);
2531 }
2532 else if ((c_class =
2533 get_coll_element(&regparse)) != 0)
2534 {
2535 /* produce a collating element */
2536 regmbc(c_class);
2537 }
2538 else
2539 {
2540 /* literal '[', allow [[-x] as a range */
2541 startc = *regparse++;
2542 regc(startc);
2543 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002544 break;
2545 case CLASS_ALNUM:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002546 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002547 if (isalnum(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002548 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002549 break;
2550 case CLASS_ALPHA:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002551 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002552 if (isalpha(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002553 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002554 break;
2555 case CLASS_BLANK:
2556 regc(' ');
2557 regc('\t');
2558 break;
2559 case CLASS_CNTRL:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002560 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002561 if (iscntrl(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002562 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002563 break;
2564 case CLASS_DIGIT:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002565 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002566 if (VIM_ISDIGIT(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002567 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002568 break;
2569 case CLASS_GRAPH:
Bram Moolenaar0c078fc2017-03-29 15:31:20 +02002570 for (cu = 1; cu <= 127; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002571 if (isgraph(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002572 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002573 break;
2574 case CLASS_LOWER:
2575 for (cu = 1; cu <= 255; cu++)
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002576 if (MB_ISLOWER(cu) && cu != 170
2577 && cu != 186)
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002578 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002579 break;
2580 case CLASS_PRINT:
2581 for (cu = 1; cu <= 255; cu++)
2582 if (vim_isprintc(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002583 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002584 break;
2585 case CLASS_PUNCT:
Bram Moolenaare8aee7d2016-04-26 21:39:13 +02002586 for (cu = 1; cu < 128; cu++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 if (ispunct(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002588 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002589 break;
2590 case CLASS_SPACE:
2591 for (cu = 9; cu <= 13; cu++)
2592 regc(cu);
2593 regc(' ');
2594 break;
2595 case CLASS_UPPER:
2596 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002597 if (MB_ISUPPER(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002598 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002599 break;
2600 case CLASS_XDIGIT:
2601 for (cu = 1; cu <= 255; cu++)
2602 if (vim_isxdigit(cu))
Bram Moolenaaraf98a492016-04-24 14:40:12 +02002603 regmbc(cu);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002604 break;
2605 case CLASS_TAB:
2606 regc('\t');
2607 break;
2608 case CLASS_RETURN:
2609 regc('\r');
2610 break;
2611 case CLASS_BACKSPACE:
2612 regc('\b');
2613 break;
2614 case CLASS_ESCAPE:
2615 regc('\033');
2616 break;
2617 }
2618 }
2619 else
2620 {
2621#ifdef FEAT_MBYTE
2622 if (has_mbyte)
2623 {
2624 int len;
2625
2626 /* produce a multibyte character, including any
2627 * following composing characters */
2628 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002629 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002630 if (enc_utf8 && utf_char2len(startc) != len)
2631 startc = -1; /* composing chars */
2632 while (--len >= 0)
2633 regc(*regparse++);
2634 }
2635 else
2636#endif
2637 {
2638 startc = *regparse++;
2639 regc(startc);
2640 }
2641 }
2642 }
2643 regc(NUL);
2644 prevchr_len = 1; /* last char was the ']' */
2645 if (*regparse != ']')
2646 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2647 skipchr(); /* let's be friends with the lexer again */
2648 *flagp |= HASWIDTH | SIMPLE;
2649 break;
2650 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002651 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002652 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002653 }
2654 /* FALLTHROUGH */
2655
2656 default:
2657 {
2658 int len;
2659
2660#ifdef FEAT_MBYTE
2661 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002662 * before a multi and when it's a composing char. */
2663 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002664 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002665do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002666 ret = regnode(MULTIBYTECODE);
2667 regmbc(c);
2668 *flagp |= HASWIDTH | SIMPLE;
2669 break;
2670 }
2671#endif
2672
2673 ret = regnode(EXACTLY);
2674
2675 /*
2676 * Append characters as long as:
2677 * - there is no following multi, we then need the character in
2678 * front of it as a single character operand
2679 * - not running into a Magic character
2680 * - "one_exactly" is not set
2681 * But always emit at least one character. Might be a Multi,
2682 * e.g., a "[" without matching "]".
2683 */
2684 for (len = 0; c != NUL && (len == 0
2685 || (re_multi_type(peekchr()) == NOT_MULTI
2686 && !one_exactly
2687 && !is_Magic(c))); ++len)
2688 {
2689 c = no_Magic(c);
2690#ifdef FEAT_MBYTE
2691 if (has_mbyte)
2692 {
2693 regmbc(c);
2694 if (enc_utf8)
2695 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002696 int l;
2697
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002698 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002699 for (;;)
2700 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002701 l = utf_ptr2len(regparse);
2702 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002703 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002704 regmbc(utf_ptr2char(regparse));
2705 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002706 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002707 }
2708 }
2709 else
2710#endif
2711 regc(c);
2712 c = getchr();
2713 }
2714 ungetchr();
2715
2716 regc(NUL);
2717 *flagp |= HASWIDTH;
2718 if (len == 1)
2719 *flagp |= SIMPLE;
2720 }
2721 break;
2722 }
2723
2724 return ret;
2725}
2726
#ifdef FEAT_MBYTE
/*
 * Decide whether character "c" must be emitted as a MULTIBYTECODE node
 * rather than as part of an EXACTLY node.
 * That is the case for a character taking more than one byte when it is
 * either followed by a multi or (with UTF-8) is a composing character.
 * Returns non-zero when MULTIBYTECODE should be used.
 */
    static int
use_multibytecode(int c)
{
    /* Single-byte encodings and single-byte characters never need it. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
        return 0;

    /* A following multi applies to the whole multi-byte character. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
        return 1;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2740
Bram Moolenaar071d4272004-06-13 20:20:40 +00002741/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002742 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002743 * Return pointer to generated code.
2744 */
2745 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002746regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002747{
2748 char_u *ret;
2749
2750 ret = regcode;
2751 if (ret == JUST_CALC_SIZE)
2752 regsize += 3;
2753 else
2754 {
2755 *regcode++ = op;
2756 *regcode++ = NUL; /* Null "next" pointer. */
2757 *regcode++ = NUL;
2758 }
2759 return ret;
2760}
2761
2762/*
2763 * Emit (if appropriate) a byte of code
2764 */
2765 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002766regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002767{
2768 if (regcode == JUST_CALC_SIZE)
2769 regsize++;
2770 else
2771 *regcode++ = b;
2772}
2773
#ifdef FEAT_MBYTE
/*
 * Emit the bytes of the (possibly multi-byte) character "c", or just count
 * them when only the size of the program is being calculated.
 * A character above 0xff is silently dropped when "has_mbyte" is not set.
 */
    static void
regmbc(int c)
{
    if (c > 0xff && !has_mbyte)
        return;

    if (regcode != JUST_CALC_SIZE)
    {
        regcode += (*mb_char2bytes)(c, regcode);
        return;
    }
    regsize += (*mb_char2len)(c);
}
#endif
2789
2790/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002791 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002792 *
2793 * Means relocating the operand.
2794 */
2795 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002796reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002797{
2798 char_u *src;
2799 char_u *dst;
2800 char_u *place;
2801
2802 if (regcode == JUST_CALC_SIZE)
2803 {
2804 regsize += 3;
2805 return;
2806 }
2807 src = regcode;
2808 regcode += 3;
2809 dst = regcode;
2810 while (src > opnd)
2811 *--dst = *--src;
2812
2813 place = opnd; /* Op node, where operand used to be. */
2814 *place++ = op;
2815 *place++ = NUL;
2816 *place = NUL;
2817}
2818
2819/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002820 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002821 * Add a number to the operator.
2822 */
2823 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002824reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002825{
2826 char_u *src;
2827 char_u *dst;
2828 char_u *place;
2829
2830 if (regcode == JUST_CALC_SIZE)
2831 {
2832 regsize += 7;
2833 return;
2834 }
2835 src = regcode;
2836 regcode += 7;
2837 dst = regcode;
2838 while (src > opnd)
2839 *--dst = *--src;
2840
2841 place = opnd; /* Op node, where operand used to be. */
2842 *place++ = op;
2843 *place++ = NUL;
2844 *place++ = NUL;
2845 place = re_put_long(place, (long_u)val);
2846}
2847
2848/*
2849 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002850 * The operator has the given limit values as operands. Also set next pointer.
2851 *
2852 * Means relocating the operand.
2853 */
2854 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002855reginsert_limits(
2856 int op,
2857 long minval,
2858 long maxval,
2859 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002860{
2861 char_u *src;
2862 char_u *dst;
2863 char_u *place;
2864
2865 if (regcode == JUST_CALC_SIZE)
2866 {
2867 regsize += 11;
2868 return;
2869 }
2870 src = regcode;
2871 regcode += 11;
2872 dst = regcode;
2873 while (src > opnd)
2874 *--dst = *--src;
2875
2876 place = opnd; /* Op node, where operand used to be. */
2877 *place++ = op;
2878 *place++ = NUL;
2879 *place++ = NUL;
2880 place = re_put_long(place, (long_u)minval);
2881 place = re_put_long(place, (long_u)maxval);
2882 regtail(opnd, place);
2883}
2884
2885/*
2886 * Write a long as four bytes at "p" and return pointer to the next char.
2887 */
2888 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002889re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002890{
2891 *p++ = (char_u) ((val >> 24) & 0377);
2892 *p++ = (char_u) ((val >> 16) & 0377);
2893 *p++ = (char_u) ((val >> 8) & 0377);
2894 *p++ = (char_u) (val & 0377);
2895 return p;
2896}
2897
2898/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002899 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002900 */
2901 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002902regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002903{
2904 char_u *scan;
2905 char_u *temp;
2906 int offset;
2907
2908 if (p == JUST_CALC_SIZE)
2909 return;
2910
2911 /* Find last node. */
2912 scan = p;
2913 for (;;)
2914 {
2915 temp = regnext(scan);
2916 if (temp == NULL)
2917 break;
2918 scan = temp;
2919 }
2920
Bram Moolenaar582fd852005-03-28 20:58:01 +00002921 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002922 offset = (int)(scan - val);
2923 else
2924 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002925 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002926 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002927 * values in too many places. */
2928 if (offset > 0xffff)
2929 reg_toolong = TRUE;
2930 else
2931 {
2932 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2933 *(scan + 2) = (char_u) (offset & 0377);
2934 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002935}
2936
2937/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002938 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002939 */
2940 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002941regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002942{
2943 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2944 if (p == NULL || p == JUST_CALC_SIZE
2945 || (OP(p) != BRANCH
2946 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2947 return;
2948 regtail(OPERAND(p), val);
2949}
2950
2951/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002952 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002953 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002954/*
2955 * Start parsing at "str".
2956 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002958initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002959{
2960 regparse = str;
2961 prevchr_len = 0;
2962 curchr = prevprevchr = prevchr = nextchr = -1;
2963 at_start = TRUE;
2964 prev_at_start = FALSE;
2965}
2966
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002967/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002968 * Save the current parse state, so that it can be restored and parsing
2969 * starts in the same state again.
2970 */
2971 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002972save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002973{
2974 ps->regparse = regparse;
2975 ps->prevchr_len = prevchr_len;
2976 ps->curchr = curchr;
2977 ps->prevchr = prevchr;
2978 ps->prevprevchr = prevprevchr;
2979 ps->nextchr = nextchr;
2980 ps->at_start = at_start;
2981 ps->prev_at_start = prev_at_start;
2982 ps->regnpar = regnpar;
2983}
2984
2985/*
2986 * Restore a previously saved parse state.
2987 */
2988 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002989restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002990{
2991 regparse = ps->regparse;
2992 prevchr_len = ps->prevchr_len;
2993 curchr = ps->curchr;
2994 prevchr = ps->prevchr;
2995 prevprevchr = ps->prevprevchr;
2996 nextchr = ps->nextchr;
2997 at_start = ps->at_start;
2998 prev_at_start = ps->prev_at_start;
2999 regnpar = ps->regnpar;
3000}
3001
3002
3003/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003004 * Get the next character without advancing.
3005 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003006 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003007peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003008{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003009 static int after_slash = FALSE;
3010
Bram Moolenaar071d4272004-06-13 20:20:40 +00003011 if (curchr == -1)
3012 {
3013 switch (curchr = regparse[0])
3014 {
3015 case '.':
3016 case '[':
3017 case '~':
3018 /* magic when 'magic' is on */
3019 if (reg_magic >= MAGIC_ON)
3020 curchr = Magic(curchr);
3021 break;
3022 case '(':
3023 case ')':
3024 case '{':
3025 case '%':
3026 case '+':
3027 case '=':
3028 case '?':
3029 case '@':
3030 case '!':
3031 case '&':
3032 case '|':
3033 case '<':
3034 case '>':
3035 case '#': /* future ext. */
3036 case '"': /* future ext. */
3037 case '\'': /* future ext. */
3038 case ',': /* future ext. */
3039 case '-': /* future ext. */
3040 case ':': /* future ext. */
3041 case ';': /* future ext. */
3042 case '`': /* future ext. */
3043 case '/': /* Can't be used in / command */
3044 /* magic only after "\v" */
3045 if (reg_magic == MAGIC_ALL)
3046 curchr = Magic(curchr);
3047 break;
3048 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003049 /* * is not magic as the very first character, eg "?*ptr", when
3050 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3051 * "\(\*" is not magic, thus must be magic if "after_slash" */
3052 if (reg_magic >= MAGIC_ON
3053 && !at_start
3054 && !(prev_at_start && prevchr == Magic('^'))
3055 && (after_slash
3056 || (prevchr != Magic('(')
3057 && prevchr != Magic('&')
3058 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003059 curchr = Magic('*');
3060 break;
3061 case '^':
3062 /* '^' is only magic as the very first character and if it's after
3063 * "\(", "\|", "\&' or "\n" */
3064 if (reg_magic >= MAGIC_OFF
3065 && (at_start
3066 || reg_magic == MAGIC_ALL
3067 || prevchr == Magic('(')
3068 || prevchr == Magic('|')
3069 || prevchr == Magic('&')
3070 || prevchr == Magic('n')
3071 || (no_Magic(prevchr) == '('
3072 && prevprevchr == Magic('%'))))
3073 {
3074 curchr = Magic('^');
3075 at_start = TRUE;
3076 prev_at_start = FALSE;
3077 }
3078 break;
3079 case '$':
3080 /* '$' is only magic as the very last char and if it's in front of
3081 * either "\|", "\)", "\&", or "\n" */
3082 if (reg_magic >= MAGIC_OFF)
3083 {
3084 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003085 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003086
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003087 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003088 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003089 || p[1] == 'm' || p[1] == 'M'
3090 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3091 {
3092 if (p[1] == 'v')
3093 is_magic_all = TRUE;
3094 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3095 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003096 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003097 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003098 if (p[0] == NUL
3099 || (p[0] == '\\'
3100 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3101 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003102 || (is_magic_all
3103 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003104 || reg_magic == MAGIC_ALL)
3105 curchr = Magic('$');
3106 }
3107 break;
3108 case '\\':
3109 {
3110 int c = regparse[1];
3111
3112 if (c == NUL)
3113 curchr = '\\'; /* trailing '\' */
3114 else if (
3115#ifdef EBCDIC
3116 vim_strchr(META, c)
3117#else
3118 c <= '~' && META_flags[c]
3119#endif
3120 )
3121 {
3122 /*
3123 * META contains everything that may be magic sometimes,
3124 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003125 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003126 * magicness. Therefore, \ is so meta-magic that it is
3127 * not in META.
3128 */
3129 curchr = -1;
3130 prev_at_start = at_start;
3131 at_start = FALSE; /* be able to say "/\*ptr" */
3132 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003133 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003134 peekchr();
3135 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003136 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003137 curchr = toggle_Magic(curchr);
3138 }
3139 else if (vim_strchr(REGEXP_ABBR, c))
3140 {
3141 /*
3142 * Handle abbreviations, like "\t" for TAB -- webb
3143 */
3144 curchr = backslash_trans(c);
3145 }
3146 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3147 curchr = toggle_Magic(c);
3148 else
3149 {
3150 /*
3151 * Next character can never be (made) magic?
3152 * Then backslashing it won't do anything.
3153 */
3154#ifdef FEAT_MBYTE
3155 if (has_mbyte)
3156 curchr = (*mb_ptr2char)(regparse + 1);
3157 else
3158#endif
3159 curchr = c;
3160 }
3161 break;
3162 }
3163
3164#ifdef FEAT_MBYTE
3165 default:
3166 if (has_mbyte)
3167 curchr = (*mb_ptr2char)(regparse);
3168#endif
3169 }
3170 }
3171
3172 return curchr;
3173}
3174
3175/*
3176 * Eat one lexed character. Do this in a way that we can undo it.
3177 */
3178 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003179skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003180{
3181 /* peekchr() eats a backslash, do the same here */
3182 if (*regparse == '\\')
3183 prevchr_len = 1;
3184 else
3185 prevchr_len = 0;
3186 if (regparse[prevchr_len] != NUL)
3187 {
3188#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003189 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003190 /* exclude composing chars that mb_ptr2len does include */
3191 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003192 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003193 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003194 else
3195#endif
3196 ++prevchr_len;
3197 }
3198 regparse += prevchr_len;
3199 prev_at_start = at_start;
3200 at_start = FALSE;
3201 prevprevchr = prevchr;
3202 prevchr = curchr;
3203 curchr = nextchr; /* use previously unget char, or -1 */
3204 nextchr = -1;
3205}
3206
3207/*
3208 * Skip a character while keeping the value of prev_at_start for at_start.
3209 * prevchr and prevprevchr are also kept.
3210 */
3211 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003212skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003213{
3214 int as = prev_at_start;
3215 int pr = prevchr;
3216 int prpr = prevprevchr;
3217
3218 skipchr();
3219 at_start = as;
3220 prevchr = pr;
3221 prevprevchr = prpr;
3222}
3223
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer: the
 * character comes from peekchr(), skipchr() does the advancing.
 */
    static int
getchr(void)
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3236
3237/*
3238 * put character back. Works only once!
3239 */
3240 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003241ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003242{
3243 nextchr = curchr;
3244 curchr = prevchr;
3245 prevchr = prevprevchr;
3246 at_start = prev_at_start;
3247 prev_at_start = FALSE;
3248
3249 /* Backup regparse, so that it's at the same position as before the
3250 * getchr(). */
3251 regparse -= prevchr_len;
3252}
3253
3254/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003255 * Get and return the value of the hex string at the current position.
3256 * Return -1 if there is no valid hex number.
3257 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003258 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003259 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003260 * The parameter controls the maximum number of input characters. This will be
3261 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3262 */
3263 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003264gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003265{
3266 int nr = 0;
3267 int c;
3268 int i;
3269
3270 for (i = 0; i < maxinputlen; ++i)
3271 {
3272 c = regparse[0];
3273 if (!vim_isxdigit(c))
3274 break;
3275 nr <<= 4;
3276 nr |= hex2nr(c);
3277 ++regparse;
3278 }
3279
3280 if (i == 0)
3281 return -1;
3282 return nr;
3283}
3284
3285/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003286 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003287 * current position. Return -1 for invalid. Consumes all digits.
3288 */
3289 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003290getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003291{
3292 int nr = 0;
3293 int c;
3294 int i;
3295
3296 for (i = 0; ; ++i)
3297 {
3298 c = regparse[0];
3299 if (c < '0' || c > '9')
3300 break;
3301 nr *= 10;
3302 nr += c - '0';
3303 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003304 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003305 }
3306
3307 if (i == 0)
3308 return -1;
3309 return nr;
3310}
3311
3312/*
3313 * get and return the value of the octal string immediately after the current
3314 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3315 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3316 * treat 8 or 9 as recognised characters. Position is updated:
3317 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003318 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003319 */
3320 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003321getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003322{
3323 int nr = 0;
3324 int c;
3325 int i;
3326
3327 for (i = 0; i < 3 && nr < 040; ++i)
3328 {
3329 c = regparse[0];
3330 if (c < '0' || c > '7')
3331 break;
3332 nr <<= 3;
3333 nr |= hex2nr(c);
3334 ++regparse;
3335 }
3336
3337 if (i == 0)
3338 return -1;
3339 return nr;
3340}
3341
3342/*
3343 * Get a number after a backslash that is inside [].
3344 * When nothing is recognized return a backslash.
3345 */
3346 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003347coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003348{
3349 int nr = -1;
3350
3351 switch (*regparse++)
3352 {
3353 case 'd': nr = getdecchrs(); break;
3354 case 'o': nr = getoctchrs(); break;
3355 case 'x': nr = gethexchrs(2); break;
3356 case 'u': nr = gethexchrs(4); break;
3357 case 'U': nr = gethexchrs(8); break;
3358 }
3359 if (nr < 0)
3360 {
3361 /* If getting the number fails be backwards compatible: the character
3362 * is a backslash. */
3363 --regparse;
3364 nr = '\\';
3365 }
3366 return nr;
3367}
3368
3369/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003370 * read_limits - Read two integers to be taken as a minimum and maximum.
3371 * If the first character is '-', then the range is reversed.
3372 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3373 * missing, a very big number is the default.
3374 */
3375 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003376read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003377{
3378 int reverse = FALSE;
3379 char_u *first_char;
3380 long tmp;
3381
3382 if (*regparse == '-')
3383 {
3384 /* Starts with '-', so reverse the range later */
3385 regparse++;
3386 reverse = TRUE;
3387 }
3388 first_char = regparse;
3389 *minval = getdigits(&regparse);
3390 if (*regparse == ',') /* There is a comma */
3391 {
3392 if (vim_isdigit(*++regparse))
3393 *maxval = getdigits(&regparse);
3394 else
3395 *maxval = MAX_LIMIT;
3396 }
3397 else if (VIM_ISDIGIT(*first_char))
3398 *maxval = *minval; /* It was \{n} or \{-n} */
3399 else
3400 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3401 if (*regparse == '\\')
3402 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003403 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003404 {
3405 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3406 reg_magic == MAGIC_ALL ? "" : "\\");
3407 EMSG_RET_FAIL(IObuff);
3408 }
3409
3410 /*
3411 * Reverse the range if there was a '-', or make sure it is in the right
3412 * order otherwise.
3413 */
3414 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3415 {
3416 tmp = *minval;
3417 *minval = *maxval;
3418 *maxval = tmp;
3419 }
3420 skipchr(); /* let's be friends with the lexer again */
3421 return OK;
3422}
3423
3424/*
3425 * vim_regexec and friends
3426 */
3427
3428/*
3429 * Global work variables for vim_regexec().
3430 */
3431
3432/* The current match-position is remembered with these variables: */
3433static linenr_T reglnum; /* line number, relative to first line */
3434static char_u *regline; /* start of current line */
3435static char_u *reginput; /* current input, points into "regline" */
3436
3437static int need_clear_subexpr; /* subexpressions still need to be
3438 * cleared */
3439#ifdef FEAT_SYN_HL
3440static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3441 * still need to be cleared */
3442#endif
3443
Bram Moolenaar071d4272004-06-13 20:20:40 +00003444/*
3445 * Structure used to save the current input state, when it needs to be
3446 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003447 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003448 */
3449typedef struct
3450{
3451 union
3452 {
3453 char_u *ptr; /* reginput pointer, for single-line regexp */
3454 lpos_T pos; /* reginput pos, for multi-line regexp */
3455 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003456 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003457} regsave_T;
3458
3459/* struct to save start/end pointer/position in for \(\) */
3460typedef struct
3461{
3462 union
3463 {
3464 char_u *ptr;
3465 lpos_T pos;
3466 } se_u;
3467} save_se_T;
3468
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003469/* used for BEHIND and NOBEHIND matching */
3470typedef struct regbehind_S
3471{
3472 regsave_T save_after;
3473 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003474 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003475 save_se_T save_start[NSUBEXP];
3476 save_se_T save_end[NSUBEXP];
3477} regbehind_T;
3478
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003479static char_u *reg_getline(linenr_T lnum);
3480static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm);
3481static long regtry(bt_regprog_T *prog, colnr_T col);
3482static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003483#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003484static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003485#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003486static void save_subexpr(regbehind_T *bp);
3487static void restore_subexpr(regbehind_T *bp);
3488static void reg_nextline(void);
3489static void reg_save(regsave_T *save, garray_T *gap);
3490static void reg_restore(regsave_T *save, garray_T *gap);
3491static int reg_save_equal(regsave_T *save);
3492static void save_se_multi(save_se_T *savep, lpos_T *posp);
3493static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003494
/* Save the sub-expressions before attempting a match.
 * Uses save_se_multi() with "posp" when REG_MULTI is set, save_se_one() with
 * "pp" otherwise.  The expansion is wrapped in parentheses so that the
 * conditional operator cannot combine with operators around the macro
 * call. */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/* After a failed match restore the sub-expressions.
 * NOTE(review): this expands to a brace block, not "do { } while (0)"; it is
 * kept that way because the call sites are not visible here.  Do not use it
 * as the body of an "if" that has an "else". */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3505
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003506static int re_num_cmp(long_u val, char_u *scan);
3507static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
3508static int regmatch(char_u *prog);
3509static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003510
3511#ifdef DEBUG
3512int regnarrate = 0;
3513#endif
3514
3515/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003516 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3517 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003518 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003519 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003520static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003521static unsigned reg_tofreelen;
3522
3523/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02003524 * Structure used to store the execution state of the regex engine.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003525 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003526 * done:
3527 * single-line multi-line
3528 * reg_match &regmatch_T NULL
3529 * reg_mmatch NULL &regmmatch_T
3530 * reg_startp reg_match->startp <invalid>
3531 * reg_endp reg_match->endp <invalid>
3532 * reg_startpos <invalid> reg_mmatch->startpos
3533 * reg_endpos <invalid> reg_mmatch->endpos
3534 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003535 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003536 * reg_firstlnum <invalid> first line in which to search
3537 * reg_maxline 0 last line nr
3538 * reg_line_lbr FALSE or TRUE FALSE
3539 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003540typedef struct {
3541 regmatch_T *reg_match;
3542 regmmatch_T *reg_mmatch;
3543 char_u **reg_startp;
3544 char_u **reg_endp;
3545 lpos_T *reg_startpos;
3546 lpos_T *reg_endpos;
3547 win_T *reg_win;
3548 buf_T *reg_buf;
3549 linenr_T reg_firstlnum;
3550 linenr_T reg_maxline;
3551 int reg_line_lbr; /* "\n" in string is line break */
3552
3553 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3554 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3555 * contains '\c' or '\C' the value is overruled. */
3556 int reg_ic;
3557
3558#ifdef FEAT_MBYTE
3559 /* Similar to rex.reg_ic, but only for 'combining' characters. Set with \Z
3560 * flag in the regexp. Defaults to false, always. */
3561 int reg_icombine;
3562#endif
3563
3564 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3565 * there is no maximum. */
3566 colnr_T reg_maxcol;
3567} regexec_T;
3568
3569static regexec_T rex;
3570static int rex_in_use = FALSE;
3571
Bram Moolenaar071d4272004-06-13 20:20:40 +00003572
/*
 * Values for rs_state in regitem_T.
 * The numeric values follow from the order below; RS_ZOPEN and RS_ZCLOSE only
 * exist when FEAT_SYN_HL is defined.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3593
3594/*
3595 * When there are alternatives a regstate_T is put on the regstack to remember
3596 * what we are doing.
3597 * Before it may be another type of item, depending on rs_state, to remember
3598 * more things.
3599 */
3600typedef struct regitem_S
3601{
3602 regstate_T rs_state; /* what we are doing, one of RS_ above */
3603 char_u *rs_scan; /* current node in program */
3604 union
3605 {
3606 save_se_T sesave;
3607 regsave_T regsave;
3608 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003609 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003610} regitem_T;
3611
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003612static regitem_T *regstack_push(regstate_T state, char_u *scan);
3613static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003614
/*
 * Used for STAR, PLUS and BRACE_SIMPLE matching.
 * NOTE(review): "count", "minval" and "maxval" are not used in this part of
 * the file; presumably the number of repeats tried and the allowed range --
 * confirm in regmatch().
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3624
3625/* used to store input position when a BACK was encountered, so that we now if
3626 * we made any progress since the last time. */
3627typedef struct backpos_S
3628{
3629 char_u *bp_scan; /* "scan" where BACK was encountered */
3630 regsave_T bp_pos; /* last input position */
3631} backpos_T;
3632
3633/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003634 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3635 * to avoid invoking malloc() and free() often.
3636 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3637 * or regbehind_T.
3638 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003639 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003640static garray_T regstack = {0, 0, 0, 0, NULL};
3641static garray_T backpos = {0, 0, 0, 0, NULL};
3642
/*
 * Allocation strategy shared by the "regstack" and "backpos" tables, meant to
 * reduce the number of malloc/free calls:
 * - The initial size is fairly small.
 * - When needed the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match the memory is only freed if the array has
 *   grown.  Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL        2048    /* initial size of "regstack" */
#define BACKPOS_INITIAL         64      /* initial size of "backpos" */
3656
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the regexp code keeps allocated between matches:
 * the "regstack" and "backpos" tables, "reg_tofree" and "reg_prev_sub".
 * The pointers are reset to NULL so that no dangling pointer remains if any
 * regexp code happens to run afterwards.
 */
    void
free_regexp_stuff(void)
{
    ga_clear(&regstack);
    ga_clear(&backpos);

    vim_free(reg_tofree);
    reg_tofree = NULL;

    vim_free(reg_prev_sub);
    reg_prev_sub = NULL;
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003667
Bram Moolenaar071d4272004-06-13 20:20:40 +00003668/*
3669 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3670 */
3671 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003672reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003673{
3674 /* when looking behind for a match/no-match lnum is negative. But we
3675 * can't go before line 1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003676 if (rex.reg_firstlnum + lnum < 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003677 return NULL;
Bram Moolenaar6100d022016-10-02 16:51:57 +02003678 if (lnum > rex.reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003679 /* Must have matched the "\n" in the last line. */
3680 return (char_u *)"";
Bram Moolenaar6100d022016-10-02 16:51:57 +02003681 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003682}
3683
3684static regsave_T behind_pos;
3685
3686#ifdef FEAT_SYN_HL
3687static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3688static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3689static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3690static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3691#endif
3692
3693/* TRUE if using multi-line regexp. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003694#define REG_MULTI (rex.reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003695
Bram Moolenaar071d4272004-06-13 20:20:40 +00003696/*
3697 * Match a regexp against a string.
3698 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3699 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003700 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003701 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003702 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003703 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003704 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003705bt_regexec_nl(
3706 regmatch_T *rmp,
3707 char_u *line, /* string to match against */
3708 colnr_T col, /* column to start looking for match */
3709 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003710{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003711 rex.reg_match = rmp;
3712 rex.reg_mmatch = NULL;
3713 rex.reg_maxline = 0;
3714 rex.reg_line_lbr = line_lbr;
3715 rex.reg_buf = curbuf;
3716 rex.reg_win = NULL;
3717 rex.reg_ic = rmp->rm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003718#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003719 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003720#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003721 rex.reg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003722
3723 return bt_regexec_both(line, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003724}
3725
Bram Moolenaar071d4272004-06-13 20:20:40 +00003726/*
3727 * Match a regexp against multiple lines.
3728 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3729 * Uses curbuf for line count and 'iskeyword'.
3730 *
3731 * Return zero if there is no match. Return number of lines contained in the
3732 * match otherwise.
3733 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003734 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003735bt_regexec_multi(
3736 regmmatch_T *rmp,
3737 win_T *win, /* window in which to search or NULL */
3738 buf_T *buf, /* buffer in which to search */
3739 linenr_T lnum, /* nr of line to start looking for match */
3740 colnr_T col, /* column to start looking for match */
3741 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003742{
Bram Moolenaar6100d022016-10-02 16:51:57 +02003743 rex.reg_match = NULL;
3744 rex.reg_mmatch = rmp;
3745 rex.reg_buf = buf;
3746 rex.reg_win = win;
3747 rex.reg_firstlnum = lnum;
3748 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3749 rex.reg_line_lbr = FALSE;
3750 rex.reg_ic = rmp->rmm_ic;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003751#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003752 rex.reg_icombine = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003753#endif
Bram Moolenaar6100d022016-10-02 16:51:57 +02003754 rex.reg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003755
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003756 return bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003757}
3758
3759/*
3760 * Match a regexp against a string ("line" points to the string) or multiple
3761 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003762 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003763 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003764 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003765bt_regexec_both(
3766 char_u *line,
3767 colnr_T col, /* column to start looking for match */
3768 proftime_T *tm UNUSED) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003769{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003770 bt_regprog_T *prog;
3771 char_u *s;
3772 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003773
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003774 /* Create "regstack" and "backpos" if they are not allocated yet.
3775 * We allocate *_INITIAL amount of bytes first and then set the grow size
3776 * to much bigger value to avoid many malloc calls in case of deep regular
3777 * expressions. */
3778 if (regstack.ga_data == NULL)
3779 {
3780 /* Use an item size of 1 byte, since we push different things
3781 * onto the regstack. */
3782 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003783 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003784 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3785 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003786
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003787 if (backpos.ga_data == NULL)
3788 {
3789 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003790 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003791 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3792 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003793
Bram Moolenaar071d4272004-06-13 20:20:40 +00003794 if (REG_MULTI)
3795 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003796 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003797 line = reg_getline((linenr_T)0);
Bram Moolenaar6100d022016-10-02 16:51:57 +02003798 rex.reg_startpos = rex.reg_mmatch->startpos;
3799 rex.reg_endpos = rex.reg_mmatch->endpos;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003800 }
3801 else
3802 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02003803 prog = (bt_regprog_T *)rex.reg_match->regprog;
3804 rex.reg_startp = rex.reg_match->startp;
3805 rex.reg_endp = rex.reg_match->endp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003806 }
3807
3808 /* Be paranoid... */
3809 if (prog == NULL || line == NULL)
3810 {
3811 EMSG(_(e_null));
3812 goto theend;
3813 }
3814
3815 /* Check validity of program. */
3816 if (prog_magic_wrong())
3817 goto theend;
3818
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003819 /* If the start column is past the maximum column: no need to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003820 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003821 goto theend;
3822
Bram Moolenaar6100d022016-10-02 16:51:57 +02003823 /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003824 if (prog->regflags & RF_ICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003825 rex.reg_ic = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003826 else if (prog->regflags & RF_NOICASE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003827 rex.reg_ic = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003828
3829#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003830 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003831 if (prog->regflags & RF_ICOMBINE)
Bram Moolenaar6100d022016-10-02 16:51:57 +02003832 rex.reg_icombine = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003833#endif
3834
3835 /* If there is a "must appear" string, look for it. */
3836 if (prog->regmust != NULL)
3837 {
3838 int c;
3839
3840#ifdef FEAT_MBYTE
3841 if (has_mbyte)
3842 c = (*mb_ptr2char)(prog->regmust);
3843 else
3844#endif
3845 c = *prog->regmust;
3846 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003847
3848 /*
3849 * This is used very often, esp. for ":global". Use three versions of
3850 * the loop to avoid overhead of conditions.
3851 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003852 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003853#ifdef FEAT_MBYTE
3854 && !has_mbyte
3855#endif
3856 )
3857 while ((s = vim_strbyte(s, c)) != NULL)
3858 {
3859 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3860 break; /* Found it. */
3861 ++s;
3862 }
3863#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02003864 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
Bram Moolenaar05159a02005-02-26 23:04:13 +00003865 while ((s = vim_strchr(s, c)) != NULL)
3866 {
3867 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3868 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003869 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003870 }
3871#endif
3872 else
3873 while ((s = cstrchr(s, c)) != NULL)
3874 {
3875 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3876 break; /* Found it. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003877 MB_PTR_ADV(s);
Bram Moolenaar05159a02005-02-26 23:04:13 +00003878 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003879 if (s == NULL) /* Not present. */
3880 goto theend;
3881 }
3882
3883 regline = line;
3884 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003885 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003886
3887 /* Simplest case: Anchored match need be tried only once. */
3888 if (prog->reganch)
3889 {
3890 int c;
3891
3892#ifdef FEAT_MBYTE
3893 if (has_mbyte)
3894 c = (*mb_ptr2char)(regline + col);
3895 else
3896#endif
3897 c = regline[col];
3898 if (prog->regstart == NUL
3899 || prog->regstart == c
Bram Moolenaar6100d022016-10-02 16:51:57 +02003900 || (rex.reg_ic && ((
Bram Moolenaar071d4272004-06-13 20:20:40 +00003901#ifdef FEAT_MBYTE
3902 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3903 || (c < 255 && prog->regstart < 255 &&
3904#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003905 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003906 retval = regtry(prog, col);
3907 else
3908 retval = 0;
3909 }
3910 else
3911 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003912#ifdef FEAT_RELTIME
3913 int tm_count = 0;
3914#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003915 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003916 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003917 {
3918 if (prog->regstart != NUL)
3919 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003920 /* Skip until the char we know it must start with.
3921 * Used often, do some work to avoid call overhead. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003922 if (!rex.reg_ic
Bram Moolenaar05159a02005-02-26 23:04:13 +00003923#ifdef FEAT_MBYTE
3924 && !has_mbyte
3925#endif
3926 )
3927 s = vim_strbyte(regline + col, prog->regstart);
3928 else
3929 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003930 if (s == NULL)
3931 {
3932 retval = 0;
3933 break;
3934 }
3935 col = (int)(s - regline);
3936 }
3937
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003938 /* Check for maximum column to try. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02003939 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003940 {
3941 retval = 0;
3942 break;
3943 }
3944
Bram Moolenaar071d4272004-06-13 20:20:40 +00003945 retval = regtry(prog, col);
3946 if (retval > 0)
3947 break;
3948
3949 /* if not currently on the first line, get it again */
3950 if (reglnum != 0)
3951 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003952 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003953 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003954 }
3955 if (regline[col] == NUL)
3956 break;
3957#ifdef FEAT_MBYTE
3958 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003959 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003960 else
3961#endif
3962 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003963#ifdef FEAT_RELTIME
3964 /* Check for timeout once in a twenty times to avoid overhead. */
3965 if (tm != NULL && ++tm_count == 20)
3966 {
3967 tm_count = 0;
3968 if (profile_passed_limit(tm))
3969 break;
3970 }
3971#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003972 }
3973 }
3974
Bram Moolenaar071d4272004-06-13 20:20:40 +00003975theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003976 /* Free "reg_tofree" when it's a bit big.
3977 * Free regstack and backpos if they are bigger than their initial size. */
3978 if (reg_tofreelen > 400)
3979 {
3980 vim_free(reg_tofree);
3981 reg_tofree = NULL;
3982 }
3983 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3984 ga_clear(&regstack);
3985 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3986 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003987
Bram Moolenaar071d4272004-06-13 20:20:40 +00003988 return retval;
3989}
3990
3991#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003992static reg_extmatch_T *make_extmatch(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003993
3994/*
3995 * Create a new extmatch and mark it as referenced once.
3996 */
3997 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01003998make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003999{
4000 reg_extmatch_T *em;
4001
4002 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4003 if (em != NULL)
4004 em->refcnt = 1;
4005 return em;
4006}
4007
4008/*
4009 * Add a reference to an extmatch.
4010 */
4011 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004012ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004013{
4014 if (em != NULL)
4015 em->refcnt++;
4016 return em;
4017}
4018
4019/*
4020 * Remove a reference to an extmatch. If there are no references left, free
4021 * the info.
4022 */
4023 void
Bram Moolenaar05540972016-01-30 20:31:25 +01004024unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004025{
4026 int i;
4027
4028 if (em != NULL && --em->refcnt <= 0)
4029 {
4030 for (i = 0; i < NSUBEXP; ++i)
4031 vim_free(em->matches[i]);
4032 vim_free(em);
4033 }
4034}
4035#endif
4036
4037/*
4038 * regtry - try match of "prog" with at regline["col"].
4039 * Returns 0 for failure, number of lines contained in the match otherwise.
4040 */
4041 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01004042regtry(bt_regprog_T *prog, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004043{
4044 reginput = regline + col;
4045 need_clear_subexpr = TRUE;
4046#ifdef FEAT_SYN_HL
4047 /* Clear the external match subpointers if necessary. */
4048 if (prog->reghasz == REX_SET)
4049 need_clear_zsubexpr = TRUE;
4050#endif
4051
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004052 if (regmatch(prog->program + 1) == 0)
4053 return 0;
4054
4055 cleanup_subexpr();
4056 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004057 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004058 if (rex.reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004059 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004060 rex.reg_startpos[0].lnum = 0;
4061 rex.reg_startpos[0].col = col;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004062 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004063 if (rex.reg_endpos[0].lnum < 0)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004064 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004065 rex.reg_endpos[0].lnum = reglnum;
4066 rex.reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004067 }
4068 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004069 /* Use line number of "\ze". */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004070 reglnum = rex.reg_endpos[0].lnum;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004071 }
4072 else
4073 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004074 if (rex.reg_startp[0] == NULL)
4075 rex.reg_startp[0] = regline + col;
4076 if (rex.reg_endp[0] == NULL)
4077 rex.reg_endp[0] = reginput;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004078 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004079#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004080 /* Package any found \z(...\) matches for export. Default is none. */
4081 unref_extmatch(re_extmatch_out);
4082 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004083
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004084 if (prog->reghasz == REX_SET)
4085 {
4086 int i;
4087
4088 cleanup_zsubexpr();
4089 re_extmatch_out = make_extmatch();
4090 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004091 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004092 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004093 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004094 /* Only accept single line matches. */
4095 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004096 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4097 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004098 re_extmatch_out->matches[i] =
4099 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004100 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004101 reg_endzpos[i].col - reg_startzpos[i].col);
4102 }
4103 else
4104 {
4105 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4106 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004107 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004108 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004109 }
4110 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004111 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004112#endif
4113 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004114}
4115
#ifdef FEAT_MBYTE
static int reg_prev_class(void);

/*
 * Get the class of the character just before "reginput".
 * Returns -1 when "reginput" is not past the start of "regline".
 */
    static int
reg_prev_class(void)
{
    char_u      *prev;

    if (reginput <= regline)
        return -1;

    /* Go back one byte and then to the first byte of that character. */
    prev = reginput - 1;
    return mb_get_class_buf(prev - (*mb_head_off)(regline, prev),
                                                                rex.reg_buf);
}
#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004131
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004132static int reg_match_visual(void);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004133
4134/*
4135 * Return TRUE if the current reginput position matches the Visual area.
4136 */
4137 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004138reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004139{
4140 pos_T top, bot;
4141 linenr_T lnum;
4142 colnr_T col;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004143 win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004144 int mode;
4145 colnr_T start, end;
4146 colnr_T start2, end2;
4147 colnr_T cols;
4148
4149 /* Check if the buffer is the current buffer. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004150 if (rex.reg_buf != curbuf || VIsual.lnum == 0)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004151 return FALSE;
4152
4153 if (VIsual_active)
4154 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004155 if (LT_POS(VIsual, wp->w_cursor))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004156 {
4157 top = VIsual;
4158 bot = wp->w_cursor;
4159 }
4160 else
4161 {
4162 top = wp->w_cursor;
4163 bot = VIsual;
4164 }
4165 mode = VIsual_mode;
4166 }
4167 else
4168 {
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01004169 if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004170 {
4171 top = curbuf->b_visual.vi_start;
4172 bot = curbuf->b_visual.vi_end;
4173 }
4174 else
4175 {
4176 top = curbuf->b_visual.vi_end;
4177 bot = curbuf->b_visual.vi_start;
4178 }
4179 mode = curbuf->b_visual.vi_mode;
4180 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004181 lnum = reglnum + rex.reg_firstlnum;
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004182 if (lnum < top.lnum || lnum > bot.lnum)
4183 return FALSE;
4184
4185 if (mode == 'v')
4186 {
4187 col = (colnr_T)(reginput - regline);
4188 if ((lnum == top.lnum && col < top.col)
4189 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4190 return FALSE;
4191 }
4192 else if (mode == Ctrl_V)
4193 {
4194 getvvcol(wp, &top, &start, NULL, &end);
4195 getvvcol(wp, &bot, &start2, NULL, &end2);
4196 if (start2 < start)
4197 start = start2;
4198 if (end2 > end)
4199 end = end2;
4200 if (top.col == MAXCOL || bot.col == MAXCOL)
4201 end = MAXCOL;
4202 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4203 if (cols < start || cols > end - (*p_sel == 'e'))
4204 return FALSE;
4205 }
4206 return TRUE;
4207}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004208
/* Advance "reginput" with MB_PTR_ADV(). */
#define ADVANCE_REGINPUT()      MB_PTR_ADV(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4218
4219/*
4220 * regmatch - main matching routine
4221 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004222 * Conceptually the strategy is simple: Check to see whether the current node
4223 * matches, push an item onto the regstack and loop to see whether the rest
4224 * matches, and then act accordingly. In practice we make some effort to
4225 * avoid using the regstack, in particular by going through "ordinary" nodes
4226 * (that don't need to know whether the rest of the match failed) by a nested
4227 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004228 *
4229 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4230 * the last matched character.
4231 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4232 * undefined state!
4233 */
4234 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004235regmatch(
4236 char_u *scan) /* Current node. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004237{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004238 char_u *next; /* Next node. */
4239 int op;
4240 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004241 regitem_T *rp;
4242 int no;
4243 int status; /* one of the RA_ values: */
4244#define RA_FAIL 1 /* something failed, abort */
4245#define RA_CONT 2 /* continue in inner loop */
4246#define RA_BREAK 3 /* break inner loop */
4247#define RA_MATCH 4 /* successful match */
4248#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004249
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004250 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004251 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004252 regstack.ga_len = 0;
4253 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004254
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004255 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004256 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004257 */
4258 for (;;)
4259 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004260 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4261 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004262 fast_breakcheck();
4263
4264#ifdef DEBUG
4265 if (scan != NULL && regnarrate)
4266 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004267 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004268 mch_errmsg("(\n");
4269 }
4270#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004271
4272 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004273 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004274 * regstack.
4275 */
4276 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004277 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004278 if (got_int || scan == NULL)
4279 {
4280 status = RA_FAIL;
4281 break;
4282 }
4283 status = RA_CONT;
4284
Bram Moolenaar071d4272004-06-13 20:20:40 +00004285#ifdef DEBUG
4286 if (regnarrate)
4287 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004288 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004289 mch_errmsg("...\n");
4290# ifdef FEAT_SYN_HL
4291 if (re_extmatch_in != NULL)
4292 {
4293 int i;
4294
4295 mch_errmsg(_("External submatches:\n"));
4296 for (i = 0; i < NSUBEXP; i++)
4297 {
4298 mch_errmsg(" \"");
4299 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004300 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004301 mch_errmsg("\"\n");
4302 }
4303 }
4304# endif
4305 }
4306#endif
4307 next = regnext(scan);
4308
4309 op = OP(scan);
4310 /* Check for character class with NL added. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004311 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
4312 && *reginput == NUL && reglnum <= rex.reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004313 {
4314 reg_nextline();
4315 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02004316 else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n')
Bram Moolenaar071d4272004-06-13 20:20:40 +00004317 {
4318 ADVANCE_REGINPUT();
4319 }
4320 else
4321 {
4322 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004323 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004324#ifdef FEAT_MBYTE
4325 if (has_mbyte)
4326 c = (*mb_ptr2char)(reginput);
4327 else
4328#endif
4329 c = *reginput;
4330 switch (op)
4331 {
4332 case BOL:
4333 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004334 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004335 break;
4336
4337 case EOL:
4338 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004339 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004340 break;
4341
4342 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004343 /* We're not at the beginning of the file when below the first
4344 * line where we started, not at the start of the line or we
4345 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004346 if (reglnum != 0 || reginput != regline
Bram Moolenaar6100d022016-10-02 16:51:57 +02004347 || (REG_MULTI && rex.reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004348 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004349 break;
4350
4351 case RE_EOF:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004352 if (reglnum != rex.reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004353 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004354 break;
4355
4356 case CURSOR:
4357 /* Check if the buffer is in a window and compare the
Bram Moolenaar6100d022016-10-02 16:51:57 +02004358 * rex.reg_win->w_cursor position to the match position. */
4359 if (rex.reg_win == NULL
4360 || (reglnum + rex.reg_firstlnum
4361 != rex.reg_win->w_cursor.lnum)
4362 || ((colnr_T)(reginput - regline)
4363 != rex.reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004364 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004365 break;
4366
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004367 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004368 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004369 {
4370 int mark = OPERAND(scan)[0];
4371 int cmp = OPERAND(scan)[1];
4372 pos_T *pos;
4373
Bram Moolenaar6100d022016-10-02 16:51:57 +02004374 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004375 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004376 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004377 || (pos->lnum == reglnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004378 ? (pos->col == (colnr_T)(reginput - regline)
4379 ? (cmp == '<' || cmp == '>')
4380 : (pos->col < (colnr_T)(reginput - regline)
4381 ? cmp != '>'
4382 : cmp != '<'))
Bram Moolenaar6100d022016-10-02 16:51:57 +02004383 : (pos->lnum < reglnum + rex.reg_firstlnum
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004384 ? cmp != '>'
4385 : cmp != '<')))
4386 status = RA_NOMATCH;
4387 }
4388 break;
4389
4390 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004391 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004392 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004393 break;
4394
Bram Moolenaar071d4272004-06-13 20:20:40 +00004395 case RE_LNUM:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004396 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + rex.reg_firstlnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00004397 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004398 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004399 break;
4400
4401 case RE_COL:
4402 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004403 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004404 break;
4405
4406 case RE_VCOL:
4407 if (!re_num_cmp((long_u)win_linetabsize(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004408 rex.reg_win == NULL ? curwin : rex.reg_win,
Bram Moolenaar071d4272004-06-13 20:20:40 +00004409 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004410 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004411 break;
4412
4413 case BOW: /* \<word; reginput points to w */
4414 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004415 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004416#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004417 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004418 {
4419 int this_class;
4420
4421 /* Get class of current and previous char (if it exists). */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004422 this_class = mb_get_class_buf(reginput, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004423 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004424 status = RA_NOMATCH; /* not on a word at all */
4425 else if (reg_prev_class() == this_class)
4426 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004427 }
4428#endif
4429 else
4430 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004431 if (!vim_iswordc_buf(c, rex.reg_buf) || (reginput > regline
4432 && vim_iswordc_buf(reginput[-1], rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004433 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004434 }
4435 break;
4436
4437 case EOW: /* word\>; reginput points after d */
4438 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004439 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004440#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004441 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004442 {
4443 int this_class, prev_class;
4444
4445 /* Get class of current and previous char (if it exists). */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004446 this_class = mb_get_class_buf(reginput, rex.reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004447 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004448 if (this_class == prev_class
4449 || prev_class == 0 || prev_class == 1)
4450 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004451 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004452#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004453 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004454 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004455 if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)
4456 || (reginput[0] != NUL
4457 && vim_iswordc_buf(c, rex.reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004458 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004459 }
4460 break; /* Matched with EOW */
4461
4462 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004463 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004464 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004465 status = RA_NOMATCH;
4466 else
4467 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004468 break;
4469
4470 case IDENT:
4471 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004472 status = RA_NOMATCH;
4473 else
4474 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004475 break;
4476
4477 case SIDENT:
4478 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004479 status = RA_NOMATCH;
4480 else
4481 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482 break;
4483
4484 case KWORD:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004485 if (!vim_iswordp_buf(reginput, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004486 status = RA_NOMATCH;
4487 else
4488 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004489 break;
4490
4491 case SKWORD:
Bram Moolenaar6100d022016-10-02 16:51:57 +02004492 if (VIM_ISDIGIT(*reginput)
4493 || !vim_iswordp_buf(reginput, rex.reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004494 status = RA_NOMATCH;
4495 else
4496 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004497 break;
4498
4499 case FNAME:
4500 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004501 status = RA_NOMATCH;
4502 else
4503 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004504 break;
4505
4506 case SFNAME:
4507 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004508 status = RA_NOMATCH;
4509 else
4510 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004511 break;
4512
4513 case PRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004514 if (!vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004515 status = RA_NOMATCH;
4516 else
4517 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004518 break;
4519
4520 case SPRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004521 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004522 status = RA_NOMATCH;
4523 else
4524 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004525 break;
4526
4527 case WHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004528 if (!VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004529 status = RA_NOMATCH;
4530 else
4531 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004532 break;
4533
4534 case NWHITE:
Bram Moolenaar1c465442017-03-12 20:10:05 +01004535 if (c == NUL || VIM_ISWHITE(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004536 status = RA_NOMATCH;
4537 else
4538 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004539 break;
4540
4541 case DIGIT:
4542 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004543 status = RA_NOMATCH;
4544 else
4545 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004546 break;
4547
4548 case NDIGIT:
4549 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004550 status = RA_NOMATCH;
4551 else
4552 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004553 break;
4554
4555 case HEX:
4556 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004557 status = RA_NOMATCH;
4558 else
4559 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 break;
4561
4562 case NHEX:
4563 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004564 status = RA_NOMATCH;
4565 else
4566 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004567 break;
4568
4569 case OCTAL:
4570 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004571 status = RA_NOMATCH;
4572 else
4573 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004574 break;
4575
4576 case NOCTAL:
4577 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004578 status = RA_NOMATCH;
4579 else
4580 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004581 break;
4582
4583 case WORD:
4584 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004585 status = RA_NOMATCH;
4586 else
4587 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004588 break;
4589
4590 case NWORD:
4591 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004592 status = RA_NOMATCH;
4593 else
4594 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004595 break;
4596
4597 case HEAD:
4598 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004599 status = RA_NOMATCH;
4600 else
4601 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004602 break;
4603
4604 case NHEAD:
4605 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004606 status = RA_NOMATCH;
4607 else
4608 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 break;
4610
4611 case ALPHA:
4612 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 status = RA_NOMATCH;
4614 else
4615 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004616 break;
4617
4618 case NALPHA:
4619 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004620 status = RA_NOMATCH;
4621 else
4622 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 break;
4624
4625 case LOWER:
4626 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004627 status = RA_NOMATCH;
4628 else
4629 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004630 break;
4631
4632 case NLOWER:
4633 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004634 status = RA_NOMATCH;
4635 else
4636 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004637 break;
4638
4639 case UPPER:
4640 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004641 status = RA_NOMATCH;
4642 else
4643 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004644 break;
4645
4646 case NUPPER:
4647 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004648 status = RA_NOMATCH;
4649 else
4650 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004651 break;
4652
4653 case EXACTLY:
4654 {
4655 int len;
4656 char_u *opnd;
4657
4658 opnd = OPERAND(scan);
4659 /* Inline the first byte, for speed. */
4660 if (*opnd != *reginput
Bram Moolenaar6100d022016-10-02 16:51:57 +02004661 && (!rex.reg_ic || (
Bram Moolenaar071d4272004-06-13 20:20:40 +00004662#ifdef FEAT_MBYTE
4663 !enc_utf8 &&
4664#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004665 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004666 status = RA_NOMATCH;
4667 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004668 {
4669 /* match empty string always works; happens when "~" is
4670 * empty. */
4671 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004672 else
4673 {
4674 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004675#ifdef FEAT_MBYTE
Bram Moolenaar6100d022016-10-02 16:51:57 +02004676 && !(enc_utf8 && rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004677#endif
4678 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004679 {
4680 len = 1; /* matched a single byte above */
4681 }
4682 else
4683 {
4684 /* Need to match first byte again for multi-byte. */
4685 len = (int)STRLEN(opnd);
4686 if (cstrncmp(opnd, reginput, &len) != 0)
4687 status = RA_NOMATCH;
4688 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004689#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004690 /* Check for following composing character, unless %C
4691 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004692 if (status != RA_NOMATCH
4693 && enc_utf8
4694 && UTF_COMPOSINGLIKE(reginput, reginput + len)
Bram Moolenaar6100d022016-10-02 16:51:57 +02004695 && !rex.reg_icombine
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004696 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004697 {
4698 /* raaron: This code makes a composing character get
4699 * ignored, which is the correct behavior (sometimes)
4700 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004701 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004702 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004703#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004704 if (status != RA_NOMATCH)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004705 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004706 }
4707 }
4708 break;
4709
4710 case ANYOF:
4711 case ANYBUT:
4712 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004713 status = RA_NOMATCH;
4714 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4715 status = RA_NOMATCH;
4716 else
4717 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004718 break;
4719
4720#ifdef FEAT_MBYTE
4721 case MULTIBYTECODE:
4722 if (has_mbyte)
4723 {
4724 int i, len;
4725 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004726 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004727
4728 opnd = OPERAND(scan);
4729 /* Safety check (just in case 'encoding' was changed since
4730 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004731 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004732 {
4733 status = RA_NOMATCH;
4734 break;
4735 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004736 if (enc_utf8)
Bram Moolenaarace95982017-03-29 17:30:27 +02004737 opndc = utf_ptr2char(opnd);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004738 if (enc_utf8 && utf_iscomposing(opndc))
4739 {
4740 /* When only a composing char is given match at any
4741 * position where that composing char appears. */
4742 status = RA_NOMATCH;
Bram Moolenaar0e462412015-03-31 14:17:31 +02004743 for (i = 0; reginput[i] != NUL;
4744 i += utf_ptr2len(reginput + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004745 {
Bram Moolenaarace95982017-03-29 17:30:27 +02004746 inpc = utf_ptr2char(reginput + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004747 if (!utf_iscomposing(inpc))
4748 {
4749 if (i > 0)
4750 break;
4751 }
4752 else if (opndc == inpc)
4753 {
4754 /* Include all following composing chars. */
Bram Moolenaarace95982017-03-29 17:30:27 +02004755 len = i + utfc_ptr2len(reginput + i);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004756 status = RA_MATCH;
4757 break;
4758 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004759 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004760 }
4761 else
4762 for (i = 0; i < len; ++i)
4763 if (opnd[i] != reginput[i])
4764 {
4765 status = RA_NOMATCH;
4766 break;
4767 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004768 reginput += len;
4769 }
4770 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004771 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004772 break;
4773#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004774 case RE_COMPOSING:
4775#ifdef FEAT_MBYTE
4776 if (enc_utf8)
4777 {
4778 /* Skip composing characters. */
4779 while (utf_iscomposing(utf_ptr2char(reginput)))
Bram Moolenaar91acfff2017-03-12 19:22:36 +01004780 MB_CPTR_ADV(reginput);
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004781 }
4782#endif
4783 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004784
4785 case NOTHING:
4786 break;
4787
4788 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004789 {
4790 int i;
4791 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004792
Bram Moolenaar582fd852005-03-28 20:58:01 +00004793 /*
4794 * When we run into BACK we need to check if we don't keep
4795 * looping without matching any input. The second and later
4796 * times a BACK is encountered it fails if the input is still
4797 * at the same position as the previous time.
4798 * The positions are stored in "backpos" and found by the
4799 * current value of "scan", the position in the RE program.
4800 */
4801 bp = (backpos_T *)backpos.ga_data;
4802 for (i = 0; i < backpos.ga_len; ++i)
4803 if (bp[i].bp_scan == scan)
4804 break;
4805 if (i == backpos.ga_len)
4806 {
4807 /* First time at this BACK, make room to store the pos. */
4808 if (ga_grow(&backpos, 1) == FAIL)
4809 status = RA_FAIL;
4810 else
4811 {
4812 /* get "ga_data" again, it may have changed */
4813 bp = (backpos_T *)backpos.ga_data;
4814 bp[i].bp_scan = scan;
4815 ++backpos.ga_len;
4816 }
4817 }
4818 else if (reg_save_equal(&bp[i].bp_pos))
4819 /* Still at same position as last time, fail. */
4820 status = RA_NOMATCH;
4821
4822 if (status != RA_FAIL && status != RA_NOMATCH)
4823 reg_save(&bp[i].bp_pos, &backpos);
4824 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004825 break;
4826
Bram Moolenaar071d4272004-06-13 20:20:40 +00004827 case MOPEN + 0: /* Match start: \zs */
4828 case MOPEN + 1: /* \( */
4829 case MOPEN + 2:
4830 case MOPEN + 3:
4831 case MOPEN + 4:
4832 case MOPEN + 5:
4833 case MOPEN + 6:
4834 case MOPEN + 7:
4835 case MOPEN + 8:
4836 case MOPEN + 9:
4837 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004838 no = op - MOPEN;
4839 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004840 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004841 if (rp == NULL)
4842 status = RA_FAIL;
4843 else
4844 {
4845 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004846 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
4847 &rex.reg_startp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004848 /* We simply continue and handle the result when done. */
4849 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004850 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004851 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004852
4853 case NOPEN: /* \%( */
4854 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004855 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004856 status = RA_FAIL;
4857 /* We simply continue and handle the result when done. */
4858 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004859
4860#ifdef FEAT_SYN_HL
4861 case ZOPEN + 1:
4862 case ZOPEN + 2:
4863 case ZOPEN + 3:
4864 case ZOPEN + 4:
4865 case ZOPEN + 5:
4866 case ZOPEN + 6:
4867 case ZOPEN + 7:
4868 case ZOPEN + 8:
4869 case ZOPEN + 9:
4870 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004871 no = op - ZOPEN;
4872 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004873 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004874 if (rp == NULL)
4875 status = RA_FAIL;
4876 else
4877 {
4878 rp->rs_no = no;
4879 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4880 &reg_startzp[no]);
4881 /* We simply continue and handle the result when done. */
4882 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004883 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004884 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004885#endif
4886
4887 case MCLOSE + 0: /* Match end: \ze */
4888 case MCLOSE + 1: /* \) */
4889 case MCLOSE + 2:
4890 case MCLOSE + 3:
4891 case MCLOSE + 4:
4892 case MCLOSE + 5:
4893 case MCLOSE + 6:
4894 case MCLOSE + 7:
4895 case MCLOSE + 8:
4896 case MCLOSE + 9:
4897 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004898 no = op - MCLOSE;
4899 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004900 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004901 if (rp == NULL)
4902 status = RA_FAIL;
4903 else
4904 {
4905 rp->rs_no = no;
Bram Moolenaar6100d022016-10-02 16:51:57 +02004906 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
4907 &rex.reg_endp[no]);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004908 /* We simply continue and handle the result when done. */
4909 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004910 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004911 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004912
4913#ifdef FEAT_SYN_HL
4914 case ZCLOSE + 1: /* \) after \z( */
4915 case ZCLOSE + 2:
4916 case ZCLOSE + 3:
4917 case ZCLOSE + 4:
4918 case ZCLOSE + 5:
4919 case ZCLOSE + 6:
4920 case ZCLOSE + 7:
4921 case ZCLOSE + 8:
4922 case ZCLOSE + 9:
4923 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004924 no = op - ZCLOSE;
4925 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004926 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004927 if (rp == NULL)
4928 status = RA_FAIL;
4929 else
4930 {
4931 rp->rs_no = no;
4932 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4933 &reg_endzp[no]);
4934 /* We simply continue and handle the result when done. */
4935 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004936 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004937 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004938#endif
4939
4940 case BACKREF + 1:
4941 case BACKREF + 2:
4942 case BACKREF + 3:
4943 case BACKREF + 4:
4944 case BACKREF + 5:
4945 case BACKREF + 6:
4946 case BACKREF + 7:
4947 case BACKREF + 8:
4948 case BACKREF + 9:
4949 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004950 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004951
4952 no = op - BACKREF;
4953 cleanup_subexpr();
4954 if (!REG_MULTI) /* Single-line regexp */
4955 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004956 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004957 {
4958 /* Backref was not set: Match an empty string. */
4959 len = 0;
4960 }
4961 else
4962 {
4963 /* Compare current input with back-ref in the same
4964 * line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004965 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
4966 if (cstrncmp(rex.reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004967 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004968 }
4969 }
4970 else /* Multi-line regexp */
4971 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004972 if (rex.reg_startpos[no].lnum < 0
4973 || rex.reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004974 {
4975 /* Backref was not set: Match an empty string. */
4976 len = 0;
4977 }
4978 else
4979 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02004980 if (rex.reg_startpos[no].lnum == reglnum
4981 && rex.reg_endpos[no].lnum == reglnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004982 {
4983 /* Compare back-ref within the current line. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02004984 len = rex.reg_endpos[no].col
4985 - rex.reg_startpos[no].col;
4986 if (cstrncmp(regline + rex.reg_startpos[no].col,
Bram Moolenaar071d4272004-06-13 20:20:40 +00004987 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004988 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004989 }
4990 else
4991 {
4992 /* Messy situation: Need to compare between two
4993 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004994 int r = match_with_backref(
Bram Moolenaar6100d022016-10-02 16:51:57 +02004995 rex.reg_startpos[no].lnum,
4996 rex.reg_startpos[no].col,
4997 rex.reg_endpos[no].lnum,
4998 rex.reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02004999 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005000
5001 if (r != RA_MATCH)
5002 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005003 }
5004 }
5005 }
5006
5007 /* Matched the backref, skip over it. */
5008 reginput += len;
5009 }
5010 break;
5011
5012#ifdef FEAT_SYN_HL
5013 case ZREF + 1:
5014 case ZREF + 2:
5015 case ZREF + 3:
5016 case ZREF + 4:
5017 case ZREF + 5:
5018 case ZREF + 6:
5019 case ZREF + 7:
5020 case ZREF + 8:
5021 case ZREF + 9:
5022 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005023 int len;
5024
5025 cleanup_zsubexpr();
5026 no = op - ZREF;
5027 if (re_extmatch_in != NULL
5028 && re_extmatch_in->matches[no] != NULL)
5029 {
5030 len = (int)STRLEN(re_extmatch_in->matches[no]);
5031 if (cstrncmp(re_extmatch_in->matches[no],
5032 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005033 status = RA_NOMATCH;
5034 else
5035 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005036 }
5037 else
5038 {
5039 /* Backref was not set: Match an empty string. */
5040 }
5041 }
5042 break;
5043#endif
5044
5045 case BRANCH:
5046 {
5047 if (OP(next) != BRANCH) /* No choice. */
5048 next = OPERAND(scan); /* Avoid recursion. */
5049 else
5050 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005051 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005052 if (rp == NULL)
5053 status = RA_FAIL;
5054 else
5055 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005056 }
5057 }
5058 break;
5059
5060 case BRACE_LIMITS:
5061 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005062 if (OP(next) == BRACE_SIMPLE)
5063 {
5064 bl_minval = OPERAND_MIN(scan);
5065 bl_maxval = OPERAND_MAX(scan);
5066 }
5067 else if (OP(next) >= BRACE_COMPLEX
5068 && OP(next) < BRACE_COMPLEX + 10)
5069 {
5070 no = OP(next) - BRACE_COMPLEX;
5071 brace_min[no] = OPERAND_MIN(scan);
5072 brace_max[no] = OPERAND_MAX(scan);
5073 brace_count[no] = 0;
5074 }
5075 else
5076 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01005077 internal_error("BRACE_LIMITS");
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005078 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005079 }
5080 }
5081 break;
5082
5083 case BRACE_COMPLEX + 0:
5084 case BRACE_COMPLEX + 1:
5085 case BRACE_COMPLEX + 2:
5086 case BRACE_COMPLEX + 3:
5087 case BRACE_COMPLEX + 4:
5088 case BRACE_COMPLEX + 5:
5089 case BRACE_COMPLEX + 6:
5090 case BRACE_COMPLEX + 7:
5091 case BRACE_COMPLEX + 8:
5092 case BRACE_COMPLEX + 9:
5093 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005094 no = op - BRACE_COMPLEX;
5095 ++brace_count[no];
5096
5097 /* If not matched enough times yet, try one more */
5098 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005099 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005100 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005101 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005102 if (rp == NULL)
5103 status = RA_FAIL;
5104 else
5105 {
5106 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005107 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005108 next = OPERAND(scan);
5109 /* We continue and handle the result when done. */
5110 }
5111 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005112 }
5113
5114 /* If matched enough times, may try matching some more */
5115 if (brace_min[no] <= brace_max[no])
5116 {
5117 /* Range is the normal way around, use longest match */
5118 if (brace_count[no] <= brace_max[no])
5119 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005120 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005121 if (rp == NULL)
5122 status = RA_FAIL;
5123 else
5124 {
5125 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005126 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005127 next = OPERAND(scan);
5128 /* We continue and handle the result when done. */
5129 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005130 }
5131 }
5132 else
5133 {
5134 /* Range is backwards, use shortest match first */
5135 if (brace_count[no] <= brace_min[no])
5136 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005137 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005138 if (rp == NULL)
5139 status = RA_FAIL;
5140 else
5141 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005142 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005143 /* We continue and handle the result when done. */
5144 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005145 }
5146 }
5147 }
5148 break;
5149
5150 case BRACE_SIMPLE:
5151 case STAR:
5152 case PLUS:
5153 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005154 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005155
5156 /*
5157 * Lookahead to avoid useless match attempts when we know
5158 * what character comes next.
5159 */
5160 if (OP(next) == EXACTLY)
5161 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005162 rst.nextb = *OPERAND(next);
Bram Moolenaar6100d022016-10-02 16:51:57 +02005163 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005164 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005165 if (MB_ISUPPER(rst.nextb))
5166 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005167 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005168 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005169 }
5170 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005171 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005172 }
5173 else
5174 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005175 rst.nextb = NUL;
5176 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005177 }
5178 if (op != BRACE_SIMPLE)
5179 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005180 rst.minval = (op == STAR) ? 0 : 1;
5181 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005182 }
5183 else
5184 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005185 rst.minval = bl_minval;
5186 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005187 }
5188
5189 /*
5190 * When maxval > minval, try matching as much as possible, up
5191 * to maxval. When maxval < minval, try matching at least the
5192 * minimal number (since the range is backwards, that's also
5193 * maxval!).
5194 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005195 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005196 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005197 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005198 status = RA_FAIL;
5199 break;
5200 }
5201 if (rst.minval <= rst.maxval
5202 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5203 {
5204 /* It could match. Prepare for trying to match what
5205 * follows. The code is below. Parameters are stored in
5206 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005207 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005208 {
5209 EMSG(_(e_maxmempat));
5210 status = RA_FAIL;
5211 }
5212 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005213 status = RA_FAIL;
5214 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005215 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005216 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005217 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005218 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005219 if (rp == NULL)
5220 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005221 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005222 {
5223 *(((regstar_T *)rp) - 1) = rst;
5224 status = RA_BREAK; /* skip the restore bits */
5225 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005226 }
5227 }
5228 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005229 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005230
Bram Moolenaar071d4272004-06-13 20:20:40 +00005231 }
5232 break;
5233
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005234 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005235 case MATCH:
5236 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005237 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005238 if (rp == NULL)
5239 status = RA_FAIL;
5240 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005241 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005242 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005243 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005244 next = OPERAND(scan);
5245 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005246 }
5247 break;
5248
5249 case BEHIND:
5250 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005251 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005252 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005253 {
5254 EMSG(_(e_maxmempat));
5255 status = RA_FAIL;
5256 }
5257 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005258 status = RA_FAIL;
5259 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005260 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005261 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005262 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005263 if (rp == NULL)
5264 status = RA_FAIL;
5265 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005266 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005267 /* Need to save the subexpr to be able to restore them
5268 * when there is a match but we don't use it. */
5269 save_subexpr(((regbehind_T *)rp) - 1);
5270
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005271 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005272 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005273 /* First try if what follows matches. If it does then we
5274 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005275 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005276 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005277 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005278
5279 case BHPOS:
5280 if (REG_MULTI)
5281 {
5282 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5283 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005284 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005285 }
5286 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005287 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005288 break;
5289
5290 case NEWL:
Bram Moolenaar6100d022016-10-02 16:51:57 +02005291 if ((c != NUL || !REG_MULTI || reglnum > rex.reg_maxline
5292 || rex.reg_line_lbr)
5293 && (c != '\n' || !rex.reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005294 status = RA_NOMATCH;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005295 else if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005296 ADVANCE_REGINPUT();
5297 else
5298 reg_nextline();
5299 break;
5300
5301 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005302 status = RA_MATCH; /* Success! */
5303 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005304
5305 default:
5306 EMSG(_(e_re_corr));
5307#ifdef DEBUG
5308 printf("Illegal op code %d\n", op);
5309#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005310 status = RA_FAIL;
5311 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005312 }
5313 }
5314
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005315 /* If we can't continue sequentially, break the inner loop. */
5316 if (status != RA_CONT)
5317 break;
5318
5319 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005320 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005321
5322 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005323
5324 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005325 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005326 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005327 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005328 while (regstack.ga_len > 0 && status != RA_FAIL)
5329 {
5330 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5331 switch (rp->rs_state)
5332 {
5333 case RS_NOPEN:
5334 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005335 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005336 break;
5337
5338 case RS_MOPEN:
5339 /* Pop the state. Restore pointers when there is no match. */
5340 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005341 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
5342 &rex.reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005343 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005344 break;
5345
5346#ifdef FEAT_SYN_HL
5347 case RS_ZOPEN:
5348 /* Pop the state. Restore pointers when there is no match. */
5349 if (status == RA_NOMATCH)
5350 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5351 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005352 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005353 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005354#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005355
5356 case RS_MCLOSE:
5357 /* Pop the state. Restore pointers when there is no match. */
5358 if (status == RA_NOMATCH)
Bram Moolenaar6100d022016-10-02 16:51:57 +02005359 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
5360 &rex.reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005361 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005362 break;
5363
5364#ifdef FEAT_SYN_HL
5365 case RS_ZCLOSE:
5366 /* Pop the state. Restore pointers when there is no match. */
5367 if (status == RA_NOMATCH)
5368 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5369 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005370 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005371 break;
5372#endif
5373
5374 case RS_BRANCH:
5375 if (status == RA_MATCH)
5376 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005377 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005378 else
5379 {
5380 if (status != RA_BREAK)
5381 {
5382 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005383 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005384 scan = rp->rs_scan;
5385 }
5386 if (scan == NULL || OP(scan) != BRANCH)
5387 {
5388 /* no more branches, didn't find a match */
5389 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005390 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005391 }
5392 else
5393 {
5394 /* Prepare to try a branch. */
5395 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005396 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005397 scan = OPERAND(scan);
5398 }
5399 }
5400 break;
5401
5402 case RS_BRCPLX_MORE:
5403 /* Pop the state. Restore pointers when there is no match. */
5404 if (status == RA_NOMATCH)
5405 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005406 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005407 --brace_count[rp->rs_no]; /* decrement match count */
5408 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005409 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005410 break;
5411
5412 case RS_BRCPLX_LONG:
5413 /* Pop the state. Restore pointers when there is no match. */
5414 if (status == RA_NOMATCH)
5415 {
5416 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005417 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005418 --brace_count[rp->rs_no];
5419 /* continue with the items after "\{}" */
5420 status = RA_CONT;
5421 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005422 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005423 if (status == RA_CONT)
5424 scan = regnext(scan);
5425 break;
5426
5427 case RS_BRCPLX_SHORT:
5428 /* Pop the state. Restore pointers when there is no match. */
5429 if (status == RA_NOMATCH)
5430 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005431 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005432 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005433 if (status == RA_NOMATCH)
5434 {
5435 scan = OPERAND(scan);
5436 status = RA_CONT;
5437 }
5438 break;
5439
5440 case RS_NOMATCH:
5441 /* Pop the state. If the operand matches for NOMATCH or
5442 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5443 * except for SUBPAT, and continue with the next item. */
5444 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5445 status = RA_NOMATCH;
5446 else
5447 {
5448 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005449 if (rp->rs_no != SUBPAT) /* zero-width */
5450 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005451 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005452 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005453 if (status == RA_CONT)
5454 scan = regnext(scan);
5455 break;
5456
5457 case RS_BEHIND1:
5458 if (status == RA_NOMATCH)
5459 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005460 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005461 regstack.ga_len -= sizeof(regbehind_T);
5462 }
5463 else
5464 {
5465 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5466 * the behind part does (not) match before the current
5467 * position in the input. This must be done at every
5468 * position in the input and checking if the match ends at
5469 * the current position. */
5470
5471 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005472 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005473
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005474 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005475 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005476 * result, hitting the start of the line or the previous
5477 * line (for multi-line matching).
5478 * Set behind_pos to where the match should end, BHPOS
5479 * will match it. Save the current value. */
5480 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5481 behind_pos = rp->rs_un.regsave;
5482
5483 rp->rs_state = RS_BEHIND2;
5484
Bram Moolenaar582fd852005-03-28 20:58:01 +00005485 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005486 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005487 }
5488 break;
5489
5490 case RS_BEHIND2:
5491 /*
5492 * Looping for BEHIND / NOBEHIND match.
5493 */
5494 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5495 {
5496 /* found a match that ends where "next" started */
5497 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5498 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005499 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5500 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005501 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005502 {
5503 /* But we didn't want a match. Need to restore the
5504 * subexpr, because what follows matched, so they have
5505 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005506 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005507 restore_subexpr(((regbehind_T *)rp) - 1);
5508 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005509 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005510 regstack.ga_len -= sizeof(regbehind_T);
5511 }
5512 else
5513 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005514 long limit;
5515
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005516 /* No match or a match that doesn't end where we want it: Go
5517 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005518 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005519 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005520 if (REG_MULTI)
5521 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005522 if (limit > 0
5523 && ((rp->rs_un.regsave.rs_u.pos.lnum
5524 < behind_pos.rs_u.pos.lnum
5525 ? (colnr_T)STRLEN(regline)
5526 : behind_pos.rs_u.pos.col)
5527 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5528 no = FAIL;
5529 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005530 {
5531 if (rp->rs_un.regsave.rs_u.pos.lnum
5532 < behind_pos.rs_u.pos.lnum
5533 || reg_getline(
5534 --rp->rs_un.regsave.rs_u.pos.lnum)
5535 == NULL)
5536 no = FAIL;
5537 else
5538 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005539 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005540 rp->rs_un.regsave.rs_u.pos.col =
5541 (colnr_T)STRLEN(regline);
5542 }
5543 }
5544 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005545 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005546#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005547 if (has_mbyte)
5548 rp->rs_un.regsave.rs_u.pos.col -=
5549 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005550 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005551 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005552#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005553 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005554 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005555 }
5556 else
5557 {
5558 if (rp->rs_un.regsave.rs_u.ptr == regline)
5559 no = FAIL;
5560 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005561 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005562 MB_PTR_BACK(regline, rp->rs_un.regsave.rs_u.ptr);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005563 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5564 - rp->rs_un.regsave.rs_u.ptr) > limit)
5565 no = FAIL;
5566 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005567 }
5568 if (no == OK)
5569 {
5570 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005571 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005572 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005573 if (status == RA_MATCH)
5574 {
5575 /* We did match, so subexpr may have been changed,
5576 * need to restore them for the next try. */
5577 status = RA_NOMATCH;
5578 restore_subexpr(((regbehind_T *)rp) - 1);
5579 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005580 }
5581 else
5582 {
5583 /* Can't advance. For NOBEHIND that's a match. */
5584 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5585 if (rp->rs_no == NOBEHIND)
5586 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005587 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5588 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005589 status = RA_MATCH;
5590 }
5591 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005592 {
5593 /* We do want a proper match. Need to restore the
5594 * subexpr if we had a match, because they may have
5595 * been set. */
5596 if (status == RA_MATCH)
5597 {
5598 status = RA_NOMATCH;
5599 restore_subexpr(((regbehind_T *)rp) - 1);
5600 }
5601 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005602 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005603 regstack.ga_len -= sizeof(regbehind_T);
5604 }
5605 }
5606 break;
5607
5608 case RS_STAR_LONG:
5609 case RS_STAR_SHORT:
5610 {
5611 regstar_T *rst = ((regstar_T *)rp) - 1;
5612
5613 if (status == RA_MATCH)
5614 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005615 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005616 regstack.ga_len -= sizeof(regstar_T);
5617 break;
5618 }
5619
5620 /* Tried once already, restore input pointers. */
5621 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005622 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005623
5624 /* Repeat until we found a position where it could match. */
5625 for (;;)
5626 {
5627 if (status != RA_BREAK)
5628 {
5629 /* Tried first position already, advance. */
5630 if (rp->rs_state == RS_STAR_LONG)
5631 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005632 /* Trying for longest match, but couldn't or
5633 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005634 if (--rst->count < rst->minval)
5635 break;
5636 if (reginput == regline)
5637 {
5638 /* backup to last char of previous line */
5639 --reglnum;
5640 regline = reg_getline(reglnum);
5641 /* Just in case regrepeat() didn't count
5642 * right. */
5643 if (regline == NULL)
5644 break;
5645 reginput = regline + STRLEN(regline);
5646 fast_breakcheck();
5647 }
5648 else
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005649 MB_PTR_BACK(regline, reginput);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005650 }
5651 else
5652 {
5653 /* Range is backwards, use shortest match first.
5654 * Careful: maxval and minval are exchanged!
5655 * Couldn't or didn't match: try advancing one
5656 * char. */
5657 if (rst->count == rst->minval
5658 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5659 break;
5660 ++rst->count;
5661 }
5662 if (got_int)
5663 break;
5664 }
5665 else
5666 status = RA_NOMATCH;
5667
5668 /* If it could match, try it. */
5669 if (rst->nextb == NUL || *reginput == rst->nextb
5670 || *reginput == rst->nextb_ic)
5671 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005672 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005673 scan = regnext(rp->rs_scan);
5674 status = RA_CONT;
5675 break;
5676 }
5677 }
5678 if (status != RA_CONT)
5679 {
5680 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005681 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005682 regstack.ga_len -= sizeof(regstar_T);
5683 status = RA_NOMATCH;
5684 }
5685 }
5686 break;
5687 }
5688
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005689 /* If we want to continue the inner loop or didn't pop a state
5690 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005691 if (status == RA_CONT || rp == (regitem_T *)
5692 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5693 break;
5694 }
5695
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005696 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005697 if (status == RA_CONT)
5698 continue;
5699
5700 /*
5701 * If the regstack is empty or something failed we are done.
5702 */
5703 if (regstack.ga_len == 0 || status == RA_FAIL)
5704 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005705 if (scan == NULL)
5706 {
5707 /*
5708 * We get here only if there's trouble -- normally "case END" is
5709 * the terminating point.
5710 */
5711 EMSG(_(e_re_corr));
5712#ifdef DEBUG
5713 printf("Premature EOL\n");
5714#endif
5715 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005716 if (status == RA_FAIL)
5717 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005718 return (status == RA_MATCH);
5719 }
5720
5721 } /* End of loop until the regstack is empty. */
5722
5723 /* NOTREACHED */
5724}
5725
5726/*
5727 * Push an item onto the regstack.
5728 * Returns pointer to new item. Returns NULL when out of memory.
5729 */
5730 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005731regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005732{
5733 regitem_T *rp;
5734
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005735 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005736 {
5737 EMSG(_(e_maxmempat));
5738 return NULL;
5739 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005740 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005741 return NULL;
5742
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005743 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005744 rp->rs_state = state;
5745 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005746
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005747 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005748 return rp;
5749}
5750
5751/*
5752 * Pop an item from the regstack.
5753 */
5754 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005755regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005756{
5757 regitem_T *rp;
5758
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005759 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005760 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005761
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005762 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005763}
5764
Bram Moolenaar071d4272004-06-13 20:20:40 +00005765/*
5766 * regrepeat - repeatedly match something simple, return how many.
5767 * Advances reginput (and reglnum) to just after the matched chars.
5768 */
5769 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005770regrepeat(
5771 char_u *p,
5772 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005773{
5774 long count = 0;
5775 char_u *scan;
5776 char_u *opnd;
5777 int mask;
5778 int testval = 0;
5779
5780 scan = reginput; /* Make local copy of reginput for speed. */
5781 opnd = OPERAND(p);
5782 switch (OP(p))
5783 {
5784 case ANY:
5785 case ANY + ADD_NL:
5786 while (count < maxcount)
5787 {
5788 /* Matching anything means we continue until end-of-line (or
5789 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5790 while (*scan != NUL && count < maxcount)
5791 {
5792 ++count;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005793 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005794 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005795 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5796 || rex.reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005797 break;
5798 ++count; /* count the line-break */
5799 reg_nextline();
5800 scan = reginput;
5801 if (got_int)
5802 break;
5803 }
5804 break;
5805
5806 case IDENT:
5807 case IDENT + ADD_NL:
5808 testval = TRUE;
5809 /*FALLTHROUGH*/
5810 case SIDENT:
5811 case SIDENT + ADD_NL:
5812 while (count < maxcount)
5813 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005814 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005815 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005816 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005817 }
5818 else if (*scan == NUL)
5819 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005820 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5821 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005822 break;
5823 reg_nextline();
5824 scan = reginput;
5825 if (got_int)
5826 break;
5827 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005828 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005829 ++scan;
5830 else
5831 break;
5832 ++count;
5833 }
5834 break;
5835
5836 case KWORD:
5837 case KWORD + ADD_NL:
5838 testval = TRUE;
5839 /*FALLTHROUGH*/
5840 case SKWORD:
5841 case SKWORD + ADD_NL:
5842 while (count < maxcount)
5843 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005844 if (vim_iswordp_buf(scan, rex.reg_buf)
Bram Moolenaarf813a182013-01-30 13:59:37 +01005845 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005846 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005847 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005848 }
5849 else if (*scan == NUL)
5850 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005851 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5852 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005853 break;
5854 reg_nextline();
5855 scan = reginput;
5856 if (got_int)
5857 break;
5858 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005859 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005860 ++scan;
5861 else
5862 break;
5863 ++count;
5864 }
5865 break;
5866
5867 case FNAME:
5868 case FNAME + ADD_NL:
5869 testval = TRUE;
5870 /*FALLTHROUGH*/
5871 case SFNAME:
5872 case SFNAME + ADD_NL:
5873 while (count < maxcount)
5874 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005875 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005876 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005877 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005878 }
5879 else if (*scan == NUL)
5880 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005881 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5882 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005883 break;
5884 reg_nextline();
5885 scan = reginput;
5886 if (got_int)
5887 break;
5888 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005889 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005890 ++scan;
5891 else
5892 break;
5893 ++count;
5894 }
5895 break;
5896
5897 case PRINT:
5898 case PRINT + ADD_NL:
5899 testval = TRUE;
5900 /*FALLTHROUGH*/
5901 case SPRINT:
5902 case SPRINT + ADD_NL:
5903 while (count < maxcount)
5904 {
5905 if (*scan == NUL)
5906 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005907 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5908 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005909 break;
5910 reg_nextline();
5911 scan = reginput;
5912 if (got_int)
5913 break;
5914 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005915 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5916 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005917 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005918 MB_PTR_ADV(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005919 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02005920 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005921 ++scan;
5922 else
5923 break;
5924 ++count;
5925 }
5926 break;
5927
5928 case WHITE:
5929 case WHITE + ADD_NL:
5930 testval = mask = RI_WHITE;
5931do_class:
5932 while (count < maxcount)
5933 {
5934#ifdef FEAT_MBYTE
5935 int l;
5936#endif
5937 if (*scan == NUL)
5938 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02005939 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
5940 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005941 break;
5942 reg_nextline();
5943 scan = reginput;
5944 if (got_int)
5945 break;
5946 }
5947#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005948 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005949 {
5950 if (testval != 0)
5951 break;
5952 scan += l;
5953 }
5954#endif
5955 else if ((class_tab[*scan] & mask) == testval)
5956 ++scan;
Bram Moolenaar6100d022016-10-02 16:51:57 +02005957 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005958 ++scan;
5959 else
5960 break;
5961 ++count;
5962 }
5963 break;
5964
5965 case NWHITE:
5966 case NWHITE + ADD_NL:
5967 mask = RI_WHITE;
5968 goto do_class;
5969 case DIGIT:
5970 case DIGIT + ADD_NL:
5971 testval = mask = RI_DIGIT;
5972 goto do_class;
5973 case NDIGIT:
5974 case NDIGIT + ADD_NL:
5975 mask = RI_DIGIT;
5976 goto do_class;
5977 case HEX:
5978 case HEX + ADD_NL:
5979 testval = mask = RI_HEX;
5980 goto do_class;
5981 case NHEX:
5982 case NHEX + ADD_NL:
5983 mask = RI_HEX;
5984 goto do_class;
5985 case OCTAL:
5986 case OCTAL + ADD_NL:
5987 testval = mask = RI_OCTAL;
5988 goto do_class;
5989 case NOCTAL:
5990 case NOCTAL + ADD_NL:
5991 mask = RI_OCTAL;
5992 goto do_class;
5993 case WORD:
5994 case WORD + ADD_NL:
5995 testval = mask = RI_WORD;
5996 goto do_class;
5997 case NWORD:
5998 case NWORD + ADD_NL:
5999 mask = RI_WORD;
6000 goto do_class;
6001 case HEAD:
6002 case HEAD + ADD_NL:
6003 testval = mask = RI_HEAD;
6004 goto do_class;
6005 case NHEAD:
6006 case NHEAD + ADD_NL:
6007 mask = RI_HEAD;
6008 goto do_class;
6009 case ALPHA:
6010 case ALPHA + ADD_NL:
6011 testval = mask = RI_ALPHA;
6012 goto do_class;
6013 case NALPHA:
6014 case NALPHA + ADD_NL:
6015 mask = RI_ALPHA;
6016 goto do_class;
6017 case LOWER:
6018 case LOWER + ADD_NL:
6019 testval = mask = RI_LOWER;
6020 goto do_class;
6021 case NLOWER:
6022 case NLOWER + ADD_NL:
6023 mask = RI_LOWER;
6024 goto do_class;
6025 case UPPER:
6026 case UPPER + ADD_NL:
6027 testval = mask = RI_UPPER;
6028 goto do_class;
6029 case NUPPER:
6030 case NUPPER + ADD_NL:
6031 mask = RI_UPPER;
6032 goto do_class;
6033
6034 case EXACTLY:
6035 {
6036 int cu, cl;
6037
6038 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006039 * would have been used for it. It does handle single-byte
6040 * characters, such as latin1. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006041 if (rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006042 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006043 cu = MB_TOUPPER(*opnd);
6044 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006045 while (count < maxcount && (*scan == cu || *scan == cl))
6046 {
6047 count++;
6048 scan++;
6049 }
6050 }
6051 else
6052 {
6053 cu = *opnd;
6054 while (count < maxcount && *scan == cu)
6055 {
6056 count++;
6057 scan++;
6058 }
6059 }
6060 break;
6061 }
6062
6063#ifdef FEAT_MBYTE
6064 case MULTIBYTECODE:
6065 {
6066 int i, len, cf = 0;
6067
6068 /* Safety check (just in case 'encoding' was changed since
6069 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006070 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006071 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006072 if (rex.reg_ic && enc_utf8)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006073 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006074 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006075 {
6076 for (i = 0; i < len; ++i)
6077 if (opnd[i] != scan[i])
6078 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006079 if (i < len && (!rex.reg_ic || !enc_utf8
Bram Moolenaar071d4272004-06-13 20:20:40 +00006080 || utf_fold(utf_ptr2char(scan)) != cf))
6081 break;
6082 scan += len;
6083 ++count;
6084 }
6085 }
6086 }
6087 break;
6088#endif
6089
6090 case ANYOF:
6091 case ANYOF + ADD_NL:
6092 testval = TRUE;
6093 /*FALLTHROUGH*/
6094
6095 case ANYBUT:
6096 case ANYBUT + ADD_NL:
6097 while (count < maxcount)
6098 {
6099#ifdef FEAT_MBYTE
6100 int len;
6101#endif
6102 if (*scan == NUL)
6103 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006104 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline
6105 || rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006106 break;
6107 reg_nextline();
6108 scan = reginput;
6109 if (got_int)
6110 break;
6111 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02006112 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006113 ++scan;
6114#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006115 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006116 {
6117 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6118 break;
6119 scan += len;
6120 }
6121#endif
6122 else
6123 {
6124 if ((cstrchr(opnd, *scan) == NULL) == testval)
6125 break;
6126 ++scan;
6127 }
6128 ++count;
6129 }
6130 break;
6131
6132 case NEWL:
6133 while (count < maxcount
Bram Moolenaar6100d022016-10-02 16:51:57 +02006134 && ((*scan == NUL && reglnum <= rex.reg_maxline
6135 && !rex.reg_line_lbr && REG_MULTI)
6136 || (*scan == '\n' && rex.reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006137 {
6138 count++;
Bram Moolenaar6100d022016-10-02 16:51:57 +02006139 if (rex.reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006140 ADVANCE_REGINPUT();
6141 else
6142 reg_nextline();
6143 scan = reginput;
6144 if (got_int)
6145 break;
6146 }
6147 break;
6148
6149 default: /* Oh dear. Called inappropriately. */
6150 EMSG(_(e_re_corr));
6151#ifdef DEBUG
6152 printf("Called regrepeat with op code %d\n", OP(p));
6153#endif
6154 break;
6155 }
6156
6157 reginput = scan;
6158
6159 return (int)count;
6160}
6161
6162/*
6163 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006164 * Returns NULL when calculating size, when there is no next item and when
6165 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006166 */
6167 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006168regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006169{
6170 int offset;
6171
Bram Moolenaard3005802009-11-25 17:21:32 +00006172 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006173 return NULL;
6174
6175 offset = NEXT(p);
6176 if (offset == 0)
6177 return NULL;
6178
Bram Moolenaar582fd852005-03-28 20:58:01 +00006179 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006180 return p - offset;
6181 else
6182 return p + offset;
6183}
6184
6185/*
6186 * Check the regexp program for its magic number.
6187 * Return TRUE if it's wrong.
6188 */
6189 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006190prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006191{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006192 regprog_T *prog;
6193
Bram Moolenaar6100d022016-10-02 16:51:57 +02006194 prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006195 if (prog->engine == &nfa_regengine)
6196 /* For NFA matcher we don't check the magic */
6197 return FALSE;
6198
6199 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006200 {
6201 EMSG(_(e_re_corr));
6202 return TRUE;
6203 }
6204 return FALSE;
6205}
6206
6207/*
6208 * Cleanup the subexpressions, if this wasn't done yet.
6209 * This construction is used to clear the subexpressions only when they are
6210 * used (to increase speed).
6211 */
6212 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006213cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006214{
6215 if (need_clear_subexpr)
6216 {
6217 if (REG_MULTI)
6218 {
6219 /* Use 0xff to set lnum to -1 */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006220 vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6221 vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006222 }
6223 else
6224 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006225 vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6226 vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006227 }
6228 need_clear_subexpr = FALSE;
6229 }
6230}
6231
6232#ifdef FEAT_SYN_HL
6233 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006234cleanup_zsubexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006235{
6236 if (need_clear_zsubexpr)
6237 {
6238 if (REG_MULTI)
6239 {
6240 /* Use 0xff to set lnum to -1 */
6241 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6242 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6243 }
6244 else
6245 {
6246 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
6247 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
6248 }
6249 need_clear_zsubexpr = FALSE;
6250 }
6251}
6252#endif
6253
6254/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006255 * Save the current subexpr to "bp", so that they can be restored
6256 * later by restore_subexpr().
6257 */
6258 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006259save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006260{
6261 int i;
6262
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006263 /* When "need_clear_subexpr" is set we don't need to save the values, only
6264 * remember that this flag needs to be set again when restoring. */
6265 bp->save_need_clear_subexpr = need_clear_subexpr;
6266 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006267 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006268 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006269 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006270 if (REG_MULTI)
6271 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006272 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
6273 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006274 }
6275 else
6276 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006277 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
6278 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006279 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006280 }
6281 }
6282}
6283
6284/*
6285 * Restore the subexpr from "bp".
6286 */
6287 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006288restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006289{
6290 int i;
6291
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006292 /* Only need to restore saved values when they are not to be cleared. */
6293 need_clear_subexpr = bp->save_need_clear_subexpr;
6294 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006295 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006296 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006297 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006298 if (REG_MULTI)
6299 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006300 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
6301 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006302 }
6303 else
6304 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02006305 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
6306 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006307 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006308 }
6309 }
6310}
6311
6312/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006313 * Advance reglnum, regline and reginput to the next line.
6314 */
6315 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006316reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006317{
6318 regline = reg_getline(++reglnum);
6319 reginput = regline;
6320 fast_breakcheck();
6321}
6322
6323/*
6324 * Save the input line and position in a regsave_T.
6325 */
6326 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006327reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006328{
6329 if (REG_MULTI)
6330 {
6331 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6332 save->rs_u.pos.lnum = reglnum;
6333 }
6334 else
6335 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006336 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006337}
6338
6339/*
6340 * Restore the input line and position from a regsave_T.
6341 */
6342 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006343reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006344{
6345 if (REG_MULTI)
6346 {
6347 if (reglnum != save->rs_u.pos.lnum)
6348 {
6349 /* only call reg_getline() when the line number changed to save
6350 * a bit of time */
6351 reglnum = save->rs_u.pos.lnum;
6352 regline = reg_getline(reglnum);
6353 }
6354 reginput = regline + save->rs_u.pos.col;
6355 }
6356 else
6357 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006358 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006359}
6360
6361/*
6362 * Return TRUE if current position is equal to saved position.
6363 */
6364 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006365reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006366{
6367 if (REG_MULTI)
6368 return reglnum == save->rs_u.pos.lnum
6369 && reginput == regline + save->rs_u.pos.col;
6370 return reginput == save->rs_u.ptr;
6371}
6372
6373/*
6374 * Tentatively set the sub-expression start to the current position (after
6375 * calling regmatch() they will have changed). Need to save the existing
6376 * values for when there is no match.
6377 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6378 * depending on REG_MULTI.
6379 */
6380 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006381save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006382{
6383 savep->se_u.pos = *posp;
6384 posp->lnum = reglnum;
6385 posp->col = (colnr_T)(reginput - regline);
6386}
6387
6388 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006389save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006390{
6391 savep->se_u.ptr = *pp;
6392 *pp = reginput;
6393}
6394
6395/*
6396 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6397 */
6398 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006399re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006400{
6401 long_u n = OPERAND_MIN(scan);
6402
6403 if (OPERAND_CMP(scan) == '>')
6404 return val > n;
6405 if (OPERAND_CMP(scan) == '<')
6406 return val < n;
6407 return val == n;
6408}
6409
Bram Moolenaar580abea2013-06-14 20:31:28 +02006410/*
6411 * Check whether a backreference matches.
6412 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006413 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6414 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006415 */
6416 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006417match_with_backref(
6418 linenr_T start_lnum,
6419 colnr_T start_col,
6420 linenr_T end_lnum,
6421 colnr_T end_col,
6422 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006423{
6424 linenr_T clnum = start_lnum;
6425 colnr_T ccol = start_col;
6426 int len;
6427 char_u *p;
6428
6429 if (bytelen != NULL)
6430 *bytelen = 0;
6431 for (;;)
6432 {
6433 /* Since getting one line may invalidate the other, need to make copy.
6434 * Slow! */
6435 if (regline != reg_tofree)
6436 {
6437 len = (int)STRLEN(regline);
6438 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6439 {
6440 len += 50; /* get some extra */
6441 vim_free(reg_tofree);
6442 reg_tofree = alloc(len);
6443 if (reg_tofree == NULL)
6444 return RA_FAIL; /* out of memory!*/
6445 reg_tofreelen = len;
6446 }
6447 STRCPY(reg_tofree, regline);
6448 reginput = reg_tofree + (reginput - regline);
6449 regline = reg_tofree;
6450 }
6451
6452 /* Get the line to compare with. */
6453 p = reg_getline(clnum);
6454 if (clnum == end_lnum)
6455 len = end_col - ccol;
6456 else
6457 len = (int)STRLEN(p + ccol);
6458
6459 if (cstrncmp(p + ccol, reginput, &len) != 0)
6460 return RA_NOMATCH; /* doesn't match */
6461 if (bytelen != NULL)
6462 *bytelen += len;
6463 if (clnum == end_lnum)
6464 break; /* match and at end! */
Bram Moolenaar6100d022016-10-02 16:51:57 +02006465 if (reglnum >= rex.reg_maxline)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006466 return RA_NOMATCH; /* text too short */
6467
6468 /* Advance to next line. */
6469 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006470 if (bytelen != NULL)
6471 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006472 ++clnum;
6473 ccol = 0;
6474 if (got_int)
6475 return RA_FAIL;
6476 }
6477
6478 /* found a match! Note that regline may now point to a copy of the line,
6479 * that should not matter. */
6480 return RA_MATCH;
6481}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006482
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006483#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006484
6485/*
6486 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6487 */
6488 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006489regdump(char_u *pattern, bt_regprog_T *r)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006490{
6491 char_u *s;
6492 int op = EXACTLY; /* Arbitrary non-END op. */
6493 char_u *next;
6494 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006495 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006496
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006497#ifdef BT_REGEXP_LOG
6498 f = fopen("bt_regexp_log.log", "a");
6499#else
6500 f = stdout;
6501#endif
6502 if (f == NULL)
6503 return;
6504 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006505
6506 s = r->program + 1;
6507 /*
6508 * Loop until we find the END that isn't before a referred next (an END
6509 * can also appear in a NOMATCH operand).
6510 */
6511 while (op != END || s <= end)
6512 {
6513 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006514 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006515 next = regnext(s);
6516 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006517 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006518 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006519 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006520 if (end < next)
6521 end = next;
6522 if (op == BRACE_LIMITS)
6523 {
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006524 /* Two ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006525 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006526 s += 8;
6527 }
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006528 else if (op == BEHIND || op == NOBEHIND)
6529 {
6530 /* one int */
6531 fprintf(f, " count %ld", OPERAND_MIN(s));
6532 s += 4;
6533 }
Bram Moolenaar6d3a5d72013-06-06 18:04:51 +02006534 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6535 {
6536 /* one int plus comperator */
6537 fprintf(f, " count %ld", OPERAND_MIN(s));
6538 s += 5;
6539 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006540 s += 3;
6541 if (op == ANYOF || op == ANYOF + ADD_NL
6542 || op == ANYBUT || op == ANYBUT + ADD_NL
6543 || op == EXACTLY)
6544 {
6545 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006546 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006547 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006548 fprintf(f, "%c", *s++);
6549 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006550 s++;
6551 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006552 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006553 }
6554
6555 /* Header fields of interest. */
6556 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006557 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006558 ? (char *)transchar(r->regstart)
6559 : "multibyte", r->regstart);
6560 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006561 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006562 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006563 fprintf(f, "must have \"%s\"", r->regmust);
6564 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006565
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006566#ifdef BT_REGEXP_LOG
6567 fclose(f);
6568#endif
6569}
6570#endif /* BT_REGEXP_DUMP */
6571
6572#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006573/*
6574 * regprop - printable representation of opcode
6575 */
6576 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006577regprop(char_u *op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006578{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006579 char *p;
6580 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006581
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006582 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006583
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006584 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006585 {
6586 case BOL:
6587 p = "BOL";
6588 break;
6589 case EOL:
6590 p = "EOL";
6591 break;
6592 case RE_BOF:
6593 p = "BOF";
6594 break;
6595 case RE_EOF:
6596 p = "EOF";
6597 break;
6598 case CURSOR:
6599 p = "CURSOR";
6600 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006601 case RE_VISUAL:
6602 p = "RE_VISUAL";
6603 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006604 case RE_LNUM:
6605 p = "RE_LNUM";
6606 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006607 case RE_MARK:
6608 p = "RE_MARK";
6609 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006610 case RE_COL:
6611 p = "RE_COL";
6612 break;
6613 case RE_VCOL:
6614 p = "RE_VCOL";
6615 break;
6616 case BOW:
6617 p = "BOW";
6618 break;
6619 case EOW:
6620 p = "EOW";
6621 break;
6622 case ANY:
6623 p = "ANY";
6624 break;
6625 case ANY + ADD_NL:
6626 p = "ANY+NL";
6627 break;
6628 case ANYOF:
6629 p = "ANYOF";
6630 break;
6631 case ANYOF + ADD_NL:
6632 p = "ANYOF+NL";
6633 break;
6634 case ANYBUT:
6635 p = "ANYBUT";
6636 break;
6637 case ANYBUT + ADD_NL:
6638 p = "ANYBUT+NL";
6639 break;
6640 case IDENT:
6641 p = "IDENT";
6642 break;
6643 case IDENT + ADD_NL:
6644 p = "IDENT+NL";
6645 break;
6646 case SIDENT:
6647 p = "SIDENT";
6648 break;
6649 case SIDENT + ADD_NL:
6650 p = "SIDENT+NL";
6651 break;
6652 case KWORD:
6653 p = "KWORD";
6654 break;
6655 case KWORD + ADD_NL:
6656 p = "KWORD+NL";
6657 break;
6658 case SKWORD:
6659 p = "SKWORD";
6660 break;
6661 case SKWORD + ADD_NL:
6662 p = "SKWORD+NL";
6663 break;
6664 case FNAME:
6665 p = "FNAME";
6666 break;
6667 case FNAME + ADD_NL:
6668 p = "FNAME+NL";
6669 break;
6670 case SFNAME:
6671 p = "SFNAME";
6672 break;
6673 case SFNAME + ADD_NL:
6674 p = "SFNAME+NL";
6675 break;
6676 case PRINT:
6677 p = "PRINT";
6678 break;
6679 case PRINT + ADD_NL:
6680 p = "PRINT+NL";
6681 break;
6682 case SPRINT:
6683 p = "SPRINT";
6684 break;
6685 case SPRINT + ADD_NL:
6686 p = "SPRINT+NL";
6687 break;
6688 case WHITE:
6689 p = "WHITE";
6690 break;
6691 case WHITE + ADD_NL:
6692 p = "WHITE+NL";
6693 break;
6694 case NWHITE:
6695 p = "NWHITE";
6696 break;
6697 case NWHITE + ADD_NL:
6698 p = "NWHITE+NL";
6699 break;
6700 case DIGIT:
6701 p = "DIGIT";
6702 break;
6703 case DIGIT + ADD_NL:
6704 p = "DIGIT+NL";
6705 break;
6706 case NDIGIT:
6707 p = "NDIGIT";
6708 break;
6709 case NDIGIT + ADD_NL:
6710 p = "NDIGIT+NL";
6711 break;
6712 case HEX:
6713 p = "HEX";
6714 break;
6715 case HEX + ADD_NL:
6716 p = "HEX+NL";
6717 break;
6718 case NHEX:
6719 p = "NHEX";
6720 break;
6721 case NHEX + ADD_NL:
6722 p = "NHEX+NL";
6723 break;
6724 case OCTAL:
6725 p = "OCTAL";
6726 break;
6727 case OCTAL + ADD_NL:
6728 p = "OCTAL+NL";
6729 break;
6730 case NOCTAL:
6731 p = "NOCTAL";
6732 break;
6733 case NOCTAL + ADD_NL:
6734 p = "NOCTAL+NL";
6735 break;
6736 case WORD:
6737 p = "WORD";
6738 break;
6739 case WORD + ADD_NL:
6740 p = "WORD+NL";
6741 break;
6742 case NWORD:
6743 p = "NWORD";
6744 break;
6745 case NWORD + ADD_NL:
6746 p = "NWORD+NL";
6747 break;
6748 case HEAD:
6749 p = "HEAD";
6750 break;
6751 case HEAD + ADD_NL:
6752 p = "HEAD+NL";
6753 break;
6754 case NHEAD:
6755 p = "NHEAD";
6756 break;
6757 case NHEAD + ADD_NL:
6758 p = "NHEAD+NL";
6759 break;
6760 case ALPHA:
6761 p = "ALPHA";
6762 break;
6763 case ALPHA + ADD_NL:
6764 p = "ALPHA+NL";
6765 break;
6766 case NALPHA:
6767 p = "NALPHA";
6768 break;
6769 case NALPHA + ADD_NL:
6770 p = "NALPHA+NL";
6771 break;
6772 case LOWER:
6773 p = "LOWER";
6774 break;
6775 case LOWER + ADD_NL:
6776 p = "LOWER+NL";
6777 break;
6778 case NLOWER:
6779 p = "NLOWER";
6780 break;
6781 case NLOWER + ADD_NL:
6782 p = "NLOWER+NL";
6783 break;
6784 case UPPER:
6785 p = "UPPER";
6786 break;
6787 case UPPER + ADD_NL:
6788 p = "UPPER+NL";
6789 break;
6790 case NUPPER:
6791 p = "NUPPER";
6792 break;
6793 case NUPPER + ADD_NL:
6794 p = "NUPPER+NL";
6795 break;
6796 case BRANCH:
6797 p = "BRANCH";
6798 break;
6799 case EXACTLY:
6800 p = "EXACTLY";
6801 break;
6802 case NOTHING:
6803 p = "NOTHING";
6804 break;
6805 case BACK:
6806 p = "BACK";
6807 break;
6808 case END:
6809 p = "END";
6810 break;
6811 case MOPEN + 0:
6812 p = "MATCH START";
6813 break;
6814 case MOPEN + 1:
6815 case MOPEN + 2:
6816 case MOPEN + 3:
6817 case MOPEN + 4:
6818 case MOPEN + 5:
6819 case MOPEN + 6:
6820 case MOPEN + 7:
6821 case MOPEN + 8:
6822 case MOPEN + 9:
6823 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6824 p = NULL;
6825 break;
6826 case MCLOSE + 0:
6827 p = "MATCH END";
6828 break;
6829 case MCLOSE + 1:
6830 case MCLOSE + 2:
6831 case MCLOSE + 3:
6832 case MCLOSE + 4:
6833 case MCLOSE + 5:
6834 case MCLOSE + 6:
6835 case MCLOSE + 7:
6836 case MCLOSE + 8:
6837 case MCLOSE + 9:
6838 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6839 p = NULL;
6840 break;
6841 case BACKREF + 1:
6842 case BACKREF + 2:
6843 case BACKREF + 3:
6844 case BACKREF + 4:
6845 case BACKREF + 5:
6846 case BACKREF + 6:
6847 case BACKREF + 7:
6848 case BACKREF + 8:
6849 case BACKREF + 9:
6850 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6851 p = NULL;
6852 break;
6853 case NOPEN:
6854 p = "NOPEN";
6855 break;
6856 case NCLOSE:
6857 p = "NCLOSE";
6858 break;
6859#ifdef FEAT_SYN_HL
6860 case ZOPEN + 1:
6861 case ZOPEN + 2:
6862 case ZOPEN + 3:
6863 case ZOPEN + 4:
6864 case ZOPEN + 5:
6865 case ZOPEN + 6:
6866 case ZOPEN + 7:
6867 case ZOPEN + 8:
6868 case ZOPEN + 9:
6869 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6870 p = NULL;
6871 break;
6872 case ZCLOSE + 1:
6873 case ZCLOSE + 2:
6874 case ZCLOSE + 3:
6875 case ZCLOSE + 4:
6876 case ZCLOSE + 5:
6877 case ZCLOSE + 6:
6878 case ZCLOSE + 7:
6879 case ZCLOSE + 8:
6880 case ZCLOSE + 9:
6881 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6882 p = NULL;
6883 break;
6884 case ZREF + 1:
6885 case ZREF + 2:
6886 case ZREF + 3:
6887 case ZREF + 4:
6888 case ZREF + 5:
6889 case ZREF + 6:
6890 case ZREF + 7:
6891 case ZREF + 8:
6892 case ZREF + 9:
6893 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6894 p = NULL;
6895 break;
6896#endif
6897 case STAR:
6898 p = "STAR";
6899 break;
6900 case PLUS:
6901 p = "PLUS";
6902 break;
6903 case NOMATCH:
6904 p = "NOMATCH";
6905 break;
6906 case MATCH:
6907 p = "MATCH";
6908 break;
6909 case BEHIND:
6910 p = "BEHIND";
6911 break;
6912 case NOBEHIND:
6913 p = "NOBEHIND";
6914 break;
6915 case SUBPAT:
6916 p = "SUBPAT";
6917 break;
6918 case BRACE_LIMITS:
6919 p = "BRACE_LIMITS";
6920 break;
6921 case BRACE_SIMPLE:
6922 p = "BRACE_SIMPLE";
6923 break;
6924 case BRACE_COMPLEX + 0:
6925 case BRACE_COMPLEX + 1:
6926 case BRACE_COMPLEX + 2:
6927 case BRACE_COMPLEX + 3:
6928 case BRACE_COMPLEX + 4:
6929 case BRACE_COMPLEX + 5:
6930 case BRACE_COMPLEX + 6:
6931 case BRACE_COMPLEX + 7:
6932 case BRACE_COMPLEX + 8:
6933 case BRACE_COMPLEX + 9:
6934 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6935 p = NULL;
6936 break;
6937#ifdef FEAT_MBYTE
6938 case MULTIBYTECODE:
6939 p = "MULTIBYTECODE";
6940 break;
6941#endif
6942 case NEWL:
6943 p = "NEWL";
6944 break;
6945 default:
6946 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6947 p = NULL;
6948 break;
6949 }
6950 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006951 STRCAT(buf, p);
6952 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006953}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006954#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006955
Bram Moolenaarfb031402014-09-09 17:18:49 +02006956/*
6957 * Used in a place where no * or \+ can follow.
6958 */
6959 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006960re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006961{
6962 if (re_multi_type(peekchr()) == MULTI_MULT)
6963 EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
6964 return OK;
6965}
6966
Bram Moolenaar071d4272004-06-13 20:20:40 +00006967#ifdef FEAT_MBYTE
static void mb_decompose(int c, int *c1, int *c2, int *c3);

/*
 * One entry of the decomposition table: up to three characters that
 * together replace one character.  Unused positions are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/* 0xfb20 - 0xfb4f, indexed by "character - 0xfb20" */
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},              /* 0xfb20       alt ayin */
    {0x5d0, 0, 0},              /* 0xfb21       alt alef */
    {0x5d3, 0, 0},              /* 0xfb22       alt dalet */
    {0x5d4, 0, 0},              /* 0xfb23       alt he */
    {0x5db, 0, 0},              /* 0xfb24       alt kaf */
    {0x5dc, 0, 0},              /* 0xfb25       alt lamed */
    {0x5dd, 0, 0},              /* 0xfb26       alt mem-sofit */
    {0x5e8, 0, 0},              /* 0xfb27       alt resh */
    {0x5ea, 0, 0},              /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    /* NOTE(review): Unicode appears to name U+FB30 "alef with mapiq"
     * (U+05D0 U+05BC) rather than hiriq -- confirm against the chart. */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc, 0},          /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters.
 * For "c" in the range 0xfb20 - 0xfb4f the result comes from
 * decomp_table[]; "*c1", "*c2" and "*c3" get the entry's values, zero for
 * positions that are not used.
 * Any other character is returned unchanged in "*c1", with "*c2" and "*c3"
 * set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    const decomp_T *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        /* Not in the table: the character stands for itself. */
        *c1 = c;
        *c2 = 0;
        *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
7047#endif
7048
7049/*
Bram Moolenaar6100d022016-10-02 16:51:57 +02007050 * Compare two strings, ignore case if rex.reg_ic set.
Bram Moolenaar071d4272004-06-13 20:20:40 +00007051 * Return 0 if strings match, non-zero otherwise.
7052 * Correct the length "*n" when composing characters are ignored.
7053 */
7054 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007055cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007056{
7057 int result;
7058
Bram Moolenaar6100d022016-10-02 16:51:57 +02007059 if (!rex.reg_ic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007060 result = STRNCMP(s1, s2, *n);
7061 else
7062 result = MB_STRNICMP(s1, s2, *n);
7063
7064#ifdef FEAT_MBYTE
7065 /* if it failed and it's utf8 and we want to combineignore: */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007066 if (result != 0 && enc_utf8 && rex.reg_icombine)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007067 {
7068 char_u *str1, *str2;
7069 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007070 int junk;
7071
7072 /* we have to handle the strcmp ourselves, since it is necessary to
7073 * deal with the composing characters by ignoring them: */
7074 str1 = s1;
7075 str2 = s2;
7076 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007077 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007078 {
7079 c1 = mb_ptr2char_adv(&str1);
7080 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007081
7082 /* decompose the character if necessary, into 'base' characters
7083 * because I don't care about Arabic, I will hard-code the Hebrew
7084 * which I *do* care about! So sue me... */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007085 if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007086 {
7087 /* decomposition necessary? */
7088 mb_decompose(c1, &c11, &junk, &junk);
7089 mb_decompose(c2, &c12, &junk, &junk);
7090 c1 = c11;
7091 c2 = c12;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007092 if (c11 != c12
7093 && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007094 break;
7095 }
7096 }
7097 result = c2 - c1;
7098 if (result == 0)
7099 *n = (int)(str2 - s2);
7100 }
7101#endif
7102
7103 return result;
7104}
7105
7106/*
7107 * cstrchr: This function is used a lot for simple searches, keep it fast!
7108 */
7109 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007110cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007111{
7112 char_u *p;
7113 int cc;
7114
Bram Moolenaar6100d022016-10-02 16:51:57 +02007115 if (!rex.reg_ic
Bram Moolenaar071d4272004-06-13 20:20:40 +00007116#ifdef FEAT_MBYTE
7117 || (!enc_utf8 && mb_char2len(c) > 1)
7118#endif
7119 )
7120 return vim_strchr(s, c);
7121
7122 /* tolower() and toupper() can be slow, comparing twice should be a lot
7123 * faster (esp. when using MS Visual C++!).
7124 * For UTF-8 need to use folded case. */
7125#ifdef FEAT_MBYTE
7126 if (enc_utf8 && c > 0x80)
7127 cc = utf_fold(c);
7128 else
7129#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007130 if (MB_ISUPPER(c))
7131 cc = MB_TOLOWER(c);
7132 else if (MB_ISLOWER(c))
7133 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007134 else
7135 return vim_strchr(s, c);
7136
7137#ifdef FEAT_MBYTE
7138 if (has_mbyte)
7139 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007140 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007141 {
7142 if (enc_utf8 && c > 0x80)
7143 {
7144 if (utf_fold(utf_ptr2char(p)) == cc)
7145 return p;
7146 }
7147 else if (*p == c || *p == cc)
7148 return p;
7149 }
7150 }
7151 else
7152#endif
7153 /* Faster version for when there are no multi-byte characters. */
7154 for (p = s; *p != NUL; ++p)
7155 if (*p == c || *p == cc)
7156 return p;
7157
7158 return NULL;
7159}
7160
7161/***************************************************************
7162 * regsub stuff *
7163 ***************************************************************/
7164
/*
 * We should define fptr as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 *
 * A function of this type stores the converted character in its first
 * argument and returns the function to use for the next character (or NULL).
 */
typedef void (*(*fptr_T)(int *, int))();

/* Case conversion functions used for "\u", "\U", "\l" and "\L" in the
 * substitute string, see vim_regsub_both(). */
static fptr_T do_upper(int *, int);
static fptr_T do_Upper(int *, int);
static fptr_T do_lower(int *, int);
static fptr_T do_Lower(int *, int);

/* Does the work for both vim_regsub() and vim_regsub_multi(). */
static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007179
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007180 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007181do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007182{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007183 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007184
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007185 return (fptr_T)NULL;
7186}
7187
7188 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007189do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007190{
7191 *d = MB_TOUPPER(c);
7192
7193 return (fptr_T)do_Upper;
7194}
7195
7196 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007197do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007198{
7199 *d = MB_TOLOWER(c);
7200
7201 return (fptr_T)NULL;
7202}
7203
7204 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007205do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007206{
7207 *d = MB_TOLOWER(c);
7208
7209 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007210}
7211
7212/*
7213 * regtilde(): Replace tildes in the pattern by the old pattern.
7214 *
7215 * Short explanation of the tilde: It stands for the previous replacement
7216 * pattern. If that previous pattern also contains a ~ we should go back a
7217 * step further... But we insert the previous pattern into the current one
7218 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007219 * This still does not handle the case where "magic" changes. So require the
7220 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007221 *
7222 * The tildes are parsed once before the first call to vim_regsub().
7223 */
7224 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007225regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007226{
7227 char_u *newsub = source;
7228 char_u *tmpsub;
7229 char_u *p;
7230 int len;
7231 int prevlen;
7232
7233 for (p = newsub; *p; ++p)
7234 {
7235 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7236 {
7237 if (reg_prev_sub != NULL)
7238 {
7239 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7240 prevlen = (int)STRLEN(reg_prev_sub);
7241 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7242 if (tmpsub != NULL)
7243 {
7244 /* copy prefix */
7245 len = (int)(p - newsub); /* not including ~ */
7246 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007247 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007248 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7249 /* copy postfix */
7250 if (!magic)
7251 ++p; /* back off \ */
7252 STRCPY(tmpsub + len + prevlen, p + 1);
7253
7254 if (newsub != source) /* already allocated newsub */
7255 vim_free(newsub);
7256 newsub = tmpsub;
7257 p = newsub + len + prevlen;
7258 }
7259 }
7260 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007261 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007262 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007263 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007264 --p;
7265 }
7266 else
7267 {
7268 if (*p == '\\' && p[1]) /* skip escaped characters */
7269 ++p;
7270#ifdef FEAT_MBYTE
7271 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007272 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007273#endif
7274 }
7275 }
7276
7277 vim_free(reg_prev_sub);
7278 if (newsub != source) /* newsub was allocated, just keep it */
7279 reg_prev_sub = newsub;
7280 else /* no ~ found, need to save newsub */
7281 reg_prev_sub = vim_strsave(newsub);
7282 return newsub;
7283}
7284
7285#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used for reg_submatch().  Needed for when the
 * substitution string is an expression that contains a call to substitute()
 * and submatch().
 * vim_regsub_both() fills the fields from the "rex" fields with the
 * corresponding "reg_" names before evaluating the expression. */
typedef struct {
    regmatch_T  *sm_match;      /* single-string match; when NULL "sm_mmatch"
                                   is used by reg_submatch() */
    regmmatch_T *sm_mmatch;     /* multi-line match */
    linenr_T    sm_firstlnum;   /* copy of rex.reg_firstlnum */
    linenr_T    sm_maxline;     /* copy of rex.reg_maxline */
    int         sm_line_lbr;    /* copy of rex.reg_line_lbr; when set a NL in
                                   the expression result is not changed to
                                   a CR */
} regsubmatch_T;

static regsubmatch_T rsm;  /* can only be used when can_f_submatch is TRUE */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007300#endif
7301
7302#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007303
7304/*
7305 * Put the submatches in "argv[0]" which is a list passed into call_func() by
7306 * vim_regsub_both().
7307 */
7308 static int
7309fill_submatch_list(int argc UNUSED, typval_T *argv, int argcount)
7310{
7311 listitem_T *li;
7312 int i;
7313 char_u *s;
7314
7315 if (argcount == 0)
7316 /* called function doesn't take an argument */
7317 return 0;
7318
7319 /* Relies on sl_list to be the first item in staticList10_T. */
7320 init_static_list((staticList10_T *)(argv->vval.v_list));
7321
7322 /* There are always 10 list items in staticList10_T. */
7323 li = argv->vval.v_list->lv_first;
7324 for (i = 0; i < 10; ++i)
7325 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007326 s = rsm.sm_match->startp[i];
7327 if (s == NULL || rsm.sm_match->endp[i] == NULL)
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007328 s = NULL;
7329 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007330 s = vim_strnsave(s, (int)(rsm.sm_match->endp[i] - s));
Bram Moolenaardf48fb42016-07-22 21:50:18 +02007331 li->li_tv.v_type = VAR_STRING;
7332 li->li_tv.vval.v_string = s;
7333 li = li->li_next;
7334 }
7335 return 1;
7336}
7337
7338 static void
7339clear_submatch_list(staticList10_T *sl)
7340{
7341 int i;
7342
7343 for (i = 0; i < 10; ++i)
7344 vim_free(sl->sl_items[i].li_tv.vval.v_string);
7345}
7346
Bram Moolenaar071d4272004-06-13 20:20:40 +00007347/*
7348 * vim_regsub() - perform substitutions after a vim_regexec() or
7349 * vim_regexec_multi() match.
7350 *
7351 * If "copy" is TRUE really copy into "dest".
7352 * If "copy" is FALSE nothing is copied, this is just to find out the length
7353 * of the result.
7354 *
7355 * If "backslash" is TRUE, a backslash will be removed later, need to double
7356 * them to keep them, and insert a backslash before a CR to avoid it being
7357 * replaced with a line break later.
7358 *
7359 * Note: The matched text must not change between the call of
7360 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7361 * references invalid!
7362 *
7363 * Returns the size of the replacement, including terminating NUL.
7364 */
7365 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007366vim_regsub(
7367 regmatch_T *rmp,
7368 char_u *source,
Bram Moolenaar72ab7292016-07-19 19:10:51 +02007369 typval_T *expr,
Bram Moolenaar05540972016-01-30 20:31:25 +01007370 char_u *dest,
7371 int copy,
7372 int magic,
7373 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007374{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007375 int result;
7376 regexec_T rex_save;
7377 int rex_in_use_save = rex_in_use;
7378
7379 if (rex_in_use)
7380 /* Being called recursively, save the state. */
7381 rex_save = rex;
7382 rex_in_use = TRUE;
7383
7384 rex.reg_match = rmp;
7385 rex.reg_mmatch = NULL;
7386 rex.reg_maxline = 0;
7387 rex.reg_buf = curbuf;
7388 rex.reg_line_lbr = TRUE;
7389 result = vim_regsub_both(source, expr, dest, copy, magic, backslash);
7390
7391 rex_in_use = rex_in_use_save;
7392 if (rex_in_use)
7393 rex = rex_save;
7394
7395 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007396}
7397#endif
7398
7399 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007400vim_regsub_multi(
7401 regmmatch_T *rmp,
7402 linenr_T lnum,
7403 char_u *source,
7404 char_u *dest,
7405 int copy,
7406 int magic,
7407 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007408{
Bram Moolenaar6100d022016-10-02 16:51:57 +02007409 int result;
7410 regexec_T rex_save;
7411 int rex_in_use_save = rex_in_use;
7412
7413 if (rex_in_use)
7414 /* Being called recursively, save the state. */
7415 rex_save = rex;
7416 rex_in_use = TRUE;
7417
7418 rex.reg_match = NULL;
7419 rex.reg_mmatch = rmp;
7420 rex.reg_buf = curbuf; /* always works on the current buffer! */
7421 rex.reg_firstlnum = lnum;
7422 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7423 rex.reg_line_lbr = FALSE;
7424 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash);
7425
7426 rex_in_use = rex_in_use_save;
7427 if (rex_in_use)
7428 rex = rex_save;
7429
7430 return result;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007431}
7432
/*
 * Does the work for vim_regsub() and vim_regsub_multi(), which set up "rex"
 * before calling this.
 *
 * "source" is the substitute string.  When it starts with "\=", or when
 * "expr" is not NULL, the replacement is the result of evaluating an
 * expression or calling the function/partial in "expr".
 * If "copy" is TRUE the result is stored in "dest", otherwise only the size
 * is computed and "dest" is not written to.
 * "magic" selects whether "&" or "\&" stands for the whole match.
 * If "backslash" is TRUE backslashes and CRs in the result get an extra
 * backslash, because one level of backslashes is removed later.
 *
 * Returns the size of the replacement, including terminating NUL.
 * Returns zero when an argument is NULL or prog_magic_wrong() fails.
 */
    static int
vim_regsub_both(
    char_u      *source,
    typval_T    *expr,
    char_u      *dest,
    int         copy,
    int         magic,
    int         backslash)
{
    char_u      *src;
    char_u      *dst;
    char_u      *s;
    int         c;
    int         cc;
    int         no = -1;        /* number of the submatch to insert, -1 for
                                   an ordinary character */
    fptr_T      func_all = (fptr_T)NULL;    /* set by "\U" and "\L" */
    fptr_T      func_one = (fptr_T)NULL;    /* set by "\u" and "\l" */
    linenr_T    clnum = 0;      /* init for GCC */
    int         len = 0;        /* init for GCC */
#ifdef FEAT_EVAL
    /* Result of the expression, kept from the call with "copy" == FALSE until
     * the call with "copy" == TRUE. */
    static char_u *eval_result = NULL;
#endif

    /* Be paranoid... */
    if ((source == NULL && expr == NULL) || dest == NULL)
    {
        EMSG(_(e_null));
        return 0;
    }
    if (prog_magic_wrong())
        return 0;
    src = source;
    dst = dest;

    /*
     * When the substitute part starts with "\=" evaluate it as an expression.
     */
    if (expr != NULL || (source[0] == '\\' && source[1] == '='))
    {
#ifdef FEAT_EVAL
        /* To make sure that the length doesn't change between checking the
         * length and copying the string, and to speed up things, the
         * resulting string is saved from the call with "copy" == FALSE to the
         * call with "copy" == TRUE. */
        if (copy)
        {
            if (eval_result != NULL)
            {
                STRCPY(dest, eval_result);
                dst += STRLEN(eval_result);
                vim_free(eval_result);
                eval_result = NULL;
            }
        }
        else
        {
            int             prev_can_f_submatch = can_f_submatch;
            regsubmatch_T   rsm_save;

            vim_free(eval_result);

            /* The expression may contain substitute(), which calls us
             * recursively.  Make sure submatch() gets the text from the first
             * level. */
            if (can_f_submatch)
                rsm_save = rsm;
            can_f_submatch = TRUE;
            rsm.sm_match = rex.reg_match;
            rsm.sm_mmatch = rex.reg_mmatch;
            rsm.sm_firstlnum = rex.reg_firstlnum;
            rsm.sm_maxline = rex.reg_maxline;
            rsm.sm_line_lbr = rex.reg_line_lbr;

            if (expr != NULL)
            {
                typval_T        argv[2];
                int             dummy;
                char_u          buf[NUMBUFLEN];
                typval_T        rettv;
                staticList10_T  matchList;

                rettv.v_type = VAR_STRING;
                rettv.vval.v_string = NULL;
                argv[0].v_type = VAR_LIST;
                argv[0].vval.v_list = &matchList.sl_list;
                /* A zero length is used below to detect that
                 * fill_submatch_list() was not called. */
                matchList.sl_list.lv_len = 0;
                if (expr->v_type == VAR_FUNC)
                {
                    s = expr->vval.v_string;
                    call_func(s, (int)STRLEN(s), &rettv,
                                    1, argv, fill_submatch_list,
                                         0L, 0L, &dummy, TRUE, NULL, NULL);
                }
                else if (expr->v_type == VAR_PARTIAL)
                {
                    partial_T   *partial = expr->vval.v_partial;

                    s = partial_name(partial);
                    call_func(s, (int)STRLEN(s), &rettv,
                                    1, argv, fill_submatch_list,
                                      0L, 0L, &dummy, TRUE, partial, NULL);
                }
                if (matchList.sl_list.lv_len > 0)
                    /* fill_submatch_list() was called */
                    clear_submatch_list(&matchList);

                eval_result = get_tv_string_buf_chk(&rettv, buf);
                if (eval_result != NULL)
                    eval_result = vim_strsave(eval_result);
                clear_tv(&rettv);
            }
            else
                /* Skip over the "\=". */
                eval_result = eval_to_string(source + 2, NULL, TRUE);

            if (eval_result != NULL)
            {
                int had_backslash = FALSE;

                for (s = eval_result; *s != NUL; MB_PTR_ADV(s))
                {
                    /* Change NL to CR, so that it becomes a line break,
                     * unless called from vim_regexec_nl().
                     * Skip over a backslashed character. */
                    if (*s == NL && !rsm.sm_line_lbr)
                        *s = CAR;
                    else if (*s == '\\' && s[1] != NUL)
                    {
                        ++s;
                        /* Change NL to CR here too, so that this works:
                         * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
                         *   abc\
                         *   def
                         * Not when called from vim_regexec_nl().
                         */
                        if (*s == NL && !rsm.sm_line_lbr)
                            *s = CAR;
                        had_backslash = TRUE;
                    }
                }
                if (had_backslash && backslash)
                {
                    /* Backslashes will be consumed, need to double them. */
                    s = vim_strsave_escaped(eval_result, (char_u *)"\\");
                    if (s != NULL)
                    {
                        vim_free(eval_result);
                        eval_result = s;
                    }
                }

                dst += STRLEN(eval_result);
            }

            /* Restore the submatch() state of the outer level, if any. */
            can_f_submatch = prev_can_f_submatch;
            if (can_f_submatch)
                rsm = rsm_save;
        }
#endif
    }
    else
      while ((c = *src++) != NUL)
      {
        /* Find out if this is "&", "\&" or "\0" .. "\9" (sets "no") or one
         * of the case changing items (sets "func_one" or "func_all"). */
        if (c == '&' && magic)
            no = 0;
        else if (c == '\\' && *src != NUL)
        {
            if (*src == '&' && !magic)
            {
                ++src;
                no = 0;
            }
            else if ('0' <= *src && *src <= '9')
            {
                no = *src++ - '0';
            }
            else if (vim_strchr((char_u *)"uUlLeE", *src))
            {
                switch (*src++)
                {
                case 'u':   func_one = (fptr_T)do_upper;
                            continue;
                case 'U':   func_all = (fptr_T)do_Upper;
                            continue;
                case 'l':   func_one = (fptr_T)do_lower;
                            continue;
                case 'L':   func_all = (fptr_T)do_Lower;
                            continue;
                case 'e':
                case 'E':   func_one = func_all = (fptr_T)NULL;
                            continue;
                }
            }
        }
        if (no < 0)           /* Ordinary character. */
        {
            if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
            {
                /* Copy a special key as-is. */
                if (copy)
                {
                    *dst++ = c;
                    *dst++ = *src++;
                    *dst++ = *src++;
                }
                else
                {
                    dst += 3;
                    src += 2;
                }
                continue;
            }

            if (c == '\\' && *src != NUL)
            {
                /* Check for abbreviations -- webb */
                switch (*src)
                {
                    case 'r':   c = CAR;        ++src;  break;
                    case 'n':   c = NL;         ++src;  break;
                    case 't':   c = TAB;        ++src;  break;
                 /* Oh no!  \e already has meaning in subst pat :-( */
                 /* case 'e':   c = ESC;        ++src;  break; */
                    case 'b':   c = Ctrl_H;     ++src;  break;

                    /* If "backslash" is TRUE the backslash will be removed
                     * later.  Used to insert a literal CR. */
                    default:    if (backslash)
                                {
                                    if (copy)
                                        *dst = '\\';
                                    ++dst;
                                }
                                c = *src++;
                }
            }
#ifdef FEAT_MBYTE
            else if (has_mbyte)
                c = mb_ptr2char(src - 1);
#endif

            /* Apply the case conversion, if any.  The conversion function
             * returns the function to use for the next character. */
            if (func_one != (fptr_T)NULL)
                /* Turbo C complains without the typecast */
                func_one = (fptr_T)(func_one(&cc, c));
            else if (func_all != (fptr_T)NULL)
                /* Turbo C complains without the typecast */
                func_all = (fptr_T)(func_all(&cc, c));
            else /* just copy */
                cc = c;

            /* Write to buffer, if copy is set. */
#ifdef FEAT_MBYTE
            if (has_mbyte)
            {
                int totlen = mb_ptr2len(src - 1);

                if (copy)
                    mb_char2bytes(cc, dst);
                /* The last byte is counted by "dst++" below. */
                dst += mb_char2len(cc) - 1;
                if (enc_utf8)
                {
                    int clen = utf_ptr2len(src - 1);

                    /* If the character length is shorter than "totlen", there
                     * are composing characters; copy them as-is. */
                    if (clen < totlen)
                    {
                        if (copy)
                            mch_memmove(dst + 1, src - 1 + clen,
                                                 (size_t)(totlen - clen));
                        dst += totlen - clen;
                    }
                }
                src += totlen - 1;
            }
            else
#endif
                if (copy)
                    *dst = cc;
            dst++;
        }
        else
        {
            /* Insert submatch "no": find its start "s" and the number of
             * bytes "len" to use from (the first line of) it. */
            if (REG_MULTI)
            {
                clnum = rex.reg_mmatch->startpos[no].lnum;
                if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
                    s = NULL;
                else
                {
                    s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
                    if (rex.reg_mmatch->endpos[no].lnum == clnum)
                        len = rex.reg_mmatch->endpos[no].col
                                            - rex.reg_mmatch->startpos[no].col;
                    else
                        len = (int)STRLEN(s);
                }
            }
            else
            {
                s = rex.reg_match->startp[no];
                if (rex.reg_match->endp[no] == NULL)
                    s = NULL;
                else
                    len = (int)(rex.reg_match->endp[no] - s);
            }
            if (s != NULL)
            {
                for (;;)
                {
                    if (len == 0)
                    {
                        if (REG_MULTI)
                        {
                            /* End of this line: done when it is the last line
                             * of the submatch, otherwise insert a CR and go
                             * on with the next line. */
                            if (rex.reg_mmatch->endpos[no].lnum == clnum)
                                break;
                            if (copy)
                                *dst = CAR;
                            ++dst;
                            s = reg_getline(++clnum);
                            if (rex.reg_mmatch->endpos[no].lnum == clnum)
                                len = rex.reg_mmatch->endpos[no].col;
                            else
                                len = (int)STRLEN(s);
                        }
                        else
                            break;
                    }
                    else if (*s == NUL) /* we hit NUL. */
                    {
                        /* Note: the terminating NUL is not stored in this
                         * case. */
                        if (copy)
                            EMSG(_(e_re_damg));
                        goto exit;
                    }
                    else
                    {
                        if (backslash && (*s == CAR || *s == '\\'))
                        {
                            /*
                             * Insert a backslash in front of a CR, otherwise
                             * it will be replaced by a line break.
                             * Number of backslashes will be halved later,
                             * double them here.
                             */
                            if (copy)
                            {
                                dst[0] = '\\';
                                dst[1] = *s;
                            }
                            dst += 2;
                        }
                        else
                        {
#ifdef FEAT_MBYTE
                            if (has_mbyte)
                                c = mb_ptr2char(s);
                            else
#endif
                                c = *s;

                            if (func_one != (fptr_T)NULL)
                                /* Turbo C complains without the typecast */
                                func_one = (fptr_T)(func_one(&cc, c));
                            else if (func_all != (fptr_T)NULL)
                                /* Turbo C complains without the typecast */
                                func_all = (fptr_T)(func_all(&cc, c));
                            else /* just copy */
                                cc = c;

#ifdef FEAT_MBYTE
                            if (has_mbyte)
                            {
                                int l;

                                /* Copy composing characters separately, one
                                 * at a time. */
                                if (enc_utf8)
                                    l = utf_ptr2len(s) - 1;
                                else
                                    l = mb_ptr2len(s) - 1;

                                s += l;
                                len -= l;
                                if (copy)
                                    mb_char2bytes(cc, dst);
                                dst += mb_char2len(cc) - 1;
                            }
                            else
#endif
                                if (copy)
                                    *dst = cc;
                            dst++;
                        }

                        ++s;
                        --len;
                    }
                }
            }
            no = -1;
        }
      }
    if (copy)
        *dst = NUL;

exit:
    return (int)((dst - dest) + 1);
}
7840
7841#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007842static char_u *reg_getline_submatch(linenr_T lnum);
Bram Moolenaard32a3192009-11-26 19:40:49 +00007843
Bram Moolenaar071d4272004-06-13 20:20:40 +00007844/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007845 * Call reg_getline() with the line numbers from the submatch. If a
7846 * substitute() was used the reg_maxline and other values have been
7847 * overwritten.
7848 */
7849 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007850reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007851{
7852 char_u *s;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007853 linenr_T save_first = rex.reg_firstlnum;
7854 linenr_T save_max = rex.reg_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007855
Bram Moolenaar6100d022016-10-02 16:51:57 +02007856 rex.reg_firstlnum = rsm.sm_firstlnum;
7857 rex.reg_maxline = rsm.sm_maxline;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007858
7859 s = reg_getline(lnum);
7860
Bram Moolenaar6100d022016-10-02 16:51:57 +02007861 rex.reg_firstlnum = save_first;
7862 rex.reg_maxline = save_max;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007863 return s;
7864}
7865
7866/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007867 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007868 * allocated memory.
7869 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7870 */
7871 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007872reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007873{
7874 char_u *retval = NULL;
7875 char_u *s;
7876 int len;
7877 int round;
7878 linenr_T lnum;
7879
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007880 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007881 return NULL;
7882
Bram Moolenaar6100d022016-10-02 16:51:57 +02007883 if (rsm.sm_match == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007884 {
7885 /*
7886 * First round: compute the length and allocate memory.
7887 * Second round: copy the text.
7888 */
7889 for (round = 1; round <= 2; ++round)
7890 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007891 lnum = rsm.sm_mmatch->startpos[no].lnum;
7892 if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007893 return NULL;
7894
Bram Moolenaar6100d022016-10-02 16:51:57 +02007895 s = reg_getline_submatch(lnum) + rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007896 if (s == NULL) /* anti-crash check, cannot happen? */
7897 break;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007898 if (rsm.sm_mmatch->endpos[no].lnum == lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007899 {
7900 /* Within one line: take form start to end col. */
Bram Moolenaar6100d022016-10-02 16:51:57 +02007901 len = rsm.sm_mmatch->endpos[no].col
7902 - rsm.sm_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007903 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007904 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007905 ++len;
7906 }
7907 else
7908 {
7909 /* Multiple lines: take start line from start col, middle
7910 * lines completely and end line up to end col. */
7911 len = (int)STRLEN(s);
7912 if (round == 2)
7913 {
7914 STRCPY(retval, s);
7915 retval[len] = '\n';
7916 }
7917 ++len;
7918 ++lnum;
Bram Moolenaar6100d022016-10-02 16:51:57 +02007919 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007920 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007921 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007922 if (round == 2)
7923 STRCPY(retval + len, s);
7924 len += (int)STRLEN(s);
7925 if (round == 2)
7926 retval[len] = '\n';
7927 ++len;
7928 }
7929 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007930 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar6100d022016-10-02 16:51:57 +02007931 rsm.sm_mmatch->endpos[no].col);
7932 len += rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007933 if (round == 2)
7934 retval[len] = NUL;
7935 ++len;
7936 }
7937
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007938 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007939 {
7940 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007941 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007942 return NULL;
7943 }
7944 }
7945 }
7946 else
7947 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007948 s = rsm.sm_match->startp[no];
7949 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007950 retval = NULL;
7951 else
Bram Moolenaar6100d022016-10-02 16:51:57 +02007952 retval = vim_strnsave(s, (int)(rsm.sm_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007953 }
7954
7955 return retval;
7956}
Bram Moolenaar41571762014-04-02 19:00:58 +02007957
7958/*
7959 * Used for the submatch() function with the optional non-zero argument: get
7960 * the list of strings from the n'th submatch in allocated memory with NULs
7961 * represented in NLs.
7962 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7963 * command, for a non-existing submatch and for any error.
7964 */
7965 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007966reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007967{
7968 char_u *s;
7969 linenr_T slnum;
7970 linenr_T elnum;
7971 colnr_T scol;
7972 colnr_T ecol;
7973 int i;
7974 list_T *list;
7975 int error = FALSE;
7976
7977 if (!can_f_submatch || no < 0)
7978 return NULL;
7979
Bram Moolenaar6100d022016-10-02 16:51:57 +02007980 if (rsm.sm_match == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02007981 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02007982 slnum = rsm.sm_mmatch->startpos[no].lnum;
7983 elnum = rsm.sm_mmatch->endpos[no].lnum;
Bram Moolenaar41571762014-04-02 19:00:58 +02007984 if (slnum < 0 || elnum < 0)
7985 return NULL;
7986
Bram Moolenaar6100d022016-10-02 16:51:57 +02007987 scol = rsm.sm_mmatch->startpos[no].col;
7988 ecol = rsm.sm_mmatch->endpos[no].col;
Bram Moolenaar41571762014-04-02 19:00:58 +02007989
7990 list = list_alloc();
7991 if (list == NULL)
7992 return NULL;
7993
7994 s = reg_getline_submatch(slnum) + scol;
7995 if (slnum == elnum)
7996 {
7997 if (list_append_string(list, s, ecol - scol) == FAIL)
7998 error = TRUE;
7999 }
8000 else
8001 {
8002 if (list_append_string(list, s, -1) == FAIL)
8003 error = TRUE;
8004 for (i = 1; i < elnum - slnum; i++)
8005 {
8006 s = reg_getline_submatch(slnum + i);
8007 if (list_append_string(list, s, -1) == FAIL)
8008 error = TRUE;
8009 }
8010 s = reg_getline_submatch(elnum);
8011 if (list_append_string(list, s, ecol) == FAIL)
8012 error = TRUE;
8013 }
8014 }
8015 else
8016 {
Bram Moolenaar6100d022016-10-02 16:51:57 +02008017 s = rsm.sm_match->startp[no];
8018 if (s == NULL || rsm.sm_match->endp[no] == NULL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008019 return NULL;
8020 list = list_alloc();
8021 if (list == NULL)
8022 return NULL;
8023 if (list_append_string(list, s,
Bram Moolenaar6100d022016-10-02 16:51:57 +02008024 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
Bram Moolenaar41571762014-04-02 19:00:58 +02008025 error = TRUE;
8026 }
8027
8028 if (error)
8029 {
Bram Moolenaar107e1ee2016-04-08 17:07:19 +02008030 list_free(list);
Bram Moolenaar41571762014-04-02 19:00:58 +02008031 return NULL;
8032 }
8033 return list;
8034}
Bram Moolenaar071d4272004-06-13 20:20:40 +00008035#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008036
8037static regengine_T bt_regengine =
8038{
8039 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008040 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008041 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008042 bt_regexec_multi,
8043 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008044};
8045
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008046#include "regexp_nfa.c"
8047
8048static regengine_T nfa_regengine =
8049{
8050 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008051 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008052 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008053 nfa_regexec_multi,
8054 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008055};
8056
/*
 * Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
 * "p_re" and from a "\%#=" prefix in the pattern.
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008060
#ifdef DEBUG
/* Engine names for the debug message in vim_regcomp(), indexed by the engine
 * number given after "\%#=". */
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8068
8069/*
8070 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008071 * Returns the program in allocated memory.
8072 * Use vim_regfree() to free the memory.
8073 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008074 */
8075 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01008076vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008077{
8078 regprog_T *prog = NULL;
8079 char_u *expr = expr_arg;
8080
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008081 regexp_engine = p_re;
8082
8083 /* Check for prefix "\%#=", that sets the regexp engine */
8084 if (STRNCMP(expr, "\\%#=", 4) == 0)
8085 {
8086 int newengine = expr[4] - '0';
8087
8088 if (newengine == AUTOMATIC_ENGINE
8089 || newengine == BACKTRACKING_ENGINE
8090 || newengine == NFA_ENGINE)
8091 {
8092 regexp_engine = expr[4] - '0';
8093 expr += 5;
8094#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02008095 smsg((char_u *)"New regexp mode selected (%d): %s",
8096 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008097#endif
8098 }
8099 else
8100 {
8101 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8102 regexp_engine = AUTOMATIC_ENGINE;
8103 }
8104 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008105 bt_regengine.expr = expr;
8106 nfa_regengine.expr = expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008107
8108 /*
8109 * First try the NFA engine, unless backtracking was requested.
8110 */
8111 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008112 prog = nfa_regengine.regcomp(expr,
8113 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008114 else
8115 prog = bt_regengine.regcomp(expr, re_flags);
8116
Bram Moolenaarfda37292014-11-05 14:27:36 +01008117 /* Check for error compiling regexp with initial engine. */
8118 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008119 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008120#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008121 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8122 {
8123 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008124 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008125 if (f)
8126 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008127 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008128 fclose(f);
8129 }
8130 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008131 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8132 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008133 }
8134#endif
8135 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008136 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008137 * The NFA engine also fails for patterns that it can't handle well
8138 * but are still valid patterns, thus a retry should work.
8139 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008140 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008141 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008142 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008143 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008144 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008145 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008146
Bram Moolenaarfda37292014-11-05 14:27:36 +01008147 if (prog != NULL)
8148 {
8149 /* Store the info needed to call regcomp() again when the engine turns
8150 * out to be very slow when executing it. */
8151 prog->re_engine = regexp_engine;
8152 prog->re_flags = re_flags;
8153 }
8154
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008155 return prog;
8156}
8157
8158/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008159 * Free a compiled regexp program, returned by vim_regcomp().
8160 */
8161 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008162vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008163{
8164 if (prog != NULL)
8165 prog->engine->regfree(prog);
8166}
8167
#ifdef FEAT_EVAL
static void report_re_switch(char_u *pat);

/*
 * When "p_verbose" is above zero give a message that the backtracking engine
 * is going to be used for pattern "pat".
 */
    static void
report_re_switch(char_u *pat)
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
    MSG_PUTS(pat);
    verbose_leave();
}
#endif
8183
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01008184static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, int nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008185
Bram Moolenaar473de612013-06-08 18:19:48 +02008186/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008187 * Match a regexp against a string.
8188 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008189 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008190 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008191 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008192 *
8193 * Return TRUE if there is a match, FALSE if not.
8194 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008195 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01008196vim_regexec_both(
8197 regmatch_T *rmp,
8198 char_u *line, /* string to match against */
8199 colnr_T col, /* column to start looking for match */
8200 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008201{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008202 int result;
8203 regexec_T rex_save;
8204 int rex_in_use_save = rex_in_use;
8205
8206 if (rex_in_use)
8207 /* Being called recursively, save the state. */
8208 rex_save = rex;
8209 rex_in_use = TRUE;
8210 rex.reg_startp = NULL;
8211 rex.reg_endp = NULL;
8212 rex.reg_startpos = NULL;
8213 rex.reg_endpos = NULL;
8214
8215 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008216
8217 /* NFA engine aborted because it's very slow. */
8218 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8219 && result == NFA_TOO_EXPENSIVE)
8220 {
8221 int save_p_re = p_re;
8222 int re_flags = rmp->regprog->re_flags;
8223 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8224
8225 p_re = BACKTRACKING_ENGINE;
8226 vim_regfree(rmp->regprog);
8227 if (pat != NULL)
8228 {
8229#ifdef FEAT_EVAL
8230 report_re_switch(pat);
8231#endif
8232 rmp->regprog = vim_regcomp(pat, re_flags);
8233 if (rmp->regprog != NULL)
8234 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8235 vim_free(pat);
8236 }
8237
8238 p_re = save_p_re;
8239 }
Bram Moolenaar6100d022016-10-02 16:51:57 +02008240
8241 rex_in_use = rex_in_use_save;
8242 if (rex_in_use)
8243 rex = rex_save;
8244
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008245 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008246}
8247
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008248/*
8249 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008250 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008251 */
8252 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008253vim_regexec_prog(
8254 regprog_T **prog,
8255 int ignore_case,
8256 char_u *line,
8257 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008258{
8259 int r;
8260 regmatch_T regmatch;
8261
8262 regmatch.regprog = *prog;
8263 regmatch.rm_ic = ignore_case;
8264 r = vim_regexec_both(&regmatch, line, col, FALSE);
8265 *prog = regmatch.regprog;
8266 return r;
8267}
8268
8269/*
8270 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008271 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008272 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008273 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008274vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008275{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008276 return vim_regexec_both(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008277}
8278
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int		matched = vim_regexec_both(rmp, line, col, TRUE);

    return matched;
}
#endif
8292
8293/*
8294 * Match a regexp against multiple lines.
8295 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008296 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008297 * Uses curbuf for line count and 'iskeyword'.
8298 *
8299 * Return zero if there is no match. Return number of lines contained in the
8300 * match otherwise.
8301 */
8302 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008303vim_regexec_multi(
8304 regmmatch_T *rmp,
8305 win_T *win, /* window in which to search or NULL */
8306 buf_T *buf, /* buffer in which to search */
8307 linenr_T lnum, /* nr of line to start looking for match */
8308 colnr_T col, /* column to start looking for match */
8309 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008310{
Bram Moolenaar6100d022016-10-02 16:51:57 +02008311 int result;
8312 regexec_T rex_save;
8313 int rex_in_use_save = rex_in_use;
8314
8315 if (rex_in_use)
8316 /* Being called recursively, save the state. */
8317 rex_save = rex;
8318 rex_in_use = TRUE;
8319
8320 result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008321
8322 /* NFA engine aborted because it's very slow. */
8323 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8324 && result == NFA_TOO_EXPENSIVE)
8325 {
8326 int save_p_re = p_re;
8327 int re_flags = rmp->regprog->re_flags;
8328 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8329
8330 p_re = BACKTRACKING_ENGINE;
8331 vim_regfree(rmp->regprog);
8332 if (pat != NULL)
8333 {
8334#ifdef FEAT_EVAL
8335 report_re_switch(pat);
8336#endif
8337 rmp->regprog = vim_regcomp(pat, re_flags);
8338 if (rmp->regprog != NULL)
8339 result = rmp->regprog->engine->regexec_multi(
8340 rmp, win, buf, lnum, col, tm);
8341 vim_free(pat);
8342 }
8343 p_re = save_p_re;
8344 }
8345
Bram Moolenaar6100d022016-10-02 16:51:57 +02008346 rex_in_use = rex_in_use_save;
8347 if (rex_in_use)
8348 rex = rex_save;
8349
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008350 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008351}