blob: 9ed5c75704ae6ae4f323e9e4f77a3bfbfda3dc3d [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: opcodes 20-48 are the normal classes, opcodes 50-78 are
 * the same classes that also include a line-break.  The second group is
 * formed by adding ADD_NL to an opcode of the first group.
 */
#define ADD_NL          30
/* FIRST_NL and LAST_NL are parenthesized so that they keep their value when
 * used inside a larger expression (e.g. "2 * FIRST_NL" or "x - LAST_NL"). */
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20      /*      Match any one character. */
#define ANYOF           21      /* str  Match any character in this string. */
#define ANYBUT          22      /* str  Match any character not in this
                                 *      string. */
#define IDENT           23      /*      Match identifier char */
#define SIDENT          24      /*      Match identifier char but no digit */
#define KWORD           25      /*      Match keyword char */
#define SKWORD          26      /*      Match word char but no digit */
#define FNAME           27      /*      Match file name char */
#define SFNAME          28      /*      Match file name char but no digit */
#define PRINT           29      /*      Match printable char */
#define SPRINT          30      /*      Match printable char but no digit */
#define WHITE           31      /*      Match whitespace char */
#define NWHITE          32      /*      Match non-whitespace char */
#define DIGIT           33      /*      Match digit char */
#define NDIGIT          34      /*      Match non-digit char */
#define HEX             35      /*      Match hex char */
#define NHEX            36      /*      Match non-hex char */
#define OCTAL           37      /*      Match octal char */
#define NOCTAL          38      /*      Match non-octal char */
#define WORD            39      /*      Match word char */
#define NWORD           40      /*      Match non-word char */
#define HEAD            41      /*      Match head char */
#define NHEAD           42      /*      Match non-head char */
#define ALPHA           43      /*      Match alpha char */
#define NALPHA          44      /*      Match non-alpha char */
#define LOWER           45      /*      Match lowercase char */
#define NLOWER          46      /*      Match non-lowercase char */
#define UPPER           47      /*      Match uppercase char */
#define NUPPER          48      /*      Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* Non-zero when "op" lies in the FIRST_NL .. LAST_NL opcode range. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
/*
 * Magic characters have a special meaning, they don't match literally.
 * Magic characters are negative.  This separates them from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */
#define Magic(x)        ((int)(x) - 256)
#define un_Magic(x)     ((x) + 256)
#define is_Magic(x)     ((x) < 0)

static int no_Magic(int x);
static int toggle_Magic(int x);

/*
 * Return the literal character for "x": a Magic (negative) value is converted
 * back with un_Magic(), any other value is returned unchanged.
 */
static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magicness of "x": a Magic value becomes the literal character, a
 * literal character becomes its Magic counterpart.
 */
static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
276
277/*
278 * The first byte of the regexp internal "program" is actually this magic
279 * number; the start node begins in the second byte. It's used to catch the
280 * most severe mutilation of the program by the caller.
281 */
282
283#define REGMAGIC 0234
284
285/*
286 * Opcode notes:
287 *
288 * BRANCH The set of branches constituting a single choice are hooked
289 * together with their "next" pointers, since precedence prevents
290 * anything being concatenated to any individual branch. The
291 * "next" pointer of the last BRANCH in a choice points to the
292 * thing following the whole choice. This is also where the
293 * final "next" pointer of each individual branch points; each
294 * branch starts with the operand node of a BRANCH node.
295 *
296 * BACK Normal "next" pointers all implicitly point forward; BACK
297 * exists to make loop structures possible.
298 *
299 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
300 * BRANCH structures using BACK. Simple cases (one character
301 * per match) are implemented with STAR and PLUS for speed
302 * and to minimize recursive plunges.
303 *
304 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
305 * node, and defines the min and max limits to be used for that
306 * node.
307 *
308 * MOPEN,MCLOSE ...are numbered at compile time.
309 * ZOPEN,ZCLOSE ...ditto
310 */
311
312/*
313 * A node is one char of opcode followed by two chars of "next" pointer.
314 * "Next" pointers are stored as two 8-bit bytes, high order first. The
315 * value is a positive offset from the opcode of the node containing it.
316 * An operand, if any, simply follows the node. (Note that much of the
317 * code generation knows about this implicit relationship.)
318 *
319 * Using two bytes for the "next" pointer is vast overkill for most things,
320 * but allows patterns to get big without disasters.
321 */
322#define OP(p) ((int)*(p))
323#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
324#define OPERAND(p) ((p) + 3)
325/* Obtain an operand that was stored as four bytes, MSB first. */
326#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
327 + ((long)(p)[5] << 8) + (long)(p)[6])
328/* Obtain a second operand stored as four bytes. */
329#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
330/* Obtain a second single-byte operand stored after a four bytes operand. */
331#define OPERAND_CMP(p) (p)[7]
332
333/*
334 * Utility definitions.
335 */
336#define UCHARAT(p) ((int)*(char_u *)(p))
337
338/* Used for an error (down from) vim_regcomp(): give the error message, set
339 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000340#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000341#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200342#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
343#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
344#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000345
346#define MAX_LIMIT (32767L << 16L)
347
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100348static int re_multi_type(int);
349static int cstrncmp(char_u *s1, char_u *s2, int *n);
350static char_u *cstrchr(char_u *, int);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000351
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200352#ifdef BT_REGEXP_DUMP
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100353static void regdump(char_u *, bt_regprog_T *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200354#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355#ifdef DEBUG
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100356static char_u *regprop(char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357#endif
358
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100359static int re_mult_next(char *what);
Bram Moolenaarfb031402014-09-09 17:18:49 +0200360
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200361static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
362static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
363static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
364static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200365#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200366static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
367static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200368#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200369static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200370static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000371#define NOT_MULTI 0
372#define MULTI_ONE 1
373#define MULTI_MULT 2
374/*
375 * Return NOT_MULTI if c is not a "multi" operator.
376 * Return MULTI_ONE if c is a single "multi" operator.
377 * Return MULTI_MULT if c is a multi "multi" operator.
378 */
379 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100380re_multi_type(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000381{
382 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
383 return MULTI_ONE;
384 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
385 return MULTI_MULT;
386 return NOT_MULTI;
387}
388
389/*
390 * Flags to be passed up and down.
391 */
392#define HASWIDTH 0x1 /* Known never to match null string. */
393#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
394#define SPSTART 0x4 /* Starts with * or +. */
395#define HASNL 0x8 /* Contains some \n. */
396#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
397#define WORST 0 /* Worst case. */
398
399/*
400 * When regcode is set to this value, code is not emitted and size is computed
401 * instead.
402 */
403#define JUST_CALC_SIZE ((char_u *) -1)
404
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000405static char_u *reg_prev_sub = NULL;
406
Bram Moolenaar071d4272004-06-13 20:20:40 +0000407/*
408 * REGEXP_INRANGE contains all characters which are always special in a []
409 * range after '\'.
410 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
411 * These are:
412 * \n - New line (NL).
413 * \r - Carriage Return (CR).
414 * \t - Tab (TAB).
415 * \e - Escape (ESC).
416 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000417 * \d - Character code in decimal, eg \d123
 *      \o      - Character code in octal, eg \o40
419 * \x - Character code in hex, eg \x4a
420 * \u - Multibyte character code, eg \u20ac
421 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000422 */
423static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000424static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100426static int backslash_trans(int c);
427static int get_char_class(char_u **pp);
428static int get_equi_class(char_u **pp);
429static void reg_equi_class(int c);
430static int get_coll_element(char_u **pp);
431static char_u *skip_anyof(char_u *p);
432static void init_class_tab(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000433
434/*
435 * Translate '\x' to its control character, except "\n", which is Magic.
436 */
437 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100438backslash_trans(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000439{
440 switch (c)
441 {
442 case 'r': return CAR;
443 case 't': return TAB;
444 case 'e': return ESC;
445 case 'b': return BS;
446 }
447 return c;
448}
449
450/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000451 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000452 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
453 * recognized. Otherwise "pp" is advanced to after the item.
454 */
455 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100456get_char_class(char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000457{
458 static const char *(class_names[]) =
459 {
460 "alnum:]",
461#define CLASS_ALNUM 0
462 "alpha:]",
463#define CLASS_ALPHA 1
464 "blank:]",
465#define CLASS_BLANK 2
466 "cntrl:]",
467#define CLASS_CNTRL 3
468 "digit:]",
469#define CLASS_DIGIT 4
470 "graph:]",
471#define CLASS_GRAPH 5
472 "lower:]",
473#define CLASS_LOWER 6
474 "print:]",
475#define CLASS_PRINT 7
476 "punct:]",
477#define CLASS_PUNCT 8
478 "space:]",
479#define CLASS_SPACE 9
480 "upper:]",
481#define CLASS_UPPER 10
482 "xdigit:]",
483#define CLASS_XDIGIT 11
484 "tab:]",
485#define CLASS_TAB 12
486 "return:]",
487#define CLASS_RETURN 13
488 "backspace:]",
489#define CLASS_BACKSPACE 14
490 "escape:]",
491#define CLASS_ESCAPE 15
492 };
493#define CLASS_NONE 99
494 int i;
495
496 if ((*pp)[1] == ':')
497 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000498 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000499 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
500 {
501 *pp += STRLEN(class_names[i]) + 2;
502 return i;
503 }
504 }
505 return CLASS_NONE;
506}
507
508/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000509 * Specific version of character class functions.
510 * Using a table to keep this fast.
511 */
512static short class_tab[256];
513
514#define RI_DIGIT 0x01
515#define RI_HEX 0x02
516#define RI_OCTAL 0x04
517#define RI_WORD 0x08
518#define RI_HEAD 0x10
519#define RI_ALPHA 0x20
520#define RI_LOWER 0x40
521#define RI_UPPER 0x80
522#define RI_WHITE 0x100
523
524 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100525init_class_tab(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000526{
527 int i;
528 static int done = FALSE;
529
530 if (done)
531 return;
532
533 for (i = 0; i < 256; ++i)
534 {
535 if (i >= '0' && i <= '7')
536 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
537 else if (i >= '8' && i <= '9')
538 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
539 else if (i >= 'a' && i <= 'f')
540 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
541#ifdef EBCDIC
542 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
543 || (i >= 's' && i <= 'z'))
544#else
545 else if (i >= 'g' && i <= 'z')
546#endif
547 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
548 else if (i >= 'A' && i <= 'F')
549 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
550#ifdef EBCDIC
551 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
552 || (i >= 'S' && i <= 'Z'))
553#else
554 else if (i >= 'G' && i <= 'Z')
555#endif
556 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
557 else if (i == '_')
558 class_tab[i] = RI_WORD + RI_HEAD;
559 else
560 class_tab[i] = 0;
561 }
562 class_tab[' '] |= RI_WHITE;
563 class_tab['\t'] |= RI_WHITE;
564 done = TRUE;
565}
566
/*
 * Test whether character "c" has a flag set in class_tab[].  "c" is used as
 * the index in class_tab[].  With FEAT_MBYTE a value of 0x100 or more never
 * has a flag; note that "c" is evaluated twice in those variants.
 * The argument is parenthesized in the range check so that an expression
 * argument (e.g. "a | b") is compared as a whole.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
588
589/* flags for regflags */
590#define RF_ICASE 1 /* ignore case */
591#define RF_NOICASE 2 /* don't ignore case */
592#define RF_HASNL 4 /* can match a NL */
593#define RF_ICOMBINE 8 /* ignore combining characters */
594#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
595
596/*
597 * Global work variables for vim_regcomp().
598 */
599
600static char_u *regparse; /* Input-scan pointer. */
601static int prevchr_len; /* byte length of previous char */
602static int num_complex_braces; /* Complex \{...} count */
603static int regnpar; /* () count. */
604#ifdef FEAT_SYN_HL
605static int regnzpar; /* \z() count. */
606static int re_has_z; /* \z item detected */
607#endif
608static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
609static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000610static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000611static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
612static unsigned regflags; /* RF_ flags for prog */
613static long brace_min[10]; /* Minimums for complex brace repeats */
614static long brace_max[10]; /* Maximums for complex brace repeats */
615static int brace_count[10]; /* Current counts for complex brace repeats */
616#if defined(FEAT_SYN_HL) || defined(PROTO)
617static int had_eol; /* TRUE when EOL found by vim_regcomp() */
618#endif
619static int one_exactly = FALSE; /* only do one char for EXACTLY */
620
621static int reg_magic; /* magicness of the pattern: */
622#define MAGIC_NONE 1 /* "\V" very unmagic */
623#define MAGIC_OFF 2 /* "\M" or 'magic' off */
624#define MAGIC_ON 3 /* "\m" or 'magic' */
625#define MAGIC_ALL 4 /* "\v" very magic */
626
627static int reg_string; /* matching with a string instead of a buffer
628 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000629static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000630
631/*
632 * META contains all characters that may be magic, except '^' and '$'.
633 */
634
635#ifdef EBCDIC
636static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
637#else
638/* META[] is used often enough to justify turning it into a table. */
639static char_u META_flags[] = {
640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642/* % & ( ) * + . */
643 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
644/* 1 2 3 4 5 6 7 8 9 < = > ? */
645 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
646/* @ A C D F H I K L M O */
647 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
648/* P S U V W X Z [ _ */
649 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
650/* a c d f h i k l m n o */
651 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
652/* p s u v w x z { | ~ */
653 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
654};
655#endif
656
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200657static int curchr; /* currently parsed character */
658/* Previous character. Note: prevchr is sometimes -1 when we are not at the
659 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
660 * because ^ was taken to be magic -- webb */
661static int prevchr;
662static int prevprevchr; /* previous-previous character */
663static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000664
665/* arguments for reg() */
666#define REG_NOPAREN 0 /* toplevel reg() */
667#define REG_PAREN 1 /* \(\) */
668#define REG_ZPAREN 2 /* \z(\) */
669#define REG_NPAREN 3 /* \%(\) */
670
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200671typedef struct
672{
673 char_u *regparse;
674 int prevchr_len;
675 int curchr;
676 int prevchr;
677 int prevprevchr;
678 int nextchr;
679 int at_start;
680 int prev_at_start;
681 int regnpar;
682} parse_state_T;
683
Bram Moolenaar071d4272004-06-13 20:20:40 +0000684/*
685 * Forward declarations for vim_regcomp()'s friends.
686 */
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100687static void initchr(char_u *);
688static void save_parse_state(parse_state_T *ps);
689static void restore_parse_state(parse_state_T *ps);
690static int getchr(void);
691static void skipchr_keepstart(void);
692static int peekchr(void);
693static void skipchr(void);
694static void ungetchr(void);
695static int gethexchrs(int maxinputlen);
696static int getoctchrs(void);
697static int getdecchrs(void);
698static int coll_get_char(void);
699static void regcomp_start(char_u *expr, int flags);
700static char_u *reg(int, int *);
701static char_u *regbranch(int *flagp);
702static char_u *regconcat(int *flagp);
703static char_u *regpiece(int *);
704static char_u *regatom(int *);
705static char_u *regnode(int);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000706#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100707static int use_multibytecode(int c);
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000708#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100709static int prog_magic_wrong(void);
710static char_u *regnext(char_u *);
711static void regc(int b);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000712#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100713static void regmbc(int c);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200714# define REGMBC(x) regmbc(x);
715# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000716#else
717# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200718# define REGMBC(x)
719# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000720#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100721static void reginsert(int, char_u *);
722static void reginsert_nr(int op, long val, char_u *opnd);
723static void reginsert_limits(int, long, long, char_u *);
724static char_u *re_put_long(char_u *pr, long_u val);
725static int read_limits(long *, long *);
726static void regtail(char_u *, char_u *);
727static void regoptail(char_u *, char_u *);
Bram Moolenaar071d4272004-06-13 20:20:40 +0000728
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200729static regengine_T bt_regengine;
730static regengine_T nfa_regengine;
731
Bram Moolenaar071d4272004-06-13 20:20:40 +0000732/*
733 * Return TRUE if compiled regular expression "prog" can match a line break.
734 */
735 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100736re_multiline(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737{
738 return (prog->regflags & RF_HASNL);
739}
740
741/*
742 * Return TRUE if compiled regular expression "prog" looks before the start
743 * position (pattern contains "\@<=" or "\@<!").
744 */
745 int
Bram Moolenaar05540972016-01-30 20:31:25 +0100746re_lookbehind(regprog_T *prog)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000747{
748 return (prog->regflags & RF_LOOKBH);
749}
750
751/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000752 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
753 * Returns a character representing the class. Zero means that no item was
754 * recognized. Otherwise "pp" is advanced to after the item.
755 */
756 static int
Bram Moolenaar05540972016-01-30 20:31:25 +0100757get_equi_class(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000758{
759 int c;
760 int l = 1;
761 char_u *p = *pp;
762
763 if (p[1] == '=')
764 {
765#ifdef FEAT_MBYTE
766 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000767 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000768#endif
769 if (p[l + 2] == '=' && p[l + 3] == ']')
770 {
771#ifdef FEAT_MBYTE
772 if (has_mbyte)
773 c = mb_ptr2char(p + 2);
774 else
775#endif
776 c = p[2];
777 *pp += l + 4;
778 return c;
779 }
780 }
781 return 0;
782}
783
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200784#ifdef EBCDIC
785/*
 * Equivalence class table used by EBCDIC builds (IBM-1047 code page).
 *
 * Each of the 16 strings is one class.  reg_equi_class() looks for the first
 * string that contains the character and then emits every byte of that
 * string, so all bytes within one string are treated as equivalent.
 * The first byte of each string is the plain letter; the "\x.." bytes are
 * presumably its accented forms in IBM-1047 -- TODO confirm against the code
 * page table.
 */
788char *EQUIVAL_CLASS_C[16] = {
    /* Classes for upper case letters. */
789 "A\x62\x63\x64\x65\x66\x67",
790 "C\x68",
791 "E\x71\x72\x73\x74",
792 "I\x75\x76\x77\x78",
793 "N\x69",
794 "O\xEB\xEC\xED\xEE\xEF",
795 "U\xFB\xFC\xFD\xFE",
796 "Y\xBA",
    /* Classes for lower case letters. */
797 "a\x42\x43\x44\x45\x46\x47",
798 "c\x48",
799 "e\x51\x52\x53\x54",
800 "i\x55\x56\x57\x58",
801 "n\x49",
802 "o\xCB\xCC\xCD\xCE\xCF",
803 "u\xDB\xDC\xDD\xDE",
804 "y\x8D\xDF",
805};
806#endif
807
Bram Moolenaardf177f62005-02-22 08:39:57 +0000808/*
 * Emit, through regmbc(), every character that is in the same equivalence
 * class as "c".
 *
 * When "c" is found in one of the classes below, all members of that class
 * are emitted (the plain letter first) and the function returns.  When "c"
 * is in no class, only "c" itself is emitted.
 *
 * In FEAT_MBYTE builds the classes are consulted only when enc_utf8 is set
 * or p_enc is "latin1" or "iso-8859-15"; for any other value just "c" is
 * emitted.  Without FEAT_MBYTE the classes are always consulted.
 *
 * CASEMBC() and REGMBC() are macros defined outside this function;
 * presumably they expand to a case label and a regmbc() call only in
 * multi-byte builds -- TODO confirm at their definition.
 *
 * NOTE: When changing this function, also change nfa_emit_equi_class()
 */
813 static void
Bram Moolenaar05540972016-01-30 20:31:25 +0100814reg_equi_class(int c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000815{
816#ifdef FEAT_MBYTE
    /* Only these encodings have classes defined here. */
817 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000818 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000819#endif
820 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200821#ifdef EBCDIC
822 int i;
823
    /* EBCDIC: search the 16 strings of EQUIVAL_CLASS_C for one that
     * contains "c" and emit all bytes of the first such string. */
824 /* This might be slower than switch/case below. */
825 for (i = 0; i < 16; i++)
826 {
827 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
828 {
829 char *p = EQUIVAL_CLASS_C[i];
830
831 while (*p != 0)
832 regmbc(*p++);
833 return;
834 }
835 }
836#else
    /* Each group below first lists the case labels of all class members,
     * then emits those same members and returns. */
Bram Moolenaardf177f62005-02-22 08:39:57 +0000837 switch (c)
838 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200839 /* Do not use '\300' style, it results in a negative number. */
840 case 'A': case 0xc0: case 0xc1: case 0xc2:
841 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200842 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
843 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200844 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
845 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
846 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200847 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
848 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
849 REGMBC(0x1ea2)
850 return;
851 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
852 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000853 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200854 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200855 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200856 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200857 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
858 REGMBC(0x10c)
859 return;
860 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
861 CASEMBC(0x1e0e) CASEMBC(0x1e10)
862 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
863 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000864 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200865 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200866 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
867 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200868 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
869 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200870 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
871 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
872 REGMBC(0x1ebc)
873 return;
874 case 'F': CASEMBC(0x1e1e)
875 regmbc('F'); REGMBC(0x1e1e)
876 return;
877 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
878 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
879 CASEMBC(0x1e20)
880 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
881 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
882 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
883 return;
884 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
885 CASEMBC(0x1e26) CASEMBC(0x1e28)
886 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
887 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000888 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200889 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200890 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
891 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200892 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
893 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200894 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
895 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
896 REGMBC(0x1ec8)
897 return;
898 case 'J': CASEMBC(0x134)
899 regmbc('J'); REGMBC(0x134)
900 return;
901 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
902 CASEMBC(0x1e34)
903 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
904 REGMBC(0x1e30) REGMBC(0x1e34)
905 return;
906 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
907 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
908 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
909 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
910 REGMBC(0x1e3a)
911 return;
912 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
913 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000914 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200915 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200916 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
917 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200918 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200919 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
920 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000921 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200922 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
923 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200924 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
925 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200926 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
927 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
928 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200929 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
930 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
931 REGMBC(0x1ec) REGMBC(0x1ece)
932 return;
    /* NOTE(review): this group uses plain "case" labels for 0x1e54 and
     * 0x1e56 where every other group uses CASEMBC() for values above
     * 0xff -- looks like an inconsistency; confirm whether CASEMBC()
     * was intended. */
933 case 'P': case 0x1e54: case 0x1e56:
934 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
935 return;
936 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
937 CASEMBC(0x1e58) CASEMBC(0x1e5e)
938 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
939 REGMBC(0x1e58) REGMBC(0x1e5e)
940 return;
941 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
942 CASEMBC(0x160) CASEMBC(0x1e60)
943 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
944 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
945 return;
946 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
947 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
948 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
949 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000950 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200951 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200952 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
953 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
954 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200955 regmbc('U'); regmbc(0xd9); regmbc(0xda);
956 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200957 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
958 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
959 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
960 return;
961 case 'V': CASEMBC(0x1e7c)
962 regmbc('V'); REGMBC(0x1e7c)
963 return;
964 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
965 CASEMBC(0x1e84) CASEMBC(0x1e86)
966 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
967 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
968 return;
969 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
970 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000971 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200972 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200973 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
974 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200975 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200976 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
977 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
978 return;
979 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
980 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
981 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
982 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
983 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000984 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200985 case 'a': case 0xe0: case 0xe1: case 0xe2:
986 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200987 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
988 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200989 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
990 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
991 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200992 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
993 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
994 REGMBC(0x1ea3)
995 return;
996 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
997 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000998 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200999 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001000 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001001 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001002 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1003 REGMBC(0x10d)
1004 return;
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001005 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1006 CASEMBC(0x1e0f) CASEMBC(0x1e11)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001007 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
Bram Moolenaar2c61ec62015-07-10 19:16:34 +02001008 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001009 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001010 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001011 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1012 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001013 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1014 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001015 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1016 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1017 REGMBC(0x1ebd)
1018 return;
1019 case 'f': CASEMBC(0x1e1f)
1020 regmbc('f'); REGMBC(0x1e1f)
1021 return;
1022 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1023 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1024 CASEMBC(0x1e21)
1025 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1026 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1027 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1028 return;
1029 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1030 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1031 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1032 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1033 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001034 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001035 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001036 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1037 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001038 regmbc('i'); regmbc(0xec); regmbc(0xed);
1039 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001040 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1041 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1042 return;
1043 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1044 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1045 return;
1046 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1047 CASEMBC(0x1e35)
1048 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1049 REGMBC(0x1e31) REGMBC(0x1e35)
1050 return;
1051 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1052 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1053 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1054 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1055 REGMBC(0x1e3b)
1056 return;
1057 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1058 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001059 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001060 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001061 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1062 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001063 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001064 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1065 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001066 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001067 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1068 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001069 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1070 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001071 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1072 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1073 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001074 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1075 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1076 REGMBC(0x1ed) REGMBC(0x1ecf)
1077 return;
1078 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1079 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1080 return;
1081 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1082 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1083 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1084 REGMBC(0x1e59) REGMBC(0x1e5f)
1085 return;
1086 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1087 CASEMBC(0x161) CASEMBC(0x1e61)
1088 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1089 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1090 return;
1091 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1092 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1093 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1094 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001095 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001096 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001097 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1098 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1099 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001100 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1101 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001102 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1103 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1104 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1105 return;
1106 case 'v': CASEMBC(0x1e7d)
1107 regmbc('v'); REGMBC(0x1e7d)
1108 return;
1109 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1110 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1111 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1112 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1113 REGMBC(0x1e98)
1114 return;
1115 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1116 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001117 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001118 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001119 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1120 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001121 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001122 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1123 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1124 return;
1125 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1126 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1127 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1128 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1129 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001130 return;
1131 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001132#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001133 }
    /* Reached when "c" is in no class, or when the encoding check above
     * failed: the character only stands for itself. */
1134 regmbc(c);
1135}
1136
1137/*
1138 * Check for a collating element "[.a.]". "pp" points to the '['.
1139 * Returns a character. Zero means that no item was recognized. Otherwise
1140 * "pp" is advanced to after the item.
1141 * Currently only single characters are recognized!
1142 */
1143 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01001144get_coll_element(char_u **pp)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001145{
1146 int c;
1147 int l = 1;
1148 char_u *p = *pp;
1149
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001150 if (p[0] != NUL && p[1] == '.')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001151 {
1152#ifdef FEAT_MBYTE
1153 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001154 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001155#endif
1156 if (p[l + 2] == '.' && p[l + 3] == ']')
1157 {
1158#ifdef FEAT_MBYTE
1159 if (has_mbyte)
1160 c = mb_ptr2char(p + 2);
1161 else
1162#endif
1163 c = p[2];
1164 *pp += l + 4;
1165 return c;
1166 }
1167 }
1168 return 0;
1169}
1170
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001171static void get_cpo_flags(void);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001172static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1173static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1174
1175 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001176get_cpo_flags(void)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001177{
1178 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1179 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1180}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001181
1182/*
1183 * Skip over a "[]" range.
1184 * "p" must point to the character after the '['.
1185 * The returned pointer is on the matching ']', or the terminating NUL.
1186 */
1187 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001188skip_anyof(char_u *p)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001189{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001190#ifdef FEAT_MBYTE
1191 int l;
1192#endif
1193
Bram Moolenaardf177f62005-02-22 08:39:57 +00001194 if (*p == '^') /* Complement of range. */
1195 ++p;
1196 if (*p == ']' || *p == '-')
1197 ++p;
1198 while (*p != NUL && *p != ']')
1199 {
1200#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001201 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001202 p += l;
1203 else
1204#endif
1205 if (*p == '-')
1206 {
1207 ++p;
1208 if (*p != ']' && *p != NUL)
1209 mb_ptr_adv(p);
1210 }
1211 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001212 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001213 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001214 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001215 p += 2;
1216 else if (*p == '[')
1217 {
1218 if (get_char_class(&p) == CLASS_NONE
1219 && get_equi_class(&p) == 0
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02001220 && get_coll_element(&p) == 0
1221 && *p != NUL)
1222 ++p; /* it is not a class name and not NUL */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001223 }
1224 else
1225 ++p;
1226 }
1227
1228 return p;
1229}
1230
1231/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001232 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001233 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 * Take care of characters with a backslash in front of it.
1235 * Skip strings inside [ and ].
1236 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1237 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1238 * is changed in-place.
1239 */
1240 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001241skip_regexp(
1242 char_u *startp,
1243 int dirc,
1244 int magic,
1245 char_u **newp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001246{
1247 int mymagic;
1248 char_u *p = startp;
1249
1250 if (magic)
1251 mymagic = MAGIC_ON;
1252 else
1253 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001254 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001255
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001256 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257 {
1258 if (p[0] == dirc) /* found end of regexp */
1259 break;
1260 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1261 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1262 {
1263 p = skip_anyof(p + 1);
1264 if (p[0] == NUL)
1265 break;
1266 }
1267 else if (p[0] == '\\' && p[1] != NUL)
1268 {
1269 if (dirc == '?' && newp != NULL && p[1] == '?')
1270 {
1271 /* change "\?" to "?", make a copy first. */
1272 if (*newp == NULL)
1273 {
1274 *newp = vim_strsave(startp);
1275 if (*newp != NULL)
1276 p = *newp + (p - startp);
1277 }
1278 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001279 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001280 else
1281 ++p;
1282 }
1283 else
1284 ++p; /* skip next character */
1285 if (*p == 'v')
1286 mymagic = MAGIC_ALL;
1287 else if (*p == 'V')
1288 mymagic = MAGIC_NONE;
1289 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001290 }
1291 return p;
1292}
1293
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01001294static regprog_T *bt_regcomp(char_u *expr, int re_flags);
1295static void bt_regfree(regprog_T *prog);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001296
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001298 * bt_regcomp() - compile a regular expression into internal code for the
1299 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001300 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001301 *
1302 * We can't allocate space until we know how big the compiled form will be,
1303 * but we can't compile it (and thus know how big it is) until we've got a
1304 * place to put the code. So we cheat: we compile it twice, once with code
1305 * generation turned off and size counting turned on, and once "for real".
1306 * This also means that we don't allocate space until we are sure that the
1307 * thing really will compile successfully, and we never have to move the
1308 * code and thus invalidate pointers into it. (Note that it has to be in
1309 * one piece because vim_free() must be able to free it all.)
1310 *
1311 * Whether upper/lower case is to be ignored is decided when executing the
1312 * program, it does not matter here.
1313 *
1314 * Beware that the optimization-preparation code in here knows about some
1315 * of the structure of the compiled regexp.
1316 * "re_flags": RE_MAGIC and/or RE_STRING.
1317 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001318 static regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01001319bt_regcomp(char_u *expr, int re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001320{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001321 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001322 char_u *scan;
1323 char_u *longest;
1324 int len;
1325 int flags;
1326
1327 if (expr == NULL)
1328 EMSG_RET_NULL(_(e_null));
1329
1330 init_class_tab();
1331
1332 /*
1333 * First pass: determine size, legality.
1334 */
1335 regcomp_start(expr, re_flags);
1336 regcode = JUST_CALC_SIZE;
1337 regc(REGMAGIC);
1338 if (reg(REG_NOPAREN, &flags) == NULL)
1339 return NULL;
1340
1341 /* Small enough for pointer-storage convention? */
1342#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1343 if (regsize >= 65536L - 256L)
1344 EMSG_RET_NULL(_("E339: Pattern too long"));
1345#endif
1346
1347 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001348 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001349 if (r == NULL)
1350 return NULL;
1351
1352 /*
1353 * Second pass: emit code.
1354 */
1355 regcomp_start(expr, re_flags);
1356 regcode = r->program;
1357 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001358 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001359 {
1360 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001361 if (reg_toolong)
1362 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001363 return NULL;
1364 }
1365
1366 /* Dig out information for optimizations. */
1367 r->regstart = NUL; /* Worst-case defaults. */
1368 r->reganch = 0;
1369 r->regmust = NULL;
1370 r->regmlen = 0;
1371 r->regflags = regflags;
1372 if (flags & HASNL)
1373 r->regflags |= RF_HASNL;
1374 if (flags & HASLOOKBH)
1375 r->regflags |= RF_LOOKBH;
1376#ifdef FEAT_SYN_HL
1377 /* Remember whether this pattern has any \z specials in it. */
1378 r->reghasz = re_has_z;
1379#endif
1380 scan = r->program + 1; /* First BRANCH. */
1381 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1382 {
1383 scan = OPERAND(scan);
1384
1385 /* Starting-point info. */
1386 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1387 {
1388 r->reganch++;
1389 scan = regnext(scan);
1390 }
1391
1392 if (OP(scan) == EXACTLY)
1393 {
1394#ifdef FEAT_MBYTE
1395 if (has_mbyte)
1396 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1397 else
1398#endif
1399 r->regstart = *OPERAND(scan);
1400 }
1401 else if ((OP(scan) == BOW
1402 || OP(scan) == EOW
1403 || OP(scan) == NOTHING
1404 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1405 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1406 && OP(regnext(scan)) == EXACTLY)
1407 {
1408#ifdef FEAT_MBYTE
1409 if (has_mbyte)
1410 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1411 else
1412#endif
1413 r->regstart = *OPERAND(regnext(scan));
1414 }
1415
1416 /*
1417 * If there's something expensive in the r.e., find the longest
1418 * literal string that must appear and make it the regmust. Resolve
1419 * ties in favor of later strings, since the regstart check works
1420 * with the beginning of the r.e. and avoiding duplication
1421 * strengthens checking. Not a strong reason, but sufficient in the
1422 * absence of others.
1423 */
1424 /*
1425 * When the r.e. starts with BOW, it is faster to look for a regmust
1426 * first. Used a lot for "#" and "*" commands. (Added by mool).
1427 */
1428 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1429 && !(flags & HASNL))
1430 {
1431 longest = NULL;
1432 len = 0;
1433 for (; scan != NULL; scan = regnext(scan))
1434 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1435 {
1436 longest = OPERAND(scan);
1437 len = (int)STRLEN(OPERAND(scan));
1438 }
1439 r->regmust = longest;
1440 r->regmlen = len;
1441 }
1442 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001443#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001444 regdump(expr, r);
1445#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001446 r->engine = &bt_regengine;
1447 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001448}
1449
1450/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001451 * Free a compiled regexp program, returned by bt_regcomp().
1452 */
1453 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001454bt_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02001455{
1456 vim_free(prog);
1457}
1458
1459/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001460 * Setup to parse the regexp. Used once to get the length and once to do it.
1461 */
1462 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01001463regcomp_start(
1464 char_u *expr,
1465 int re_flags) /* see vim_regcomp() */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001466{
1467 initchr(expr);
1468 if (re_flags & RE_MAGIC)
1469 reg_magic = MAGIC_ON;
1470 else
1471 reg_magic = MAGIC_OFF;
1472 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001473 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001474 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001475
1476 num_complex_braces = 0;
1477 regnpar = 1;
1478 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1479#ifdef FEAT_SYN_HL
1480 regnzpar = 1;
1481 re_has_z = 0;
1482#endif
1483 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001484 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001485 regflags = 0;
1486#if defined(FEAT_SYN_HL) || defined(PROTO)
1487 had_eol = FALSE;
1488#endif
1489}
1490
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Report whether the EOL item "$" was found during the previous call to
 * vim_regcomp().  The answer is the current value of "had_eol", which
 * regcomp_start() resets to FALSE at the start of each compile pass.
 * This is messy, but it works fine.
 */
int
vim_regcomp_had_eol(void)
{
    int	    seen_eol = had_eol;

    return seen_eol;
}
#endif
1502
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001503/* variables for parsing reginput */
1504static int at_start; /* True when on the first character */
1505static int prev_at_start; /* True when on the second character */
1506
Bram Moolenaar071d4272004-06-13 20:20:40 +00001507/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001508 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001509 *
1510 * Caller must absorb opening parenthesis.
1511 *
1512 * Combining parenthesis handling with the base level of regular expression
1513 * is a trifle forced, but the need to tie the tails of the branches to what
1514 * follows makes it hard to avoid.
1515 */
1516 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001517reg(
1518 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1519 int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001520{
1521 char_u *ret;
1522 char_u *br;
1523 char_u *ender;
1524 int parno = 0;
1525 int flags;
1526
1527 *flagp = HASWIDTH; /* Tentatively. */
1528
1529#ifdef FEAT_SYN_HL
1530 if (paren == REG_ZPAREN)
1531 {
1532 /* Make a ZOPEN node. */
1533 if (regnzpar >= NSUBEXP)
1534 EMSG_RET_NULL(_("E50: Too many \\z("));
1535 parno = regnzpar;
1536 regnzpar++;
1537 ret = regnode(ZOPEN + parno);
1538 }
1539 else
1540#endif
1541 if (paren == REG_PAREN)
1542 {
1543 /* Make a MOPEN node. */
1544 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001545 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001546 parno = regnpar;
1547 ++regnpar;
1548 ret = regnode(MOPEN + parno);
1549 }
1550 else if (paren == REG_NPAREN)
1551 {
1552 /* Make a NOPEN node. */
1553 ret = regnode(NOPEN);
1554 }
1555 else
1556 ret = NULL;
1557
1558 /* Pick up the branches, linking them together. */
1559 br = regbranch(&flags);
1560 if (br == NULL)
1561 return NULL;
1562 if (ret != NULL)
1563 regtail(ret, br); /* [MZ]OPEN -> first. */
1564 else
1565 ret = br;
1566 /* If one of the branches can be zero-width, the whole thing can.
1567 * If one of the branches has * at start or matches a line-break, the
1568 * whole thing can. */
1569 if (!(flags & HASWIDTH))
1570 *flagp &= ~HASWIDTH;
1571 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1572 while (peekchr() == Magic('|'))
1573 {
1574 skipchr();
1575 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001576 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001577 return NULL;
1578 regtail(ret, br); /* BRANCH -> BRANCH. */
1579 if (!(flags & HASWIDTH))
1580 *flagp &= ~HASWIDTH;
1581 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1582 }
1583
1584 /* Make a closing node, and hook it on the end. */
1585 ender = regnode(
1586#ifdef FEAT_SYN_HL
1587 paren == REG_ZPAREN ? ZCLOSE + parno :
1588#endif
1589 paren == REG_PAREN ? MCLOSE + parno :
1590 paren == REG_NPAREN ? NCLOSE : END);
1591 regtail(ret, ender);
1592
1593 /* Hook the tails of the branches to the closing node. */
1594 for (br = ret; br != NULL; br = regnext(br))
1595 regoptail(br, ender);
1596
1597 /* Check for proper termination. */
1598 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1599 {
1600#ifdef FEAT_SYN_HL
1601 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001602 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001603 else
1604#endif
1605 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001606 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001607 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001608 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609 }
1610 else if (paren == REG_NOPAREN && peekchr() != NUL)
1611 {
1612 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001613 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001614 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001615 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001616 /* NOTREACHED */
1617 }
1618 /*
1619 * Here we set the flag allowing back references to this set of
1620 * parentheses.
1621 */
1622 if (paren == REG_PAREN)
1623 had_endbrace[parno] = TRUE; /* have seen the close paren */
1624 return ret;
1625}
1626
1627/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001628 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001629 * Implements the & operator.
1630 */
1631 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001632regbranch(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001633{
1634 char_u *ret;
1635 char_u *chain = NULL;
1636 char_u *latest;
1637 int flags;
1638
1639 *flagp = WORST | HASNL; /* Tentatively. */
1640
1641 ret = regnode(BRANCH);
1642 for (;;)
1643 {
1644 latest = regconcat(&flags);
1645 if (latest == NULL)
1646 return NULL;
1647 /* If one of the branches has width, the whole thing has. If one of
1648 * the branches anchors at start-of-line, the whole thing does.
1649 * If one of the branches uses look-behind, the whole thing does. */
1650 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1651 /* If one of the branches doesn't match a line-break, the whole thing
1652 * doesn't. */
1653 *flagp &= ~HASNL | (flags & HASNL);
1654 if (chain != NULL)
1655 regtail(chain, latest);
1656 if (peekchr() != Magic('&'))
1657 break;
1658 skipchr();
1659 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001660 if (reg_toolong)
1661 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001662 reginsert(MATCH, latest);
1663 chain = latest;
1664 }
1665
1666 return ret;
1667}
1668
1669/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001670 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001671 * Implements the concatenation operator.
1672 */
1673 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001674regconcat(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001675{
1676 char_u *first = NULL;
1677 char_u *chain = NULL;
1678 char_u *latest;
1679 int flags;
1680 int cont = TRUE;
1681
1682 *flagp = WORST; /* Tentatively. */
1683
1684 while (cont)
1685 {
1686 switch (peekchr())
1687 {
1688 case NUL:
1689 case Magic('|'):
1690 case Magic('&'):
1691 case Magic(')'):
1692 cont = FALSE;
1693 break;
1694 case Magic('Z'):
1695#ifdef FEAT_MBYTE
1696 regflags |= RF_ICOMBINE;
1697#endif
1698 skipchr_keepstart();
1699 break;
1700 case Magic('c'):
1701 regflags |= RF_ICASE;
1702 skipchr_keepstart();
1703 break;
1704 case Magic('C'):
1705 regflags |= RF_NOICASE;
1706 skipchr_keepstart();
1707 break;
1708 case Magic('v'):
1709 reg_magic = MAGIC_ALL;
1710 skipchr_keepstart();
1711 curchr = -1;
1712 break;
1713 case Magic('m'):
1714 reg_magic = MAGIC_ON;
1715 skipchr_keepstart();
1716 curchr = -1;
1717 break;
1718 case Magic('M'):
1719 reg_magic = MAGIC_OFF;
1720 skipchr_keepstart();
1721 curchr = -1;
1722 break;
1723 case Magic('V'):
1724 reg_magic = MAGIC_NONE;
1725 skipchr_keepstart();
1726 curchr = -1;
1727 break;
1728 default:
1729 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001730 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001731 return NULL;
1732 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1733 if (chain == NULL) /* First piece. */
1734 *flagp |= flags & SPSTART;
1735 else
1736 regtail(chain, latest);
1737 chain = latest;
1738 if (first == NULL)
1739 first = latest;
1740 break;
1741 }
1742 }
1743 if (first == NULL) /* Loop ran zero times. */
1744 first = regnode(NOTHING);
1745 return first;
1746}
1747
1748/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001749 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001750 *
1751 * Note that the branching code sequences used for = and the general cases
1752 * of * and + are somewhat optimized: they use the same NOTHING node as
1753 * both the endmarker for their branch list and the body of the last branch.
1754 * It might seem that this node could be dispensed with entirely, but the
1755 * endmarker role is not redundant.
1756 */
1757 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001758regpiece(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001759{
1760 char_u *ret;
1761 int op;
1762 char_u *next;
1763 int flags;
1764 long minval;
1765 long maxval;
1766
1767 ret = regatom(&flags);
1768 if (ret == NULL)
1769 return NULL;
1770
1771 op = peekchr();
1772 if (re_multi_type(op) == NOT_MULTI)
1773 {
1774 *flagp = flags;
1775 return ret;
1776 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001777 /* default flags */
1778 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1779
1780 skipchr();
1781 switch (op)
1782 {
1783 case Magic('*'):
1784 if (flags & SIMPLE)
1785 reginsert(STAR, ret);
1786 else
1787 {
1788 /* Emit x* as (x&|), where & means "self". */
1789 reginsert(BRANCH, ret); /* Either x */
1790 regoptail(ret, regnode(BACK)); /* and loop */
1791 regoptail(ret, ret); /* back */
1792 regtail(ret, regnode(BRANCH)); /* or */
1793 regtail(ret, regnode(NOTHING)); /* null. */
1794 }
1795 break;
1796
1797 case Magic('+'):
1798 if (flags & SIMPLE)
1799 reginsert(PLUS, ret);
1800 else
1801 {
1802 /* Emit x+ as x(&|), where & means "self". */
1803 next = regnode(BRANCH); /* Either */
1804 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001805 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001806 regtail(next, regnode(BRANCH)); /* or */
1807 regtail(ret, regnode(NOTHING)); /* null. */
1808 }
1809 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1810 break;
1811
1812 case Magic('@'):
1813 {
1814 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001815 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001816
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001817 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818 switch (no_Magic(getchr()))
1819 {
1820 case '=': lop = MATCH; break; /* \@= */
1821 case '!': lop = NOMATCH; break; /* \@! */
1822 case '>': lop = SUBPAT; break; /* \@> */
1823 case '<': switch (no_Magic(getchr()))
1824 {
1825 case '=': lop = BEHIND; break; /* \@<= */
1826 case '!': lop = NOBEHIND; break; /* \@<! */
1827 }
1828 }
1829 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001830 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001831 reg_magic == MAGIC_ALL);
1832 /* Look behind must match with behind_pos. */
1833 if (lop == BEHIND || lop == NOBEHIND)
1834 {
1835 regtail(ret, regnode(BHPOS));
1836 *flagp |= HASLOOKBH;
1837 }
1838 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001839 if (lop == BEHIND || lop == NOBEHIND)
1840 {
1841 if (nr < 0)
1842 nr = 0; /* no limit is same as zero limit */
1843 reginsert_nr(lop, nr, ret);
1844 }
1845 else
1846 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001847 break;
1848 }
1849
1850 case Magic('?'):
1851 case Magic('='):
1852 /* Emit x= as (x|) */
1853 reginsert(BRANCH, ret); /* Either x */
1854 regtail(ret, regnode(BRANCH)); /* or */
1855 next = regnode(NOTHING); /* null. */
1856 regtail(ret, next);
1857 regoptail(ret, next);
1858 break;
1859
1860 case Magic('{'):
1861 if (!read_limits(&minval, &maxval))
1862 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001863 if (flags & SIMPLE)
1864 {
1865 reginsert(BRACE_SIMPLE, ret);
1866 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1867 }
1868 else
1869 {
1870 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001871 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001872 reg_magic == MAGIC_ALL);
1873 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1874 regoptail(ret, regnode(BACK));
1875 regoptail(ret, ret);
1876 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1877 ++num_complex_braces;
1878 }
1879 if (minval > 0 && maxval > 0)
1880 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1881 break;
1882 }
1883 if (re_multi_type(peekchr()) != NOT_MULTI)
1884 {
1885 /* Can't have a multi follow a multi. */
1886 if (peekchr() == Magic('*'))
1887 sprintf((char *)IObuff, _("E61: Nested %s*"),
1888 reg_magic >= MAGIC_ON ? "" : "\\");
1889 else
1890 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1891 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1892 EMSG_RET_NULL(IObuff);
1893 }
1894
1895 return ret;
1896}
1897
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001898/* When making changes to classchars also change nfa_classcodes. */
1899static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1900static int classcodes[] = {
1901 ANY, IDENT, SIDENT, KWORD, SKWORD,
1902 FNAME, SFNAME, PRINT, SPRINT,
1903 WHITE, NWHITE, DIGIT, NDIGIT,
1904 HEX, NHEX, OCTAL, NOCTAL,
1905 WORD, NWORD, HEAD, NHEAD,
1906 ALPHA, NALPHA, LOWER, NLOWER,
1907 UPPER, NUPPER
1908};
1909
Bram Moolenaar071d4272004-06-13 20:20:40 +00001910/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001911 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001912 *
1913 * Optimization: gobbles an entire sequence of ordinary characters so that
1914 * it can turn them into a single node, which is smaller to store and
1915 * faster to run. Don't do this when one_exactly is set.
1916 */
1917 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01001918regatom(int *flagp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919{
1920 char_u *ret;
1921 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001923 char_u *p;
1924 int extra = 0;
Bram Moolenaar7c29f382016-02-12 19:08:15 +01001925 int save_prev_at_start = prev_at_start;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001926
1927 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001928
1929 c = getchr();
1930 switch (c)
1931 {
1932 case Magic('^'):
1933 ret = regnode(BOL);
1934 break;
1935
1936 case Magic('$'):
1937 ret = regnode(EOL);
1938#if defined(FEAT_SYN_HL) || defined(PROTO)
1939 had_eol = TRUE;
1940#endif
1941 break;
1942
1943 case Magic('<'):
1944 ret = regnode(BOW);
1945 break;
1946
1947 case Magic('>'):
1948 ret = regnode(EOW);
1949 break;
1950
1951 case Magic('_'):
1952 c = no_Magic(getchr());
1953 if (c == '^') /* "\_^" is start-of-line */
1954 {
1955 ret = regnode(BOL);
1956 break;
1957 }
1958 if (c == '$') /* "\_$" is end-of-line */
1959 {
1960 ret = regnode(EOL);
1961#if defined(FEAT_SYN_HL) || defined(PROTO)
1962 had_eol = TRUE;
1963#endif
1964 break;
1965 }
1966
1967 extra = ADD_NL;
1968 *flagp |= HASNL;
1969
1970 /* "\_[" is character range plus newline */
1971 if (c == '[')
1972 goto collection;
1973
1974 /* "\_x" is character class plus newline */
1975 /*FALLTHROUGH*/
1976
1977 /*
1978 * Character classes.
1979 */
1980 case Magic('.'):
1981 case Magic('i'):
1982 case Magic('I'):
1983 case Magic('k'):
1984 case Magic('K'):
1985 case Magic('f'):
1986 case Magic('F'):
1987 case Magic('p'):
1988 case Magic('P'):
1989 case Magic('s'):
1990 case Magic('S'):
1991 case Magic('d'):
1992 case Magic('D'):
1993 case Magic('x'):
1994 case Magic('X'):
1995 case Magic('o'):
1996 case Magic('O'):
1997 case Magic('w'):
1998 case Magic('W'):
1999 case Magic('h'):
2000 case Magic('H'):
2001 case Magic('a'):
2002 case Magic('A'):
2003 case Magic('l'):
2004 case Magic('L'):
2005 case Magic('u'):
2006 case Magic('U'):
2007 p = vim_strchr(classchars, no_Magic(c));
2008 if (p == NULL)
2009 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002010#ifdef FEAT_MBYTE
2011 /* When '.' is followed by a composing char ignore the dot, so that
2012 * the composing char is matched here. */
2013 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2014 {
2015 c = getchr();
2016 goto do_multibyte;
2017 }
2018#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002019 ret = regnode(classcodes[p - classchars] + extra);
2020 *flagp |= HASWIDTH | SIMPLE;
2021 break;
2022
2023 case Magic('n'):
2024 if (reg_string)
2025 {
2026 /* In a string "\n" matches a newline character. */
2027 ret = regnode(EXACTLY);
2028 regc(NL);
2029 regc(NUL);
2030 *flagp |= HASWIDTH | SIMPLE;
2031 }
2032 else
2033 {
2034 /* In buffer text "\n" matches the end of a line. */
2035 ret = regnode(NEWL);
2036 *flagp |= HASWIDTH | HASNL;
2037 }
2038 break;
2039
2040 case Magic('('):
2041 if (one_exactly)
2042 EMSG_ONE_RET_NULL;
2043 ret = reg(REG_PAREN, &flags);
2044 if (ret == NULL)
2045 return NULL;
2046 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2047 break;
2048
2049 case NUL:
2050 case Magic('|'):
2051 case Magic('&'):
2052 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002053 if (one_exactly)
2054 EMSG_ONE_RET_NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002055 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
2056 /* NOTREACHED */
2057
2058 case Magic('='):
2059 case Magic('?'):
2060 case Magic('+'):
2061 case Magic('@'):
2062 case Magic('{'):
2063 case Magic('*'):
2064 c = no_Magic(c);
2065 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2066 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2067 ? "" : "\\", c);
2068 EMSG_RET_NULL(IObuff);
2069 /* NOTREACHED */
2070
2071 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002072 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002073 {
2074 char_u *lp;
2075
2076 ret = regnode(EXACTLY);
2077 lp = reg_prev_sub;
2078 while (*lp != NUL)
2079 regc(*lp++);
2080 regc(NUL);
2081 if (*reg_prev_sub != NUL)
2082 {
2083 *flagp |= HASWIDTH;
2084 if ((lp - reg_prev_sub) == 1)
2085 *flagp |= SIMPLE;
2086 }
2087 }
2088 else
2089 EMSG_RET_NULL(_(e_nopresub));
2090 break;
2091
2092 case Magic('1'):
2093 case Magic('2'):
2094 case Magic('3'):
2095 case Magic('4'):
2096 case Magic('5'):
2097 case Magic('6'):
2098 case Magic('7'):
2099 case Magic('8'):
2100 case Magic('9'):
2101 {
2102 int refnum;
2103
2104 refnum = c - Magic('0');
2105 /*
2106 * Check if the back reference is legal. We must have seen the
2107 * close brace.
2108 * TODO: Should also check that we don't refer to something
2109 * that is repeated (+*=): what instance of the repetition
2110 * should we match?
2111 */
2112 if (!had_endbrace[refnum])
2113 {
2114 /* Trick: check if "@<=" or "@<!" follows, in which case
2115 * the \1 can appear before the referenced match. */
2116 for (p = regparse; *p != NUL; ++p)
2117 if (p[0] == '@' && p[1] == '<'
2118 && (p[2] == '!' || p[2] == '='))
2119 break;
2120 if (*p == NUL)
2121 EMSG_RET_NULL(_("E65: Illegal back reference"));
2122 }
2123 ret = regnode(BACKREF + refnum);
2124 }
2125 break;
2126
Bram Moolenaar071d4272004-06-13 20:20:40 +00002127 case Magic('z'):
2128 {
2129 c = no_Magic(getchr());
2130 switch (c)
2131 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002132#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002133 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002134 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002135 if (one_exactly)
2136 EMSG_ONE_RET_NULL;
2137 ret = reg(REG_ZPAREN, &flags);
2138 if (ret == NULL)
2139 return NULL;
2140 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2141 re_has_z = REX_SET;
2142 break;
2143
2144 case '1':
2145 case '2':
2146 case '3':
2147 case '4':
2148 case '5':
2149 case '6':
2150 case '7':
2151 case '8':
2152 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002153 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002154 ret = regnode(ZREF + c - '0');
2155 re_has_z = REX_USE;
2156 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002157#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002158
2159 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002160 if (re_mult_next("\\zs") == FAIL)
2161 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002162 break;
2163
2164 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002165 if (re_mult_next("\\ze") == FAIL)
2166 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002167 break;
2168
2169 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2170 }
2171 }
2172 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002173
2174 case Magic('%'):
2175 {
2176 c = no_Magic(getchr());
2177 switch (c)
2178 {
2179 /* () without a back reference */
2180 case '(':
2181 if (one_exactly)
2182 EMSG_ONE_RET_NULL;
2183 ret = reg(REG_NPAREN, &flags);
2184 if (ret == NULL)
2185 return NULL;
2186 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2187 break;
2188
2189 /* Catch \%^ and \%$ regardless of where they appear in the
2190 * pattern -- regardless of whether or not it makes sense. */
2191 case '^':
2192 ret = regnode(RE_BOF);
2193 break;
2194
2195 case '$':
2196 ret = regnode(RE_EOF);
2197 break;
2198
2199 case '#':
2200 ret = regnode(CURSOR);
2201 break;
2202
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002203 case 'V':
2204 ret = regnode(RE_VISUAL);
2205 break;
2206
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002207 case 'C':
2208 ret = regnode(RE_COMPOSING);
2209 break;
2210
Bram Moolenaar071d4272004-06-13 20:20:40 +00002211 /* \%[abc]: Emit as a list of branches, all ending at the last
2212 * branch which matches nothing. */
2213 case '[':
2214 if (one_exactly) /* doesn't nest */
2215 EMSG_ONE_RET_NULL;
2216 {
2217 char_u *lastbranch;
2218 char_u *lastnode = NULL;
2219 char_u *br;
2220
2221 ret = NULL;
2222 while ((c = getchr()) != ']')
2223 {
2224 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002225 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002226 reg_magic == MAGIC_ALL);
2227 br = regnode(BRANCH);
2228 if (ret == NULL)
2229 ret = br;
2230 else
2231 regtail(lastnode, br);
2232
2233 ungetchr();
2234 one_exactly = TRUE;
2235 lastnode = regatom(flagp);
2236 one_exactly = FALSE;
2237 if (lastnode == NULL)
2238 return NULL;
2239 }
2240 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002241 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002242 reg_magic == MAGIC_ALL);
2243 lastbranch = regnode(BRANCH);
2244 br = regnode(NOTHING);
2245 if (ret != JUST_CALC_SIZE)
2246 {
2247 regtail(lastnode, br);
2248 regtail(lastbranch, br);
2249 /* connect all branches to the NOTHING
2250 * branch at the end */
2251 for (br = ret; br != lastnode; )
2252 {
2253 if (OP(br) == BRANCH)
2254 {
2255 regtail(br, lastbranch);
2256 br = OPERAND(br);
2257 }
2258 else
2259 br = regnext(br);
2260 }
2261 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002262 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002263 break;
2264 }
2265
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002266 case 'd': /* %d123 decimal */
2267 case 'o': /* %o123 octal */
2268 case 'x': /* %xab hex 2 */
2269 case 'u': /* %uabcd hex 4 */
2270 case 'U': /* %U1234abcd hex 8 */
2271 {
2272 int i;
2273
2274 switch (c)
2275 {
2276 case 'd': i = getdecchrs(); break;
2277 case 'o': i = getoctchrs(); break;
2278 case 'x': i = gethexchrs(2); break;
2279 case 'u': i = gethexchrs(4); break;
2280 case 'U': i = gethexchrs(8); break;
2281 default: i = -1; break;
2282 }
2283
2284 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002285 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002286 _("E678: Invalid character after %s%%[dxouU]"),
2287 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002288#ifdef FEAT_MBYTE
2289 if (use_multibytecode(i))
2290 ret = regnode(MULTIBYTECODE);
2291 else
2292#endif
2293 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002294 if (i == 0)
2295 regc(0x0a);
2296 else
2297#ifdef FEAT_MBYTE
2298 regmbc(i);
2299#else
2300 regc(i);
2301#endif
2302 regc(NUL);
2303 *flagp |= HASWIDTH;
2304 break;
2305 }
2306
Bram Moolenaar071d4272004-06-13 20:20:40 +00002307 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002308 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2309 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002310 {
2311 long_u n = 0;
2312 int cmp;
2313
2314 cmp = c;
2315 if (cmp == '<' || cmp == '>')
2316 c = getchr();
2317 while (VIM_ISDIGIT(c))
2318 {
2319 n = n * 10 + (c - '0');
2320 c = getchr();
2321 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002322 if (c == '\'' && n == 0)
2323 {
2324 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2325 c = getchr();
2326 ret = regnode(RE_MARK);
2327 if (ret == JUST_CALC_SIZE)
2328 regsize += 2;
2329 else
2330 {
2331 *regcode++ = c;
2332 *regcode++ = cmp;
2333 }
2334 break;
2335 }
2336 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002337 {
2338 if (c == 'l')
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002339 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002340 ret = regnode(RE_LNUM);
Bram Moolenaar7c29f382016-02-12 19:08:15 +01002341 if (save_prev_at_start)
2342 at_start = TRUE;
2343 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002344 else if (c == 'c')
2345 ret = regnode(RE_COL);
2346 else
2347 ret = regnode(RE_VCOL);
2348 if (ret == JUST_CALC_SIZE)
2349 regsize += 5;
2350 else
2351 {
2352 /* put the number and the optional
2353 * comparator after the opcode */
2354 regcode = re_put_long(regcode, n);
2355 *regcode++ = cmp;
2356 }
2357 break;
2358 }
2359 }
2360
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002361 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002362 reg_magic == MAGIC_ALL);
2363 }
2364 }
2365 break;
2366
2367 case Magic('['):
2368collection:
2369 {
2370 char_u *lp;
2371
2372 /*
2373 * If there is no matching ']', we assume the '[' is a normal
2374 * character. This makes 'incsearch' and ":help [" work.
2375 */
2376 lp = skip_anyof(regparse);
2377 if (*lp == ']') /* there is a matching ']' */
2378 {
2379 int startc = -1; /* > 0 when next '-' is a range */
2380 int endc;
2381
2382 /*
2383 * In a character class, different parsing rules apply.
2384 * Not even \ is special anymore, nothing is.
2385 */
2386 if (*regparse == '^') /* Complement of range. */
2387 {
2388 ret = regnode(ANYBUT + extra);
2389 regparse++;
2390 }
2391 else
2392 ret = regnode(ANYOF + extra);
2393
2394 /* At the start ']' and '-' mean the literal character. */
2395 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002396 {
2397 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002398 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002399 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002400
2401 while (*regparse != NUL && *regparse != ']')
2402 {
2403 if (*regparse == '-')
2404 {
2405 ++regparse;
2406 /* The '-' is not used for a range at the end and
2407 * after or before a '\n'. */
2408 if (*regparse == ']' || *regparse == NUL
2409 || startc == -1
2410 || (regparse[0] == '\\' && regparse[1] == 'n'))
2411 {
2412 regc('-');
2413 startc = '-'; /* [--x] is a range */
2414 }
2415 else
2416 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002417 /* Also accept "a-[.z.]" */
2418 endc = 0;
2419 if (*regparse == '[')
2420 endc = get_coll_element(&regparse);
2421 if (endc == 0)
2422 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002423#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002424 if (has_mbyte)
2425 endc = mb_ptr2char_adv(&regparse);
2426 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002427#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002428 endc = *regparse++;
2429 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002430
2431 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002432 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002433 endc = coll_get_char();
2434
Bram Moolenaar071d4272004-06-13 20:20:40 +00002435 if (startc > endc)
2436 EMSG_RET_NULL(_(e_invrange));
2437#ifdef FEAT_MBYTE
2438 if (has_mbyte && ((*mb_char2len)(startc) > 1
2439 || (*mb_char2len)(endc) > 1))
2440 {
2441 /* Limit to a range of 256 chars */
2442 if (endc > startc + 256)
2443 EMSG_RET_NULL(_(e_invrange));
2444 while (++startc <= endc)
2445 regmbc(startc);
2446 }
2447 else
2448#endif
2449 {
2450#ifdef EBCDIC
2451 int alpha_only = FALSE;
2452
2453 /* for alphabetical range skip the gaps
2454 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2455 if (isalpha(startc) && isalpha(endc))
2456 alpha_only = TRUE;
2457#endif
2458 while (++startc <= endc)
2459#ifdef EBCDIC
2460 if (!alpha_only || isalpha(startc))
2461#endif
2462 regc(startc);
2463 }
2464 startc = -1;
2465 }
2466 }
2467 /*
2468 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2469 * accepts "\t", "\e", etc., but only when the 'l' flag in
2470 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002471 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002472 */
2473 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002474 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002476 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002477 && vim_strchr(REGEXP_ABBR,
2478 regparse[1]) != NULL)))
2479 {
2480 regparse++;
2481 if (*regparse == 'n')
2482 {
2483 /* '\n' in range: also match NL */
2484 if (ret != JUST_CALC_SIZE)
2485 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002486 /* Using \n inside [^] does not change what
2487 * matches. "[^\n]" is the same as ".". */
2488 if (*ret == ANYOF)
2489 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002490 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002491 *flagp |= HASNL;
2492 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002493 /* else: must have had a \n already */
2494 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002495 regparse++;
2496 startc = -1;
2497 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002498 else if (*regparse == 'd'
2499 || *regparse == 'o'
2500 || *regparse == 'x'
2501 || *regparse == 'u'
2502 || *regparse == 'U')
2503 {
2504 startc = coll_get_char();
2505 if (startc == 0)
2506 regc(0x0a);
2507 else
2508#ifdef FEAT_MBYTE
2509 regmbc(startc);
2510#else
2511 regc(startc);
2512#endif
2513 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002514 else
2515 {
2516 startc = backslash_trans(*regparse++);
2517 regc(startc);
2518 }
2519 }
2520 else if (*regparse == '[')
2521 {
2522 int c_class;
2523 int cu;
2524
Bram Moolenaardf177f62005-02-22 08:39:57 +00002525 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002526 startc = -1;
2527 /* Characters assumed to be 8 bits! */
2528 switch (c_class)
2529 {
2530 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002531 c_class = get_equi_class(&regparse);
2532 if (c_class != 0)
2533 {
2534 /* produce equivalence class */
2535 reg_equi_class(c_class);
2536 }
2537 else if ((c_class =
2538 get_coll_element(&regparse)) != 0)
2539 {
2540 /* produce a collating element */
2541 regmbc(c_class);
2542 }
2543 else
2544 {
2545 /* literal '[', allow [[-x] as a range */
2546 startc = *regparse++;
2547 regc(startc);
2548 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002549 break;
2550 case CLASS_ALNUM:
2551 for (cu = 1; cu <= 255; cu++)
2552 if (isalnum(cu))
2553 regc(cu);
2554 break;
2555 case CLASS_ALPHA:
2556 for (cu = 1; cu <= 255; cu++)
2557 if (isalpha(cu))
2558 regc(cu);
2559 break;
2560 case CLASS_BLANK:
2561 regc(' ');
2562 regc('\t');
2563 break;
2564 case CLASS_CNTRL:
2565 for (cu = 1; cu <= 255; cu++)
2566 if (iscntrl(cu))
2567 regc(cu);
2568 break;
2569 case CLASS_DIGIT:
2570 for (cu = 1; cu <= 255; cu++)
2571 if (VIM_ISDIGIT(cu))
2572 regc(cu);
2573 break;
2574 case CLASS_GRAPH:
2575 for (cu = 1; cu <= 255; cu++)
2576 if (isgraph(cu))
2577 regc(cu);
2578 break;
2579 case CLASS_LOWER:
2580 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002581 if (MB_ISLOWER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002582 regc(cu);
2583 break;
2584 case CLASS_PRINT:
2585 for (cu = 1; cu <= 255; cu++)
2586 if (vim_isprintc(cu))
2587 regc(cu);
2588 break;
2589 case CLASS_PUNCT:
2590 for (cu = 1; cu <= 255; cu++)
2591 if (ispunct(cu))
2592 regc(cu);
2593 break;
2594 case CLASS_SPACE:
2595 for (cu = 9; cu <= 13; cu++)
2596 regc(cu);
2597 regc(' ');
2598 break;
2599 case CLASS_UPPER:
2600 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002601 if (MB_ISUPPER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002602 regc(cu);
2603 break;
2604 case CLASS_XDIGIT:
2605 for (cu = 1; cu <= 255; cu++)
2606 if (vim_isxdigit(cu))
2607 regc(cu);
2608 break;
2609 case CLASS_TAB:
2610 regc('\t');
2611 break;
2612 case CLASS_RETURN:
2613 regc('\r');
2614 break;
2615 case CLASS_BACKSPACE:
2616 regc('\b');
2617 break;
2618 case CLASS_ESCAPE:
2619 regc('\033');
2620 break;
2621 }
2622 }
2623 else
2624 {
2625#ifdef FEAT_MBYTE
2626 if (has_mbyte)
2627 {
2628 int len;
2629
2630 /* produce a multibyte character, including any
2631 * following composing characters */
2632 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002633 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002634 if (enc_utf8 && utf_char2len(startc) != len)
2635 startc = -1; /* composing chars */
2636 while (--len >= 0)
2637 regc(*regparse++);
2638 }
2639 else
2640#endif
2641 {
2642 startc = *regparse++;
2643 regc(startc);
2644 }
2645 }
2646 }
2647 regc(NUL);
2648 prevchr_len = 1; /* last char was the ']' */
2649 if (*regparse != ']')
2650 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2651 skipchr(); /* let's be friends with the lexer again */
2652 *flagp |= HASWIDTH | SIMPLE;
2653 break;
2654 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002655 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002656 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002657 }
2658 /* FALLTHROUGH */
2659
2660 default:
2661 {
2662 int len;
2663
2664#ifdef FEAT_MBYTE
2665 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002666 * before a multi and when it's a composing char. */
2667 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002668 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002669do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002670 ret = regnode(MULTIBYTECODE);
2671 regmbc(c);
2672 *flagp |= HASWIDTH | SIMPLE;
2673 break;
2674 }
2675#endif
2676
2677 ret = regnode(EXACTLY);
2678
2679 /*
2680 * Append characters as long as:
2681 * - there is no following multi, we then need the character in
2682 * front of it as a single character operand
2683 * - not running into a Magic character
2684 * - "one_exactly" is not set
2685 * But always emit at least one character. Might be a Multi,
2686 * e.g., a "[" without matching "]".
2687 */
2688 for (len = 0; c != NUL && (len == 0
2689 || (re_multi_type(peekchr()) == NOT_MULTI
2690 && !one_exactly
2691 && !is_Magic(c))); ++len)
2692 {
2693 c = no_Magic(c);
2694#ifdef FEAT_MBYTE
2695 if (has_mbyte)
2696 {
2697 regmbc(c);
2698 if (enc_utf8)
2699 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002700 int l;
2701
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002702 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002703 for (;;)
2704 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002705 l = utf_ptr2len(regparse);
2706 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002707 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002708 regmbc(utf_ptr2char(regparse));
2709 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002710 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002711 }
2712 }
2713 else
2714#endif
2715 regc(c);
2716 c = getchr();
2717 }
2718 ungetchr();
2719
2720 regc(NUL);
2721 *flagp |= HASWIDTH;
2722 if (len == 1)
2723 *flagp |= SIMPLE;
2724 }
2725 break;
2726 }
2727
2728 return ret;
2729}
2730
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002731#ifdef FEAT_MBYTE
2732/*
2733 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2734 * character "c".
2735 */
2736 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01002737use_multibytecode(int c)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002738{
2739 return has_mbyte && (*mb_char2len)(c) > 1
2740 && (re_multi_type(peekchr()) != NOT_MULTI
2741 || (enc_utf8 && utf_iscomposing(c)));
2742}
2743#endif
2744
Bram Moolenaar071d4272004-06-13 20:20:40 +00002745/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002746 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002747 * Return pointer to generated code.
2748 */
2749 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002750regnode(int op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002751{
2752 char_u *ret;
2753
2754 ret = regcode;
2755 if (ret == JUST_CALC_SIZE)
2756 regsize += 3;
2757 else
2758 {
2759 *regcode++ = op;
2760 *regcode++ = NUL; /* Null "next" pointer. */
2761 *regcode++ = NUL;
2762 }
2763 return ret;
2764}
2765
2766/*
2767 * Emit (if appropriate) a byte of code
2768 */
2769 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002770regc(int b)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002771{
2772 if (regcode == JUST_CALC_SIZE)
2773 regsize++;
2774 else
2775 *regcode++ = b;
2776}
2777
2778#ifdef FEAT_MBYTE
2779/*
2780 * Emit (if appropriate) a multi-byte character of code
2781 */
2782 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002783regmbc(int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002784{
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002785 if (!has_mbyte && c > 0xff)
2786 return;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002787 if (regcode == JUST_CALC_SIZE)
2788 regsize += (*mb_char2len)(c);
2789 else
2790 regcode += (*mb_char2bytes)(c, regcode);
2791}
2792#endif
2793
2794/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002795 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002796 *
2797 * Means relocating the operand.
2798 */
2799 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002800reginsert(int op, char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002801{
2802 char_u *src;
2803 char_u *dst;
2804 char_u *place;
2805
2806 if (regcode == JUST_CALC_SIZE)
2807 {
2808 regsize += 3;
2809 return;
2810 }
2811 src = regcode;
2812 regcode += 3;
2813 dst = regcode;
2814 while (src > opnd)
2815 *--dst = *--src;
2816
2817 place = opnd; /* Op node, where operand used to be. */
2818 *place++ = op;
2819 *place++ = NUL;
2820 *place = NUL;
2821}
2822
2823/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002824 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002825 * Add a number to the operator.
2826 */
2827 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002828reginsert_nr(int op, long val, char_u *opnd)
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002829{
2830 char_u *src;
2831 char_u *dst;
2832 char_u *place;
2833
2834 if (regcode == JUST_CALC_SIZE)
2835 {
2836 regsize += 7;
2837 return;
2838 }
2839 src = regcode;
2840 regcode += 7;
2841 dst = regcode;
2842 while (src > opnd)
2843 *--dst = *--src;
2844
2845 place = opnd; /* Op node, where operand used to be. */
2846 *place++ = op;
2847 *place++ = NUL;
2848 *place++ = NUL;
2849 place = re_put_long(place, (long_u)val);
2850}
2851
2852/*
2853 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002854 * The operator has the given limit values as operands. Also set next pointer.
2855 *
2856 * Means relocating the operand.
2857 */
2858 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002859reginsert_limits(
2860 int op,
2861 long minval,
2862 long maxval,
2863 char_u *opnd)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002864{
2865 char_u *src;
2866 char_u *dst;
2867 char_u *place;
2868
2869 if (regcode == JUST_CALC_SIZE)
2870 {
2871 regsize += 11;
2872 return;
2873 }
2874 src = regcode;
2875 regcode += 11;
2876 dst = regcode;
2877 while (src > opnd)
2878 *--dst = *--src;
2879
2880 place = opnd; /* Op node, where operand used to be. */
2881 *place++ = op;
2882 *place++ = NUL;
2883 *place++ = NUL;
2884 place = re_put_long(place, (long_u)minval);
2885 place = re_put_long(place, (long_u)maxval);
2886 regtail(opnd, place);
2887}
2888
2889/*
2890 * Write a long as four bytes at "p" and return pointer to the next char.
2891 */
2892 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01002893re_put_long(char_u *p, long_u val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002894{
2895 *p++ = (char_u) ((val >> 24) & 0377);
2896 *p++ = (char_u) ((val >> 16) & 0377);
2897 *p++ = (char_u) ((val >> 8) & 0377);
2898 *p++ = (char_u) (val & 0377);
2899 return p;
2900}
2901
2902/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002903 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002904 */
2905 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002906regtail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002907{
2908 char_u *scan;
2909 char_u *temp;
2910 int offset;
2911
2912 if (p == JUST_CALC_SIZE)
2913 return;
2914
2915 /* Find last node. */
2916 scan = p;
2917 for (;;)
2918 {
2919 temp = regnext(scan);
2920 if (temp == NULL)
2921 break;
2922 scan = temp;
2923 }
2924
Bram Moolenaar582fd852005-03-28 20:58:01 +00002925 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002926 offset = (int)(scan - val);
2927 else
2928 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002929 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002930 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002931 * values in too many places. */
2932 if (offset > 0xffff)
2933 reg_toolong = TRUE;
2934 else
2935 {
2936 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2937 *(scan + 2) = (char_u) (offset & 0377);
2938 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002939}
2940
2941/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002942 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002943 */
2944 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002945regoptail(char_u *p, char_u *val)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002946{
2947 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2948 if (p == NULL || p == JUST_CALC_SIZE
2949 || (OP(p) != BRANCH
2950 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2951 return;
2952 regtail(OPERAND(p), val);
2953}
2954
2955/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002956 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002958/*
2959 * Start parsing at "str".
2960 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002961 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002962initchr(char_u *str)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002963{
2964 regparse = str;
2965 prevchr_len = 0;
2966 curchr = prevprevchr = prevchr = nextchr = -1;
2967 at_start = TRUE;
2968 prev_at_start = FALSE;
2969}
2970
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002971/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002972 * Save the current parse state, so that it can be restored and parsing
2973 * starts in the same state again.
2974 */
2975 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002976save_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002977{
2978 ps->regparse = regparse;
2979 ps->prevchr_len = prevchr_len;
2980 ps->curchr = curchr;
2981 ps->prevchr = prevchr;
2982 ps->prevprevchr = prevprevchr;
2983 ps->nextchr = nextchr;
2984 ps->at_start = at_start;
2985 ps->prev_at_start = prev_at_start;
2986 ps->regnpar = regnpar;
2987}
2988
2989/*
2990 * Restore a previously saved parse state.
2991 */
2992 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01002993restore_parse_state(parse_state_T *ps)
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002994{
2995 regparse = ps->regparse;
2996 prevchr_len = ps->prevchr_len;
2997 curchr = ps->curchr;
2998 prevchr = ps->prevchr;
2999 prevprevchr = ps->prevprevchr;
3000 nextchr = ps->nextchr;
3001 at_start = ps->at_start;
3002 prev_at_start = ps->prev_at_start;
3003 regnpar = ps->regnpar;
3004}
3005
3006
3007/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003008 * Get the next character without advancing.
3009 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003010 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003011peekchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003012{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003013 static int after_slash = FALSE;
3014
Bram Moolenaar071d4272004-06-13 20:20:40 +00003015 if (curchr == -1)
3016 {
3017 switch (curchr = regparse[0])
3018 {
3019 case '.':
3020 case '[':
3021 case '~':
3022 /* magic when 'magic' is on */
3023 if (reg_magic >= MAGIC_ON)
3024 curchr = Magic(curchr);
3025 break;
3026 case '(':
3027 case ')':
3028 case '{':
3029 case '%':
3030 case '+':
3031 case '=':
3032 case '?':
3033 case '@':
3034 case '!':
3035 case '&':
3036 case '|':
3037 case '<':
3038 case '>':
3039 case '#': /* future ext. */
3040 case '"': /* future ext. */
3041 case '\'': /* future ext. */
3042 case ',': /* future ext. */
3043 case '-': /* future ext. */
3044 case ':': /* future ext. */
3045 case ';': /* future ext. */
3046 case '`': /* future ext. */
3047 case '/': /* Can't be used in / command */
3048 /* magic only after "\v" */
3049 if (reg_magic == MAGIC_ALL)
3050 curchr = Magic(curchr);
3051 break;
3052 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003053 /* * is not magic as the very first character, eg "?*ptr", when
3054 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3055 * "\(\*" is not magic, thus must be magic if "after_slash" */
3056 if (reg_magic >= MAGIC_ON
3057 && !at_start
3058 && !(prev_at_start && prevchr == Magic('^'))
3059 && (after_slash
3060 || (prevchr != Magic('(')
3061 && prevchr != Magic('&')
3062 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003063 curchr = Magic('*');
3064 break;
3065 case '^':
3066 /* '^' is only magic as the very first character and if it's after
3067 * "\(", "\|", "\&' or "\n" */
3068 if (reg_magic >= MAGIC_OFF
3069 && (at_start
3070 || reg_magic == MAGIC_ALL
3071 || prevchr == Magic('(')
3072 || prevchr == Magic('|')
3073 || prevchr == Magic('&')
3074 || prevchr == Magic('n')
3075 || (no_Magic(prevchr) == '('
3076 && prevprevchr == Magic('%'))))
3077 {
3078 curchr = Magic('^');
3079 at_start = TRUE;
3080 prev_at_start = FALSE;
3081 }
3082 break;
3083 case '$':
3084 /* '$' is only magic as the very last char and if it's in front of
3085 * either "\|", "\)", "\&", or "\n" */
3086 if (reg_magic >= MAGIC_OFF)
3087 {
3088 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003089 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003090
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003091 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003092 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003093 || p[1] == 'm' || p[1] == 'M'
3094 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3095 {
3096 if (p[1] == 'v')
3097 is_magic_all = TRUE;
3098 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3099 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003100 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003101 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003102 if (p[0] == NUL
3103 || (p[0] == '\\'
3104 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3105 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003106 || (is_magic_all
3107 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003108 || reg_magic == MAGIC_ALL)
3109 curchr = Magic('$');
3110 }
3111 break;
3112 case '\\':
3113 {
3114 int c = regparse[1];
3115
3116 if (c == NUL)
3117 curchr = '\\'; /* trailing '\' */
3118 else if (
3119#ifdef EBCDIC
3120 vim_strchr(META, c)
3121#else
3122 c <= '~' && META_flags[c]
3123#endif
3124 )
3125 {
3126 /*
3127 * META contains everything that may be magic sometimes,
3128 * except ^ and $ ("\^" and "\$" are only magic after
Bram Moolenaarb878bbb2015-06-09 20:39:24 +02003129 * "\V"). We now fetch the next character and toggle its
Bram Moolenaar071d4272004-06-13 20:20:40 +00003130 * magicness. Therefore, \ is so meta-magic that it is
3131 * not in META.
3132 */
3133 curchr = -1;
3134 prev_at_start = at_start;
3135 at_start = FALSE; /* be able to say "/\*ptr" */
3136 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003137 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003138 peekchr();
3139 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003140 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003141 curchr = toggle_Magic(curchr);
3142 }
3143 else if (vim_strchr(REGEXP_ABBR, c))
3144 {
3145 /*
3146 * Handle abbreviations, like "\t" for TAB -- webb
3147 */
3148 curchr = backslash_trans(c);
3149 }
3150 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3151 curchr = toggle_Magic(c);
3152 else
3153 {
3154 /*
3155 * Next character can never be (made) magic?
3156 * Then backslashing it won't do anything.
3157 */
3158#ifdef FEAT_MBYTE
3159 if (has_mbyte)
3160 curchr = (*mb_ptr2char)(regparse + 1);
3161 else
3162#endif
3163 curchr = c;
3164 }
3165 break;
3166 }
3167
3168#ifdef FEAT_MBYTE
3169 default:
3170 if (has_mbyte)
3171 curchr = (*mb_ptr2char)(regparse);
3172#endif
3173 }
3174 }
3175
3176 return curchr;
3177}
3178
3179/*
3180 * Eat one lexed character. Do this in a way that we can undo it.
3181 */
3182 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003183skipchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003184{
3185 /* peekchr() eats a backslash, do the same here */
3186 if (*regparse == '\\')
3187 prevchr_len = 1;
3188 else
3189 prevchr_len = 0;
3190 if (regparse[prevchr_len] != NUL)
3191 {
3192#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003193 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003194 /* exclude composing chars that mb_ptr2len does include */
3195 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003196 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003197 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003198 else
3199#endif
3200 ++prevchr_len;
3201 }
3202 regparse += prevchr_len;
3203 prev_at_start = at_start;
3204 at_start = FALSE;
3205 prevprevchr = prevchr;
3206 prevchr = curchr;
3207 curchr = nextchr; /* use previously unget char, or -1 */
3208 nextchr = -1;
3209}
3210
3211/*
3212 * Skip a character while keeping the value of prev_at_start for at_start.
3213 * prevchr and prevprevchr are also kept.
3214 */
3215 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003216skipchr_keepstart(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003217{
3218 int as = prev_at_start;
3219 int pr = prevchr;
3220 int prpr = prevprevchr;
3221
3222 skipchr();
3223 at_start = as;
3224 prevchr = pr;
3225 prevprevchr = prpr;
3226}
3227
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer.
 * Returns what peekchr() returned before skipping.
 */
    static int
getchr(void)
{
    int     lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3240
3241/*
3242 * put character back. Works only once!
3243 */
3244 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01003245ungetchr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003246{
3247 nextchr = curchr;
3248 curchr = prevchr;
3249 prevchr = prevprevchr;
3250 at_start = prev_at_start;
3251 prev_at_start = FALSE;
3252
3253 /* Backup regparse, so that it's at the same position as before the
3254 * getchr(). */
3255 regparse -= prevchr_len;
3256}
3257
3258/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003259 * Get and return the value of the hex string at the current position.
3260 * Return -1 if there is no valid hex number.
3261 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003262 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003263 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003264 * The parameter controls the maximum number of input characters. This will be
3265 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3266 */
3267 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003268gethexchrs(int maxinputlen)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003269{
3270 int nr = 0;
3271 int c;
3272 int i;
3273
3274 for (i = 0; i < maxinputlen; ++i)
3275 {
3276 c = regparse[0];
3277 if (!vim_isxdigit(c))
3278 break;
3279 nr <<= 4;
3280 nr |= hex2nr(c);
3281 ++regparse;
3282 }
3283
3284 if (i == 0)
3285 return -1;
3286 return nr;
3287}
3288
3289/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003290 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003291 * current position. Return -1 for invalid. Consumes all digits.
3292 */
3293 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003294getdecchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003295{
3296 int nr = 0;
3297 int c;
3298 int i;
3299
3300 for (i = 0; ; ++i)
3301 {
3302 c = regparse[0];
3303 if (c < '0' || c > '9')
3304 break;
3305 nr *= 10;
3306 nr += c - '0';
3307 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003308 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003309 }
3310
3311 if (i == 0)
3312 return -1;
3313 return nr;
3314}
3315
3316/*
3317 * get and return the value of the octal string immediately after the current
3318 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3319 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3320 * treat 8 or 9 as recognised characters. Position is updated:
3321 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003322 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003323 */
3324 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003325getoctchrs(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003326{
3327 int nr = 0;
3328 int c;
3329 int i;
3330
3331 for (i = 0; i < 3 && nr < 040; ++i)
3332 {
3333 c = regparse[0];
3334 if (c < '0' || c > '7')
3335 break;
3336 nr <<= 3;
3337 nr |= hex2nr(c);
3338 ++regparse;
3339 }
3340
3341 if (i == 0)
3342 return -1;
3343 return nr;
3344}
3345
3346/*
3347 * Get a number after a backslash that is inside [].
3348 * When nothing is recognized return a backslash.
3349 */
3350 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003351coll_get_char(void)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003352{
3353 int nr = -1;
3354
3355 switch (*regparse++)
3356 {
3357 case 'd': nr = getdecchrs(); break;
3358 case 'o': nr = getoctchrs(); break;
3359 case 'x': nr = gethexchrs(2); break;
3360 case 'u': nr = gethexchrs(4); break;
3361 case 'U': nr = gethexchrs(8); break;
3362 }
3363 if (nr < 0)
3364 {
3365 /* If getting the number fails be backwards compatible: the character
3366 * is a backslash. */
3367 --regparse;
3368 nr = '\\';
3369 }
3370 return nr;
3371}
3372
3373/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003374 * read_limits - Read two integers to be taken as a minimum and maximum.
3375 * If the first character is '-', then the range is reversed.
3376 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3377 * missing, a very big number is the default.
3378 */
3379 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003380read_limits(long *minval, long *maxval)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003381{
3382 int reverse = FALSE;
3383 char_u *first_char;
3384 long tmp;
3385
3386 if (*regparse == '-')
3387 {
3388 /* Starts with '-', so reverse the range later */
3389 regparse++;
3390 reverse = TRUE;
3391 }
3392 first_char = regparse;
3393 *minval = getdigits(&regparse);
3394 if (*regparse == ',') /* There is a comma */
3395 {
3396 if (vim_isdigit(*++regparse))
3397 *maxval = getdigits(&regparse);
3398 else
3399 *maxval = MAX_LIMIT;
3400 }
3401 else if (VIM_ISDIGIT(*first_char))
3402 *maxval = *minval; /* It was \{n} or \{-n} */
3403 else
3404 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3405 if (*regparse == '\\')
3406 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003407 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003408 {
3409 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3410 reg_magic == MAGIC_ALL ? "" : "\\");
3411 EMSG_RET_FAIL(IObuff);
3412 }
3413
3414 /*
3415 * Reverse the range if there was a '-', or make sure it is in the right
3416 * order otherwise.
3417 */
3418 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3419 {
3420 tmp = *minval;
3421 *minval = *maxval;
3422 *maxval = tmp;
3423 }
3424 skipchr(); /* let's be friends with the lexer again */
3425 return OK;
3426}
3427
3428/*
3429 * vim_regexec and friends
3430 */
3431
3432/*
3433 * Global work variables for vim_regexec().
3434 */
3435
3436/* The current match-position is remembered with these variables: */
3437static linenr_T reglnum; /* line number, relative to first line */
3438static char_u *regline; /* start of current line */
3439static char_u *reginput; /* current input, points into "regline" */
3440
3441static int need_clear_subexpr; /* subexpressions still need to be
3442 * cleared */
3443#ifdef FEAT_SYN_HL
3444static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3445 * still need to be cleared */
3446#endif
3447
Bram Moolenaar071d4272004-06-13 20:20:40 +00003448/*
3449 * Structure used to save the current input state, when it needs to be
3450 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003451 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003452 */
3453typedef struct
3454{
3455 union
3456 {
3457 char_u *ptr; /* reginput pointer, for single-line regexp */
3458 lpos_T pos; /* reginput pos, for multi-line regexp */
3459 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003460 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003461} regsave_T;
3462
3463/* struct to save start/end pointer/position in for \(\) */
3464typedef struct
3465{
3466 union
3467 {
3468 char_u *ptr;
3469 lpos_T pos;
3470 } se_u;
3471} save_se_T;
3472
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003473/* used for BEHIND and NOBEHIND matching */
3474typedef struct regbehind_S
3475{
3476 regsave_T save_after;
3477 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003478 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003479 save_se_T save_start[NSUBEXP];
3480 save_se_T save_end[NSUBEXP];
3481} regbehind_T;
3482
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003483static char_u *reg_getline(linenr_T lnum);
3484static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm);
3485static long regtry(bt_regprog_T *prog, colnr_T col);
3486static void cleanup_subexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003487#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003488static void cleanup_zsubexpr(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003489#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003490static void save_subexpr(regbehind_T *bp);
3491static void restore_subexpr(regbehind_T *bp);
3492static void reg_nextline(void);
3493static void reg_save(regsave_T *save, garray_T *gap);
3494static void reg_restore(regsave_T *save, garray_T *gap);
3495static int reg_save_equal(regsave_T *save);
3496static void save_se_multi(save_se_T *savep, lpos_T *posp);
3497static void save_se_one(save_se_T *savep, char_u **pp);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003498
/*
 * Save the sub-expressions before attempting a match.
 * Stores a position for a multi-line match, a pointer otherwise.
 * The expansion is parenthesized so that it remains one expression wherever
 * the macro is used.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/*
 * After a failed match restore the sub-expressions.
 * Expands to a brace block, thus it can only be used as a statement.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3509
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003510static int re_num_cmp(long_u val, char_u *scan);
3511static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
3512static int regmatch(char_u *prog);
3513static int regrepeat(char_u *p, long maxcount);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003514
3515#ifdef DEBUG
3516int regnarrate = 0;
3517#endif
3518
3519/*
3520 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3521 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3522 * contains '\c' or '\C' the value is overruled.
3523 */
3524static int ireg_ic;
3525
3526#ifdef FEAT_MBYTE
3527/*
3528 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3529 * in the regexp. Defaults to false, always.
3530 */
3531static int ireg_icombine;
3532#endif
3533
3534/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003535 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3536 * there is no maximum.
3537 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003538static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003539
3540/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003541 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3542 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003543 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003544 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003545static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003546static unsigned reg_tofreelen;
3547
3548/*
3549 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003550 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003551 * done:
3552 * single-line multi-line
3553 * reg_match &regmatch_T NULL
3554 * reg_mmatch NULL &regmmatch_T
3555 * reg_startp reg_match->startp <invalid>
3556 * reg_endp reg_match->endp <invalid>
3557 * reg_startpos <invalid> reg_mmatch->startpos
3558 * reg_endpos <invalid> reg_mmatch->endpos
3559 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003560 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003561 * reg_firstlnum <invalid> first line in which to search
3562 * reg_maxline 0 last line nr
3563 * reg_line_lbr FALSE or TRUE FALSE
3564 */
3565static regmatch_T *reg_match;
3566static regmmatch_T *reg_mmatch;
3567static char_u **reg_startp = NULL;
3568static char_u **reg_endp = NULL;
3569static lpos_T *reg_startpos = NULL;
3570static lpos_T *reg_endpos = NULL;
3571static win_T *reg_win;
3572static buf_T *reg_buf;
3573static linenr_T reg_firstlnum;
3574static linenr_T reg_maxline;
3575static int reg_line_lbr; /* "\n" in string is line break */
3576
/*
 * Values for rs_state in regitem_T.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3597
3598/*
3599 * When there are alternatives a regstate_T is put on the regstack to remember
3600 * what we are doing.
3601 * Before it may be another type of item, depending on rs_state, to remember
3602 * more things.
3603 */
3604typedef struct regitem_S
3605{
3606 regstate_T rs_state; /* what we are doing, one of RS_ above */
3607 char_u *rs_scan; /* current node in program */
3608 union
3609 {
3610 save_se_T sesave;
3611 regsave_T regsave;
3612 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003613 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003614} regitem_T;
3615
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003616static regitem_T *regstack_push(regstate_T state, char_u *scan);
3617static void regstack_pop(char_u **scan);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003618
/*
 * Bookkeeping used for STAR, PLUS and BRACE_SIMPLE matching.
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3628
3629/* used to store input position when a BACK was encountered, so that we now if
3630 * we made any progress since the last time. */
3631typedef struct backpos_S
3632{
3633 char_u *bp_scan; /* "scan" where BACK was encountered */
3634 regsave_T bp_pos; /* last input position */
3635} backpos_T;
3636
3637/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003638 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3639 * to avoid invoking malloc() and free() often.
3640 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3641 * or regbehind_T.
3642 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003643 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003644static garray_T regstack = {0, 0, 0, 0, NULL};
3645static garray_T backpos = {0, 0, 0, 0, NULL};
3646
3647/*
3648 * Both for regstack and backpos tables we use the following strategy of
3649 * allocation (to reduce malloc/free calls):
3650 * - Initial size is fairly small.
3651 * - When needed, the tables are grown bigger (8 times at first, double after
3652 * that).
3653 * - After executing the match we free the memory only if the array has grown.
3654 * Thus the memory is kept allocated when it's at the initial size.
3655 * This makes it fast while not keeping a lot of memory allocated.
3656 * A three times speed increase was observed when using many simple patterns.
3657 */
3658#define REGSTACK_INITIAL 2048
3659#define BACKPOS_INITIAL 64
3660
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps around between calls: the
 * "regstack" and "backpos" arrays plus the "reg_tofree" and "reg_prev_sub"
 * buffers.  Only compiled in for EXITFREE builds (and prototype generation).
 */
    void
free_regexp_stuff(void)
{
    /* the two growarrays reused by regmatch() */
    ga_clear(&regstack);
    ga_clear(&backpos);

    /* separately allocated buffers */
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003671
Bram Moolenaar071d4272004-06-13 20:20:40 +00003672/*
3673 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3674 */
3675 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01003676reg_getline(linenr_T lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003677{
3678 /* when looking behind for a match/no-match lnum is negative. But we
3679 * can't go before line 1 */
3680 if (reg_firstlnum + lnum < 1)
3681 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003682 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003683 /* Must have matched the "\n" in the last line. */
3684 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003685 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3686}
3687
3688static regsave_T behind_pos;
3689
3690#ifdef FEAT_SYN_HL
3691static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3692static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3693static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3694static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3695#endif
3696
3697/* TRUE if using multi-line regexp. */
3698#define REG_MULTI (reg_match == NULL)
3699
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003700static int bt_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr);
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003701
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003702
Bram Moolenaar071d4272004-06-13 20:20:40 +00003703/*
3704 * Match a regexp against a string.
3705 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3706 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003707 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003708 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003709 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003710 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003711 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01003712bt_regexec_nl(
3713 regmatch_T *rmp,
3714 char_u *line, /* string to match against */
3715 colnr_T col, /* column to start looking for match */
3716 int line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003717{
3718 reg_match = rmp;
3719 reg_mmatch = NULL;
3720 reg_maxline = 0;
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003721 reg_line_lbr = line_lbr;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003722 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003723 reg_win = NULL;
3724 ireg_ic = rmp->rm_ic;
3725#ifdef FEAT_MBYTE
3726 ireg_icombine = FALSE;
3727#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003728 ireg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003729
3730 return bt_regexec_both(line, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003731}
3732
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01003733static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003734
Bram Moolenaar071d4272004-06-13 20:20:40 +00003735/*
3736 * Match a regexp against multiple lines.
3737 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3738 * Uses curbuf for line count and 'iskeyword'.
3739 *
3740 * Return zero if there is no match. Return number of lines contained in the
3741 * match otherwise.
3742 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003743 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003744bt_regexec_multi(
3745 regmmatch_T *rmp,
3746 win_T *win, /* window in which to search or NULL */
3747 buf_T *buf, /* buffer in which to search */
3748 linenr_T lnum, /* nr of line to start looking for match */
3749 colnr_T col, /* column to start looking for match */
3750 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003751{
Bram Moolenaar071d4272004-06-13 20:20:40 +00003752 reg_match = NULL;
3753 reg_mmatch = rmp;
3754 reg_buf = buf;
3755 reg_win = win;
3756 reg_firstlnum = lnum;
3757 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3758 reg_line_lbr = FALSE;
3759 ireg_ic = rmp->rmm_ic;
3760#ifdef FEAT_MBYTE
3761 ireg_icombine = FALSE;
3762#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003763 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003764
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003765 return bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003766}
3767
3768/*
3769 * Match a regexp against a string ("line" points to the string) or multiple
3770 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003771 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003772 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003773 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01003774bt_regexec_both(
3775 char_u *line,
3776 colnr_T col, /* column to start looking for match */
3777 proftime_T *tm UNUSED) /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003778{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003779 bt_regprog_T *prog;
3780 char_u *s;
3781 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003782
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003783 /* Create "regstack" and "backpos" if they are not allocated yet.
3784 * We allocate *_INITIAL amount of bytes first and then set the grow size
3785 * to much bigger value to avoid many malloc calls in case of deep regular
3786 * expressions. */
3787 if (regstack.ga_data == NULL)
3788 {
3789 /* Use an item size of 1 byte, since we push different things
3790 * onto the regstack. */
3791 ga_init2(&regstack, 1, REGSTACK_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003792 (void)ga_grow(&regstack, REGSTACK_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003793 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3794 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003795
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003796 if (backpos.ga_data == NULL)
3797 {
3798 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
Bram Moolenaarcde88542015-08-11 19:14:00 +02003799 (void)ga_grow(&backpos, BACKPOS_INITIAL);
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003800 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3801 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003802
Bram Moolenaar071d4272004-06-13 20:20:40 +00003803 if (REG_MULTI)
3804 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003805 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003806 line = reg_getline((linenr_T)0);
3807 reg_startpos = reg_mmatch->startpos;
3808 reg_endpos = reg_mmatch->endpos;
3809 }
3810 else
3811 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003812 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003813 reg_startp = reg_match->startp;
3814 reg_endp = reg_match->endp;
3815 }
3816
3817 /* Be paranoid... */
3818 if (prog == NULL || line == NULL)
3819 {
3820 EMSG(_(e_null));
3821 goto theend;
3822 }
3823
3824 /* Check validity of program. */
3825 if (prog_magic_wrong())
3826 goto theend;
3827
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003828 /* If the start column is past the maximum column: no need to try. */
3829 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3830 goto theend;
3831
Bram Moolenaar071d4272004-06-13 20:20:40 +00003832 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3833 if (prog->regflags & RF_ICASE)
3834 ireg_ic = TRUE;
3835 else if (prog->regflags & RF_NOICASE)
3836 ireg_ic = FALSE;
3837
3838#ifdef FEAT_MBYTE
3839 /* If pattern contains "\Z" overrule value of ireg_icombine */
3840 if (prog->regflags & RF_ICOMBINE)
3841 ireg_icombine = TRUE;
3842#endif
3843
3844 /* If there is a "must appear" string, look for it. */
3845 if (prog->regmust != NULL)
3846 {
3847 int c;
3848
3849#ifdef FEAT_MBYTE
3850 if (has_mbyte)
3851 c = (*mb_ptr2char)(prog->regmust);
3852 else
3853#endif
3854 c = *prog->regmust;
3855 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003856
3857 /*
3858 * This is used very often, esp. for ":global". Use three versions of
3859 * the loop to avoid overhead of conditions.
3860 */
3861 if (!ireg_ic
3862#ifdef FEAT_MBYTE
3863 && !has_mbyte
3864#endif
3865 )
3866 while ((s = vim_strbyte(s, c)) != NULL)
3867 {
3868 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3869 break; /* Found it. */
3870 ++s;
3871 }
3872#ifdef FEAT_MBYTE
3873 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3874 while ((s = vim_strchr(s, c)) != NULL)
3875 {
3876 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3877 break; /* Found it. */
3878 mb_ptr_adv(s);
3879 }
3880#endif
3881 else
3882 while ((s = cstrchr(s, c)) != NULL)
3883 {
3884 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3885 break; /* Found it. */
3886 mb_ptr_adv(s);
3887 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003888 if (s == NULL) /* Not present. */
3889 goto theend;
3890 }
3891
3892 regline = line;
3893 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003894 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003895
3896 /* Simplest case: Anchored match need be tried only once. */
3897 if (prog->reganch)
3898 {
3899 int c;
3900
3901#ifdef FEAT_MBYTE
3902 if (has_mbyte)
3903 c = (*mb_ptr2char)(regline + col);
3904 else
3905#endif
3906 c = regline[col];
3907 if (prog->regstart == NUL
3908 || prog->regstart == c
3909 || (ireg_ic && ((
3910#ifdef FEAT_MBYTE
3911 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3912 || (c < 255 && prog->regstart < 255 &&
3913#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003914 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003915 retval = regtry(prog, col);
3916 else
3917 retval = 0;
3918 }
3919 else
3920 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003921#ifdef FEAT_RELTIME
3922 int tm_count = 0;
3923#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003924 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003925 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003926 {
3927 if (prog->regstart != NUL)
3928 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003929 /* Skip until the char we know it must start with.
3930 * Used often, do some work to avoid call overhead. */
3931 if (!ireg_ic
3932#ifdef FEAT_MBYTE
3933 && !has_mbyte
3934#endif
3935 )
3936 s = vim_strbyte(regline + col, prog->regstart);
3937 else
3938 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003939 if (s == NULL)
3940 {
3941 retval = 0;
3942 break;
3943 }
3944 col = (int)(s - regline);
3945 }
3946
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003947 /* Check for maximum column to try. */
3948 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3949 {
3950 retval = 0;
3951 break;
3952 }
3953
Bram Moolenaar071d4272004-06-13 20:20:40 +00003954 retval = regtry(prog, col);
3955 if (retval > 0)
3956 break;
3957
3958 /* if not currently on the first line, get it again */
3959 if (reglnum != 0)
3960 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003961 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003962 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003963 }
3964 if (regline[col] == NUL)
3965 break;
3966#ifdef FEAT_MBYTE
3967 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003968 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003969 else
3970#endif
3971 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003972#ifdef FEAT_RELTIME
3973 /* Check for timeout once in a twenty times to avoid overhead. */
3974 if (tm != NULL && ++tm_count == 20)
3975 {
3976 tm_count = 0;
3977 if (profile_passed_limit(tm))
3978 break;
3979 }
3980#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003981 }
3982 }
3983
Bram Moolenaar071d4272004-06-13 20:20:40 +00003984theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003985 /* Free "reg_tofree" when it's a bit big.
3986 * Free regstack and backpos if they are bigger than their initial size. */
3987 if (reg_tofreelen > 400)
3988 {
3989 vim_free(reg_tofree);
3990 reg_tofree = NULL;
3991 }
3992 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3993 ga_clear(&regstack);
3994 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3995 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003996
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997 return retval;
3998}
3999
4000#ifdef FEAT_SYN_HL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004001static reg_extmatch_T *make_extmatch(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004002
4003/*
4004 * Create a new extmatch and mark it as referenced once.
4005 */
4006 static reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004007make_extmatch(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004008{
4009 reg_extmatch_T *em;
4010
4011 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4012 if (em != NULL)
4013 em->refcnt = 1;
4014 return em;
4015}
4016
4017/*
4018 * Add a reference to an extmatch.
4019 */
4020 reg_extmatch_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01004021ref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004022{
4023 if (em != NULL)
4024 em->refcnt++;
4025 return em;
4026}
4027
4028/*
4029 * Remove a reference to an extmatch. If there are no references left, free
4030 * the info.
4031 */
4032 void
Bram Moolenaar05540972016-01-30 20:31:25 +01004033unref_extmatch(reg_extmatch_T *em)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004034{
4035 int i;
4036
4037 if (em != NULL && --em->refcnt <= 0)
4038 {
4039 for (i = 0; i < NSUBEXP; ++i)
4040 vim_free(em->matches[i]);
4041 vim_free(em);
4042 }
4043}
4044#endif
4045
4046/*
4047 * regtry - try match of "prog" with at regline["col"].
4048 * Returns 0 for failure, number of lines contained in the match otherwise.
4049 */
4050 static long
Bram Moolenaar05540972016-01-30 20:31:25 +01004051regtry(bt_regprog_T *prog, colnr_T col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004052{
4053 reginput = regline + col;
4054 need_clear_subexpr = TRUE;
4055#ifdef FEAT_SYN_HL
4056 /* Clear the external match subpointers if necessary. */
4057 if (prog->reghasz == REX_SET)
4058 need_clear_zsubexpr = TRUE;
4059#endif
4060
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004061 if (regmatch(prog->program + 1) == 0)
4062 return 0;
4063
4064 cleanup_subexpr();
4065 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004066 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004067 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004068 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004069 reg_startpos[0].lnum = 0;
4070 reg_startpos[0].col = col;
4071 }
4072 if (reg_endpos[0].lnum < 0)
4073 {
4074 reg_endpos[0].lnum = reglnum;
4075 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004076 }
4077 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004078 /* Use line number of "\ze". */
4079 reglnum = reg_endpos[0].lnum;
4080 }
4081 else
4082 {
4083 if (reg_startp[0] == NULL)
4084 reg_startp[0] = regline + col;
4085 if (reg_endp[0] == NULL)
4086 reg_endp[0] = reginput;
4087 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004088#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004089 /* Package any found \z(...\) matches for export. Default is none. */
4090 unref_extmatch(re_extmatch_out);
4091 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004092
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004093 if (prog->reghasz == REX_SET)
4094 {
4095 int i;
4096
4097 cleanup_zsubexpr();
4098 re_extmatch_out = make_extmatch();
4099 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004100 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004101 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004102 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004103 /* Only accept single line matches. */
4104 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004105 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4106 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004107 re_extmatch_out->matches[i] =
4108 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004109 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004110 reg_endzpos[i].col - reg_startzpos[i].col);
4111 }
4112 else
4113 {
4114 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4115 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004116 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004117 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004118 }
4119 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004120 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004121#endif
4122 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004123}
4124
4125#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004126static int reg_prev_class(void);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004127
Bram Moolenaar071d4272004-06-13 20:20:40 +00004128/*
4129 * Get class of previous character.
4130 */
4131 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004132reg_prev_class(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004133{
4134 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004135 return mb_get_class_buf(reginput - 1
4136 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004137 return -1;
4138}
Bram Moolenaar071d4272004-06-13 20:20:40 +00004139#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004140
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01004141static int reg_match_visual(void);
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004142
4143/*
4144 * Return TRUE if the current reginput position matches the Visual area.
4145 */
4146 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004147reg_match_visual(void)
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004148{
4149 pos_T top, bot;
4150 linenr_T lnum;
4151 colnr_T col;
4152 win_T *wp = reg_win == NULL ? curwin : reg_win;
4153 int mode;
4154 colnr_T start, end;
4155 colnr_T start2, end2;
4156 colnr_T cols;
4157
4158 /* Check if the buffer is the current buffer. */
4159 if (reg_buf != curbuf || VIsual.lnum == 0)
4160 return FALSE;
4161
4162 if (VIsual_active)
4163 {
4164 if (lt(VIsual, wp->w_cursor))
4165 {
4166 top = VIsual;
4167 bot = wp->w_cursor;
4168 }
4169 else
4170 {
4171 top = wp->w_cursor;
4172 bot = VIsual;
4173 }
4174 mode = VIsual_mode;
4175 }
4176 else
4177 {
4178 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4179 {
4180 top = curbuf->b_visual.vi_start;
4181 bot = curbuf->b_visual.vi_end;
4182 }
4183 else
4184 {
4185 top = curbuf->b_visual.vi_end;
4186 bot = curbuf->b_visual.vi_start;
4187 }
4188 mode = curbuf->b_visual.vi_mode;
4189 }
4190 lnum = reglnum + reg_firstlnum;
4191 if (lnum < top.lnum || lnum > bot.lnum)
4192 return FALSE;
4193
4194 if (mode == 'v')
4195 {
4196 col = (colnr_T)(reginput - regline);
4197 if ((lnum == top.lnum && col < top.col)
4198 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4199 return FALSE;
4200 }
4201 else if (mode == Ctrl_V)
4202 {
4203 getvvcol(wp, &top, &start, NULL, &end);
4204 getvvcol(wp, &bot, &start2, NULL, &end2);
4205 if (start2 < start)
4206 start = start2;
4207 if (end2 > end)
4208 end = end2;
4209 if (top.col == MAXCOL || bot.col == MAXCOL)
4210 end = MAXCOL;
4211 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4212 if (cols < start || cols > end - (*p_sel == 'e'))
4213 return FALSE;
4214 }
4215 return TRUE;
4216}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004217
/* Advance "reginput" with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4227
4228/*
4229 * regmatch - main matching routine
4230 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004231 * Conceptually the strategy is simple: Check to see whether the current node
4232 * matches, push an item onto the regstack and loop to see whether the rest
4233 * matches, and then act accordingly. In practice we make some effort to
4234 * avoid using the regstack, in particular by going through "ordinary" nodes
4235 * (that don't need to know whether the rest of the match failed) by a nested
4236 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004237 *
4238 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4239 * the last matched character.
4240 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4241 * undefined state!
4242 */
4243 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01004244regmatch(
4245 char_u *scan) /* Current node. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004246{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004247 char_u *next; /* Next node. */
4248 int op;
4249 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004250 regitem_T *rp;
4251 int no;
4252 int status; /* one of the RA_ values: */
4253#define RA_FAIL 1 /* something failed, abort */
4254#define RA_CONT 2 /* continue in inner loop */
4255#define RA_BREAK 3 /* break inner loop */
4256#define RA_MATCH 4 /* successful match */
4257#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004258
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004259 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004260 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004261 regstack.ga_len = 0;
4262 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004263
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004264 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004265 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004266 */
4267 for (;;)
4268 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004269 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4270 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004271 fast_breakcheck();
4272
4273#ifdef DEBUG
4274 if (scan != NULL && regnarrate)
4275 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004276 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004277 mch_errmsg("(\n");
4278 }
4279#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004280
4281 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004282 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004283 * regstack.
4284 */
4285 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004286 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004287 if (got_int || scan == NULL)
4288 {
4289 status = RA_FAIL;
4290 break;
4291 }
4292 status = RA_CONT;
4293
Bram Moolenaar071d4272004-06-13 20:20:40 +00004294#ifdef DEBUG
4295 if (regnarrate)
4296 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004297 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004298 mch_errmsg("...\n");
4299# ifdef FEAT_SYN_HL
4300 if (re_extmatch_in != NULL)
4301 {
4302 int i;
4303
4304 mch_errmsg(_("External submatches:\n"));
4305 for (i = 0; i < NSUBEXP; i++)
4306 {
4307 mch_errmsg(" \"");
4308 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004309 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004310 mch_errmsg("\"\n");
4311 }
4312 }
4313# endif
4314 }
4315#endif
4316 next = regnext(scan);
4317
4318 op = OP(scan);
4319 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004320 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4321 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004322 {
4323 reg_nextline();
4324 }
4325 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4326 {
4327 ADVANCE_REGINPUT();
4328 }
4329 else
4330 {
4331 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004332 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004333#ifdef FEAT_MBYTE
4334 if (has_mbyte)
4335 c = (*mb_ptr2char)(reginput);
4336 else
4337#endif
4338 c = *reginput;
4339 switch (op)
4340 {
4341 case BOL:
4342 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004343 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004344 break;
4345
4346 case EOL:
4347 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004348 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004349 break;
4350
4351 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004352 /* We're not at the beginning of the file when below the first
4353 * line where we started, not at the start of the line or we
4354 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004355 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004356 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004357 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004358 break;
4359
4360 case RE_EOF:
4361 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004362 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004363 break;
4364
4365 case CURSOR:
4366 /* Check if the buffer is in a window and compare the
4367 * reg_win->w_cursor position to the match position. */
4368 if (reg_win == NULL
4369 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4370 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004371 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004372 break;
4373
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004374 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004375 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004376 {
4377 int mark = OPERAND(scan)[0];
4378 int cmp = OPERAND(scan)[1];
4379 pos_T *pos;
4380
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004381 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004382 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004383 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004384 || (pos->lnum == reglnum + reg_firstlnum
4385 ? (pos->col == (colnr_T)(reginput - regline)
4386 ? (cmp == '<' || cmp == '>')
4387 : (pos->col < (colnr_T)(reginput - regline)
4388 ? cmp != '>'
4389 : cmp != '<'))
4390 : (pos->lnum < reglnum + reg_firstlnum
4391 ? cmp != '>'
4392 : cmp != '<')))
4393 status = RA_NOMATCH;
4394 }
4395 break;
4396
4397 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004398 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004399 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004400 break;
4401
Bram Moolenaar071d4272004-06-13 20:20:40 +00004402 case RE_LNUM:
4403 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4404 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004405 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004406 break;
4407
4408 case RE_COL:
4409 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004410 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004411 break;
4412
4413 case RE_VCOL:
4414 if (!re_num_cmp((long_u)win_linetabsize(
4415 reg_win == NULL ? curwin : reg_win,
4416 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004417 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004418 break;
4419
4420 case BOW: /* \<word; reginput points to w */
4421 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004422 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004423#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004424 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004425 {
4426 int this_class;
4427
4428 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004429 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004430 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004431 status = RA_NOMATCH; /* not on a word at all */
4432 else if (reg_prev_class() == this_class)
4433 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004434 }
4435#endif
4436 else
4437 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004438 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4439 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004440 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004441 }
4442 break;
4443
4444 case EOW: /* word\>; reginput points after d */
4445 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004446 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004447#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004448 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004449 {
4450 int this_class, prev_class;
4451
4452 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004453 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004454 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004455 if (this_class == prev_class
4456 || prev_class == 0 || prev_class == 1)
4457 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004458 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004459#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004460 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004461 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004462 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4463 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004464 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004465 }
4466 break; /* Matched with EOW */
4467
4468 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004469 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004470 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004471 status = RA_NOMATCH;
4472 else
4473 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004474 break;
4475
4476 case IDENT:
4477 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004478 status = RA_NOMATCH;
4479 else
4480 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004481 break;
4482
4483 case SIDENT:
4484 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004485 status = RA_NOMATCH;
4486 else
4487 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004488 break;
4489
4490 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004491 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004492 status = RA_NOMATCH;
4493 else
4494 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004495 break;
4496
4497 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004498 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004499 status = RA_NOMATCH;
4500 else
4501 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004502 break;
4503
4504 case FNAME:
4505 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004506 status = RA_NOMATCH;
4507 else
4508 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004509 break;
4510
4511 case SFNAME:
4512 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004513 status = RA_NOMATCH;
4514 else
4515 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004516 break;
4517
4518 case PRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004519 if (!vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004520 status = RA_NOMATCH;
4521 else
4522 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004523 break;
4524
4525 case SPRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004526 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004527 status = RA_NOMATCH;
4528 else
4529 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004530 break;
4531
4532 case WHITE:
4533 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004534 status = RA_NOMATCH;
4535 else
4536 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004537 break;
4538
4539 case NWHITE:
4540 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004541 status = RA_NOMATCH;
4542 else
4543 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004544 break;
4545
4546 case DIGIT:
4547 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004548 status = RA_NOMATCH;
4549 else
4550 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004551 break;
4552
4553 case NDIGIT:
4554 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004555 status = RA_NOMATCH;
4556 else
4557 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004558 break;
4559
4560 case HEX:
4561 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004562 status = RA_NOMATCH;
4563 else
4564 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 break;
4566
4567 case NHEX:
4568 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004569 status = RA_NOMATCH;
4570 else
4571 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004572 break;
4573
4574 case OCTAL:
4575 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004576 status = RA_NOMATCH;
4577 else
4578 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004579 break;
4580
4581 case NOCTAL:
4582 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004583 status = RA_NOMATCH;
4584 else
4585 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004586 break;
4587
4588 case WORD:
4589 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004590 status = RA_NOMATCH;
4591 else
4592 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004593 break;
4594
4595 case NWORD:
4596 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004597 status = RA_NOMATCH;
4598 else
4599 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004600 break;
4601
4602 case HEAD:
4603 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004604 status = RA_NOMATCH;
4605 else
4606 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004607 break;
4608
4609 case NHEAD:
4610 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004611 status = RA_NOMATCH;
4612 else
4613 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004614 break;
4615
4616 case ALPHA:
4617 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004618 status = RA_NOMATCH;
4619 else
4620 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004621 break;
4622
4623 case NALPHA:
4624 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004625 status = RA_NOMATCH;
4626 else
4627 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004628 break;
4629
4630 case LOWER:
4631 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004632 status = RA_NOMATCH;
4633 else
4634 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004635 break;
4636
4637 case NLOWER:
4638 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004639 status = RA_NOMATCH;
4640 else
4641 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004642 break;
4643
4644 case UPPER:
4645 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004646 status = RA_NOMATCH;
4647 else
4648 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004649 break;
4650
4651 case NUPPER:
4652 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004653 status = RA_NOMATCH;
4654 else
4655 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004656 break;
4657
4658 case EXACTLY:
4659 {
4660 int len;
4661 char_u *opnd;
4662
4663 opnd = OPERAND(scan);
4664 /* Inline the first byte, for speed. */
4665 if (*opnd != *reginput
4666 && (!ireg_ic || (
4667#ifdef FEAT_MBYTE
4668 !enc_utf8 &&
4669#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004670 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004671 status = RA_NOMATCH;
4672 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004673 {
4674 /* match empty string always works; happens when "~" is
4675 * empty. */
4676 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004677 else
4678 {
4679 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004680#ifdef FEAT_MBYTE
4681 && !(enc_utf8 && ireg_ic)
4682#endif
4683 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004684 {
4685 len = 1; /* matched a single byte above */
4686 }
4687 else
4688 {
4689 /* Need to match first byte again for multi-byte. */
4690 len = (int)STRLEN(opnd);
4691 if (cstrncmp(opnd, reginput, &len) != 0)
4692 status = RA_NOMATCH;
4693 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004694#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004695 /* Check for following composing character, unless %C
4696 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004697 if (status != RA_NOMATCH
4698 && enc_utf8
4699 && UTF_COMPOSINGLIKE(reginput, reginput + len)
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004700 && !ireg_icombine
4701 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004702 {
4703 /* raaron: This code makes a composing character get
4704 * ignored, which is the correct behavior (sometimes)
4705 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004706 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004707 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004708#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004709 if (status != RA_NOMATCH)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004710 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004711 }
4712 }
4713 break;
4714
4715 case ANYOF:
4716 case ANYBUT:
4717 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004718 status = RA_NOMATCH;
4719 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4720 status = RA_NOMATCH;
4721 else
4722 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004723 break;
4724
4725#ifdef FEAT_MBYTE
4726 case MULTIBYTECODE:
4727 if (has_mbyte)
4728 {
4729 int i, len;
4730 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004731 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004732
4733 opnd = OPERAND(scan);
4734 /* Safety check (just in case 'encoding' was changed since
4735 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004736 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004737 {
4738 status = RA_NOMATCH;
4739 break;
4740 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004741 if (enc_utf8)
4742 opndc = mb_ptr2char(opnd);
4743 if (enc_utf8 && utf_iscomposing(opndc))
4744 {
4745 /* When only a composing char is given match at any
4746 * position where that composing char appears. */
4747 status = RA_NOMATCH;
Bram Moolenaar0e462412015-03-31 14:17:31 +02004748 for (i = 0; reginput[i] != NUL;
4749 i += utf_ptr2len(reginput + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004750 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004751 inpc = mb_ptr2char(reginput + i);
4752 if (!utf_iscomposing(inpc))
4753 {
4754 if (i > 0)
4755 break;
4756 }
4757 else if (opndc == inpc)
4758 {
4759 /* Include all following composing chars. */
4760 len = i + mb_ptr2len(reginput + i);
4761 status = RA_MATCH;
4762 break;
4763 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004764 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004765 }
4766 else
4767 for (i = 0; i < len; ++i)
4768 if (opnd[i] != reginput[i])
4769 {
4770 status = RA_NOMATCH;
4771 break;
4772 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004773 reginput += len;
4774 }
4775 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004776 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004777 break;
4778#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004779 case RE_COMPOSING:
4780#ifdef FEAT_MBYTE
4781 if (enc_utf8)
4782 {
4783 /* Skip composing characters. */
4784 while (utf_iscomposing(utf_ptr2char(reginput)))
4785 mb_cptr_adv(reginput);
4786 }
4787#endif
4788 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004789
4790 case NOTHING:
4791 break;
4792
4793 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004794 {
4795 int i;
4796 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004797
Bram Moolenaar582fd852005-03-28 20:58:01 +00004798 /*
4799 * When we run into BACK we need to check if we don't keep
4800 * looping without matching any input. The second and later
4801 * times a BACK is encountered it fails if the input is still
4802 * at the same position as the previous time.
4803 * The positions are stored in "backpos" and found by the
4804 * current value of "scan", the position in the RE program.
4805 */
4806 bp = (backpos_T *)backpos.ga_data;
4807 for (i = 0; i < backpos.ga_len; ++i)
4808 if (bp[i].bp_scan == scan)
4809 break;
4810 if (i == backpos.ga_len)
4811 {
4812 /* First time at this BACK, make room to store the pos. */
4813 if (ga_grow(&backpos, 1) == FAIL)
4814 status = RA_FAIL;
4815 else
4816 {
4817 /* get "ga_data" again, it may have changed */
4818 bp = (backpos_T *)backpos.ga_data;
4819 bp[i].bp_scan = scan;
4820 ++backpos.ga_len;
4821 }
4822 }
4823 else if (reg_save_equal(&bp[i].bp_pos))
4824 /* Still at same position as last time, fail. */
4825 status = RA_NOMATCH;
4826
4827 if (status != RA_FAIL && status != RA_NOMATCH)
4828 reg_save(&bp[i].bp_pos, &backpos);
4829 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004830 break;
4831
Bram Moolenaar071d4272004-06-13 20:20:40 +00004832 case MOPEN + 0: /* Match start: \zs */
4833 case MOPEN + 1: /* \( */
4834 case MOPEN + 2:
4835 case MOPEN + 3:
4836 case MOPEN + 4:
4837 case MOPEN + 5:
4838 case MOPEN + 6:
4839 case MOPEN + 7:
4840 case MOPEN + 8:
4841 case MOPEN + 9:
4842 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004843 no = op - MOPEN;
4844 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004845 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004846 if (rp == NULL)
4847 status = RA_FAIL;
4848 else
4849 {
4850 rp->rs_no = no;
4851 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4852 &reg_startp[no]);
4853 /* We simply continue and handle the result when done. */
4854 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004855 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004856 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004857
4858 case NOPEN: /* \%( */
4859 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004860 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004861 status = RA_FAIL;
4862 /* We simply continue and handle the result when done. */
4863 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004864
4865#ifdef FEAT_SYN_HL
4866 case ZOPEN + 1:
4867 case ZOPEN + 2:
4868 case ZOPEN + 3:
4869 case ZOPEN + 4:
4870 case ZOPEN + 5:
4871 case ZOPEN + 6:
4872 case ZOPEN + 7:
4873 case ZOPEN + 8:
4874 case ZOPEN + 9:
4875 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004876 no = op - ZOPEN;
4877 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004878 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004879 if (rp == NULL)
4880 status = RA_FAIL;
4881 else
4882 {
4883 rp->rs_no = no;
4884 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4885 &reg_startzp[no]);
4886 /* We simply continue and handle the result when done. */
4887 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004888 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004889 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004890#endif
4891
4892 case MCLOSE + 0: /* Match end: \ze */
4893 case MCLOSE + 1: /* \) */
4894 case MCLOSE + 2:
4895 case MCLOSE + 3:
4896 case MCLOSE + 4:
4897 case MCLOSE + 5:
4898 case MCLOSE + 6:
4899 case MCLOSE + 7:
4900 case MCLOSE + 8:
4901 case MCLOSE + 9:
4902 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004903 no = op - MCLOSE;
4904 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004905 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004906 if (rp == NULL)
4907 status = RA_FAIL;
4908 else
4909 {
4910 rp->rs_no = no;
4911 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4912 /* We simply continue and handle the result when done. */
4913 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004914 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004915 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004916
4917#ifdef FEAT_SYN_HL
4918 case ZCLOSE + 1: /* \) after \z( */
4919 case ZCLOSE + 2:
4920 case ZCLOSE + 3:
4921 case ZCLOSE + 4:
4922 case ZCLOSE + 5:
4923 case ZCLOSE + 6:
4924 case ZCLOSE + 7:
4925 case ZCLOSE + 8:
4926 case ZCLOSE + 9:
4927 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004928 no = op - ZCLOSE;
4929 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004930 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004931 if (rp == NULL)
4932 status = RA_FAIL;
4933 else
4934 {
4935 rp->rs_no = no;
4936 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4937 &reg_endzp[no]);
4938 /* We simply continue and handle the result when done. */
4939 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004940 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004941 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004942#endif
4943
4944 case BACKREF + 1:
4945 case BACKREF + 2:
4946 case BACKREF + 3:
4947 case BACKREF + 4:
4948 case BACKREF + 5:
4949 case BACKREF + 6:
4950 case BACKREF + 7:
4951 case BACKREF + 8:
4952 case BACKREF + 9:
4953 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004954 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004955
4956 no = op - BACKREF;
4957 cleanup_subexpr();
4958 if (!REG_MULTI) /* Single-line regexp */
4959 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004960 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004961 {
4962 /* Backref was not set: Match an empty string. */
4963 len = 0;
4964 }
4965 else
4966 {
4967 /* Compare current input with back-ref in the same
4968 * line. */
4969 len = (int)(reg_endp[no] - reg_startp[no]);
4970 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004971 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004972 }
4973 }
4974 else /* Multi-line regexp */
4975 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004976 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004977 {
4978 /* Backref was not set: Match an empty string. */
4979 len = 0;
4980 }
4981 else
4982 {
4983 if (reg_startpos[no].lnum == reglnum
4984 && reg_endpos[no].lnum == reglnum)
4985 {
4986 /* Compare back-ref within the current line. */
4987 len = reg_endpos[no].col - reg_startpos[no].col;
4988 if (cstrncmp(regline + reg_startpos[no].col,
4989 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004990 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004991 }
4992 else
4993 {
4994 /* Messy situation: Need to compare between two
4995 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02004996 int r = match_with_backref(
Bram Moolenaar580abea2013-06-14 20:31:28 +02004997 reg_startpos[no].lnum,
4998 reg_startpos[no].col,
4999 reg_endpos[no].lnum,
5000 reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02005001 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005002
5003 if (r != RA_MATCH)
5004 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005005 }
5006 }
5007 }
5008
5009 /* Matched the backref, skip over it. */
5010 reginput += len;
5011 }
5012 break;
5013
5014#ifdef FEAT_SYN_HL
5015 case ZREF + 1:
5016 case ZREF + 2:
5017 case ZREF + 3:
5018 case ZREF + 4:
5019 case ZREF + 5:
5020 case ZREF + 6:
5021 case ZREF + 7:
5022 case ZREF + 8:
5023 case ZREF + 9:
5024 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005025 int len;
5026
5027 cleanup_zsubexpr();
5028 no = op - ZREF;
5029 if (re_extmatch_in != NULL
5030 && re_extmatch_in->matches[no] != NULL)
5031 {
5032 len = (int)STRLEN(re_extmatch_in->matches[no]);
5033 if (cstrncmp(re_extmatch_in->matches[no],
5034 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005035 status = RA_NOMATCH;
5036 else
5037 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005038 }
5039 else
5040 {
5041 /* Backref was not set: Match an empty string. */
5042 }
5043 }
5044 break;
5045#endif
5046
5047 case BRANCH:
5048 {
5049 if (OP(next) != BRANCH) /* No choice. */
5050 next = OPERAND(scan); /* Avoid recursion. */
5051 else
5052 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005053 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005054 if (rp == NULL)
5055 status = RA_FAIL;
5056 else
5057 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005058 }
5059 }
5060 break;
5061
5062 case BRACE_LIMITS:
5063 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005064 if (OP(next) == BRACE_SIMPLE)
5065 {
5066 bl_minval = OPERAND_MIN(scan);
5067 bl_maxval = OPERAND_MAX(scan);
5068 }
5069 else if (OP(next) >= BRACE_COMPLEX
5070 && OP(next) < BRACE_COMPLEX + 10)
5071 {
5072 no = OP(next) - BRACE_COMPLEX;
5073 brace_min[no] = OPERAND_MIN(scan);
5074 brace_max[no] = OPERAND_MAX(scan);
5075 brace_count[no] = 0;
5076 }
5077 else
5078 {
5079 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005080 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005081 }
5082 }
5083 break;
5084
5085 case BRACE_COMPLEX + 0:
5086 case BRACE_COMPLEX + 1:
5087 case BRACE_COMPLEX + 2:
5088 case BRACE_COMPLEX + 3:
5089 case BRACE_COMPLEX + 4:
5090 case BRACE_COMPLEX + 5:
5091 case BRACE_COMPLEX + 6:
5092 case BRACE_COMPLEX + 7:
5093 case BRACE_COMPLEX + 8:
5094 case BRACE_COMPLEX + 9:
5095 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005096 no = op - BRACE_COMPLEX;
5097 ++brace_count[no];
5098
5099 /* If not matched enough times yet, try one more */
5100 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005101 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005102 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005103 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005104 if (rp == NULL)
5105 status = RA_FAIL;
5106 else
5107 {
5108 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005109 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005110 next = OPERAND(scan);
5111 /* We continue and handle the result when done. */
5112 }
5113 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005114 }
5115
5116 /* If matched enough times, may try matching some more */
5117 if (brace_min[no] <= brace_max[no])
5118 {
5119 /* Range is the normal way around, use longest match */
5120 if (brace_count[no] <= brace_max[no])
5121 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005122 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005123 if (rp == NULL)
5124 status = RA_FAIL;
5125 else
5126 {
5127 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005128 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005129 next = OPERAND(scan);
5130 /* We continue and handle the result when done. */
5131 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005132 }
5133 }
5134 else
5135 {
5136 /* Range is backwards, use shortest match first */
5137 if (brace_count[no] <= brace_min[no])
5138 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005139 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005140 if (rp == NULL)
5141 status = RA_FAIL;
5142 else
5143 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005144 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005145 /* We continue and handle the result when done. */
5146 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005147 }
5148 }
5149 }
5150 break;
5151
5152 case BRACE_SIMPLE:
5153 case STAR:
5154 case PLUS:
5155 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005156 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005157
5158 /*
5159 * Lookahead to avoid useless match attempts when we know
5160 * what character comes next.
5161 */
5162 if (OP(next) == EXACTLY)
5163 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005164 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005165 if (ireg_ic)
5166 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005167 if (MB_ISUPPER(rst.nextb))
5168 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005169 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005170 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005171 }
5172 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005173 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005174 }
5175 else
5176 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005177 rst.nextb = NUL;
5178 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005179 }
5180 if (op != BRACE_SIMPLE)
5181 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005182 rst.minval = (op == STAR) ? 0 : 1;
5183 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005184 }
5185 else
5186 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005187 rst.minval = bl_minval;
5188 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005189 }
5190
5191 /*
5192 * When maxval > minval, try matching as much as possible, up
5193 * to maxval. When maxval < minval, try matching at least the
5194 * minimal number (since the range is backwards, that's also
5195 * maxval!).
5196 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005197 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005198 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005199 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005200 status = RA_FAIL;
5201 break;
5202 }
5203 if (rst.minval <= rst.maxval
5204 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5205 {
5206 /* It could match. Prepare for trying to match what
5207 * follows. The code is below. Parameters are stored in
5208 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005209 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005210 {
5211 EMSG(_(e_maxmempat));
5212 status = RA_FAIL;
5213 }
5214 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005215 status = RA_FAIL;
5216 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005217 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005218 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005219 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005220 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005221 if (rp == NULL)
5222 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005223 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005224 {
5225 *(((regstar_T *)rp) - 1) = rst;
5226 status = RA_BREAK; /* skip the restore bits */
5227 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005228 }
5229 }
5230 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005231 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005232
Bram Moolenaar071d4272004-06-13 20:20:40 +00005233 }
5234 break;
5235
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005236 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005237 case MATCH:
5238 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005239 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005240 if (rp == NULL)
5241 status = RA_FAIL;
5242 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005243 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005244 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005245 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005246 next = OPERAND(scan);
5247 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005248 }
5249 break;
5250
5251 case BEHIND:
5252 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005254 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005255 {
5256 EMSG(_(e_maxmempat));
5257 status = RA_FAIL;
5258 }
5259 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005260 status = RA_FAIL;
5261 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005262 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005263 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005264 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005265 if (rp == NULL)
5266 status = RA_FAIL;
5267 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005268 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005269 /* Need to save the subexpr to be able to restore them
5270 * when there is a match but we don't use it. */
5271 save_subexpr(((regbehind_T *)rp) - 1);
5272
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005273 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005274 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005275 /* First try if what follows matches. If it does then we
5276 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005277 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005278 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005279 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005280
5281 case BHPOS:
5282 if (REG_MULTI)
5283 {
5284 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5285 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005286 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005287 }
5288 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005289 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005290 break;
5291
5292 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005293 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5294 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005295 status = RA_NOMATCH;
5296 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005297 ADVANCE_REGINPUT();
5298 else
5299 reg_nextline();
5300 break;
5301
5302 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005303 status = RA_MATCH; /* Success! */
5304 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005305
5306 default:
5307 EMSG(_(e_re_corr));
5308#ifdef DEBUG
5309 printf("Illegal op code %d\n", op);
5310#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005311 status = RA_FAIL;
5312 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005313 }
5314 }
5315
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005316 /* If we can't continue sequentially, break the inner loop. */
5317 if (status != RA_CONT)
5318 break;
5319
5320 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005321 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005322
5323 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005324
5325 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005326 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005327 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005328 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005329 while (regstack.ga_len > 0 && status != RA_FAIL)
5330 {
5331 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5332 switch (rp->rs_state)
5333 {
5334 case RS_NOPEN:
5335 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005336 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005337 break;
5338
5339 case RS_MOPEN:
5340 /* Pop the state. Restore pointers when there is no match. */
5341 if (status == RA_NOMATCH)
5342 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5343 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005344 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005345 break;
5346
5347#ifdef FEAT_SYN_HL
5348 case RS_ZOPEN:
5349 /* Pop the state. Restore pointers when there is no match. */
5350 if (status == RA_NOMATCH)
5351 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5352 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005353 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005354 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005355#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005356
5357 case RS_MCLOSE:
5358 /* Pop the state. Restore pointers when there is no match. */
5359 if (status == RA_NOMATCH)
5360 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5361 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005362 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005363 break;
5364
5365#ifdef FEAT_SYN_HL
5366 case RS_ZCLOSE:
5367 /* Pop the state. Restore pointers when there is no match. */
5368 if (status == RA_NOMATCH)
5369 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5370 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005371 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005372 break;
5373#endif
5374
5375 case RS_BRANCH:
5376 if (status == RA_MATCH)
5377 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005378 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005379 else
5380 {
5381 if (status != RA_BREAK)
5382 {
5383 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005384 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005385 scan = rp->rs_scan;
5386 }
5387 if (scan == NULL || OP(scan) != BRANCH)
5388 {
5389 /* no more branches, didn't find a match */
5390 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005391 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005392 }
5393 else
5394 {
5395 /* Prepare to try a branch. */
5396 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005397 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005398 scan = OPERAND(scan);
5399 }
5400 }
5401 break;
5402
5403 case RS_BRCPLX_MORE:
5404 /* Pop the state. Restore pointers when there is no match. */
5405 if (status == RA_NOMATCH)
5406 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005407 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005408 --brace_count[rp->rs_no]; /* decrement match count */
5409 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005410 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005411 break;
5412
5413 case RS_BRCPLX_LONG:
5414 /* Pop the state. Restore pointers when there is no match. */
5415 if (status == RA_NOMATCH)
5416 {
5417 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005418 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005419 --brace_count[rp->rs_no];
5420 /* continue with the items after "\{}" */
5421 status = RA_CONT;
5422 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005423 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005424 if (status == RA_CONT)
5425 scan = regnext(scan);
5426 break;
5427
5428 case RS_BRCPLX_SHORT:
5429 /* Pop the state. Restore pointers when there is no match. */
5430 if (status == RA_NOMATCH)
5431 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005432 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005433 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005434 if (status == RA_NOMATCH)
5435 {
5436 scan = OPERAND(scan);
5437 status = RA_CONT;
5438 }
5439 break;
5440
5441 case RS_NOMATCH:
5442 /* Pop the state. If the operand matches for NOMATCH or
5443 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5444 * except for SUBPAT, and continue with the next item. */
5445 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5446 status = RA_NOMATCH;
5447 else
5448 {
5449 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005450 if (rp->rs_no != SUBPAT) /* zero-width */
5451 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005452 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005453 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005454 if (status == RA_CONT)
5455 scan = regnext(scan);
5456 break;
5457
5458 case RS_BEHIND1:
5459 if (status == RA_NOMATCH)
5460 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005461 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005462 regstack.ga_len -= sizeof(regbehind_T);
5463 }
5464 else
5465 {
5466 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5467 * the behind part does (not) match before the current
5468 * position in the input. This must be done at every
5469 * position in the input and checking if the match ends at
5470 * the current position. */
5471
5472 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005473 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005474
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005475 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005476 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005477 * result, hitting the start of the line or the previous
5478 * line (for multi-line matching).
5479 * Set behind_pos to where the match should end, BHPOS
5480 * will match it. Save the current value. */
5481 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5482 behind_pos = rp->rs_un.regsave;
5483
5484 rp->rs_state = RS_BEHIND2;
5485
Bram Moolenaar582fd852005-03-28 20:58:01 +00005486 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005487 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005488 }
5489 break;
5490
5491 case RS_BEHIND2:
5492 /*
5493 * Looping for BEHIND / NOBEHIND match.
5494 */
5495 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5496 {
5497 /* found a match that ends where "next" started */
5498 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5499 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005500 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5501 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005502 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005503 {
5504 /* But we didn't want a match. Need to restore the
5505 * subexpr, because what follows matched, so they have
5506 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005507 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005508 restore_subexpr(((regbehind_T *)rp) - 1);
5509 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005510 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005511 regstack.ga_len -= sizeof(regbehind_T);
5512 }
5513 else
5514 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005515 long limit;
5516
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005517 /* No match or a match that doesn't end where we want it: Go
5518 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005519 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005520 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005521 if (REG_MULTI)
5522 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005523 if (limit > 0
5524 && ((rp->rs_un.regsave.rs_u.pos.lnum
5525 < behind_pos.rs_u.pos.lnum
5526 ? (colnr_T)STRLEN(regline)
5527 : behind_pos.rs_u.pos.col)
5528 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5529 no = FAIL;
5530 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005531 {
5532 if (rp->rs_un.regsave.rs_u.pos.lnum
5533 < behind_pos.rs_u.pos.lnum
5534 || reg_getline(
5535 --rp->rs_un.regsave.rs_u.pos.lnum)
5536 == NULL)
5537 no = FAIL;
5538 else
5539 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005540 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005541 rp->rs_un.regsave.rs_u.pos.col =
5542 (colnr_T)STRLEN(regline);
5543 }
5544 }
5545 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005546 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005547#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005548 if (has_mbyte)
5549 rp->rs_un.regsave.rs_u.pos.col -=
5550 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005551 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005552 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005553#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005554 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005555 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005556 }
5557 else
5558 {
5559 if (rp->rs_un.regsave.rs_u.ptr == regline)
5560 no = FAIL;
5561 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005562 {
5563 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5564 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5565 - rp->rs_un.regsave.rs_u.ptr) > limit)
5566 no = FAIL;
5567 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005568 }
5569 if (no == OK)
5570 {
5571 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005572 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005573 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005574 if (status == RA_MATCH)
5575 {
5576 /* We did match, so subexpr may have been changed,
5577 * need to restore them for the next try. */
5578 status = RA_NOMATCH;
5579 restore_subexpr(((regbehind_T *)rp) - 1);
5580 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005581 }
5582 else
5583 {
5584 /* Can't advance. For NOBEHIND that's a match. */
5585 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5586 if (rp->rs_no == NOBEHIND)
5587 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005588 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5589 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005590 status = RA_MATCH;
5591 }
5592 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005593 {
5594 /* We do want a proper match. Need to restore the
5595 * subexpr if we had a match, because they may have
5596 * been set. */
5597 if (status == RA_MATCH)
5598 {
5599 status = RA_NOMATCH;
5600 restore_subexpr(((regbehind_T *)rp) - 1);
5601 }
5602 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005603 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005604 regstack.ga_len -= sizeof(regbehind_T);
5605 }
5606 }
5607 break;
5608
5609 case RS_STAR_LONG:
5610 case RS_STAR_SHORT:
5611 {
5612 regstar_T *rst = ((regstar_T *)rp) - 1;
5613
5614 if (status == RA_MATCH)
5615 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005616 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005617 regstack.ga_len -= sizeof(regstar_T);
5618 break;
5619 }
5620
5621 /* Tried once already, restore input pointers. */
5622 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005623 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005624
5625 /* Repeat until we found a position where it could match. */
5626 for (;;)
5627 {
5628 if (status != RA_BREAK)
5629 {
5630 /* Tried first position already, advance. */
5631 if (rp->rs_state == RS_STAR_LONG)
5632 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005633 /* Trying for longest match, but couldn't or
5634 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005635 if (--rst->count < rst->minval)
5636 break;
5637 if (reginput == regline)
5638 {
5639 /* backup to last char of previous line */
5640 --reglnum;
5641 regline = reg_getline(reglnum);
5642 /* Just in case regrepeat() didn't count
5643 * right. */
5644 if (regline == NULL)
5645 break;
5646 reginput = regline + STRLEN(regline);
5647 fast_breakcheck();
5648 }
5649 else
5650 mb_ptr_back(regline, reginput);
5651 }
5652 else
5653 {
5654 /* Range is backwards, use shortest match first.
5655 * Careful: maxval and minval are exchanged!
5656 * Couldn't or didn't match: try advancing one
5657 * char. */
5658 if (rst->count == rst->minval
5659 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5660 break;
5661 ++rst->count;
5662 }
5663 if (got_int)
5664 break;
5665 }
5666 else
5667 status = RA_NOMATCH;
5668
5669 /* If it could match, try it. */
5670 if (rst->nextb == NUL || *reginput == rst->nextb
5671 || *reginput == rst->nextb_ic)
5672 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005673 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005674 scan = regnext(rp->rs_scan);
5675 status = RA_CONT;
5676 break;
5677 }
5678 }
5679 if (status != RA_CONT)
5680 {
5681 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005682 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005683 regstack.ga_len -= sizeof(regstar_T);
5684 status = RA_NOMATCH;
5685 }
5686 }
5687 break;
5688 }
5689
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005690 /* If we want to continue the inner loop or didn't pop a state
5691 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005692 if (status == RA_CONT || rp == (regitem_T *)
5693 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5694 break;
5695 }
5696
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005697 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005698 if (status == RA_CONT)
5699 continue;
5700
5701 /*
5702 * If the regstack is empty or something failed we are done.
5703 */
5704 if (regstack.ga_len == 0 || status == RA_FAIL)
5705 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005706 if (scan == NULL)
5707 {
5708 /*
5709 * We get here only if there's trouble -- normally "case END" is
5710 * the terminating point.
5711 */
5712 EMSG(_(e_re_corr));
5713#ifdef DEBUG
5714 printf("Premature EOL\n");
5715#endif
5716 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005717 if (status == RA_FAIL)
5718 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005719 return (status == RA_MATCH);
5720 }
5721
5722 } /* End of loop until the regstack is empty. */
5723
5724 /* NOTREACHED */
5725}
5726
5727/*
5728 * Push an item onto the regstack.
5729 * Returns pointer to new item. Returns NULL when out of memory.
5730 */
5731 static regitem_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01005732regstack_push(regstate_T state, char_u *scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005733{
5734 regitem_T *rp;
5735
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005736 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005737 {
5738 EMSG(_(e_maxmempat));
5739 return NULL;
5740 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005741 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005742 return NULL;
5743
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005744 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005745 rp->rs_state = state;
5746 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005747
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005748 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005749 return rp;
5750}
5751
5752/*
5753 * Pop an item from the regstack.
5754 */
5755 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01005756regstack_pop(char_u **scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005757{
5758 regitem_T *rp;
5759
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005760 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005761 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005762
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005763 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005764}
5765
Bram Moolenaar071d4272004-06-13 20:20:40 +00005766/*
5767 * regrepeat - repeatedly match something simple, return how many.
5768 * Advances reginput (and reglnum) to just after the matched chars.
5769 */
5770 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01005771regrepeat(
5772 char_u *p,
5773 long maxcount) /* maximum number of matches allowed */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005774{
5775 long count = 0;
5776 char_u *scan;
5777 char_u *opnd;
5778 int mask;
5779 int testval = 0;
5780
5781 scan = reginput; /* Make local copy of reginput for speed. */
5782 opnd = OPERAND(p);
5783 switch (OP(p))
5784 {
5785 case ANY:
5786 case ANY + ADD_NL:
5787 while (count < maxcount)
5788 {
5789 /* Matching anything means we continue until end-of-line (or
5790 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5791 while (*scan != NUL && count < maxcount)
5792 {
5793 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005794 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005795 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005796 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5797 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005798 break;
5799 ++count; /* count the line-break */
5800 reg_nextline();
5801 scan = reginput;
5802 if (got_int)
5803 break;
5804 }
5805 break;
5806
5807 case IDENT:
5808 case IDENT + ADD_NL:
5809 testval = TRUE;
5810 /*FALLTHROUGH*/
5811 case SIDENT:
5812 case SIDENT + ADD_NL:
5813 while (count < maxcount)
5814 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005815 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005816 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005817 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005818 }
5819 else if (*scan == NUL)
5820 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005821 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5822 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005823 break;
5824 reg_nextline();
5825 scan = reginput;
5826 if (got_int)
5827 break;
5828 }
5829 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5830 ++scan;
5831 else
5832 break;
5833 ++count;
5834 }
5835 break;
5836
5837 case KWORD:
5838 case KWORD + ADD_NL:
5839 testval = TRUE;
5840 /*FALLTHROUGH*/
5841 case SKWORD:
5842 case SKWORD + ADD_NL:
5843 while (count < maxcount)
5844 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005845 if (vim_iswordp_buf(scan, reg_buf)
5846 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005847 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005848 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005849 }
5850 else if (*scan == NUL)
5851 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005852 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5853 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005854 break;
5855 reg_nextline();
5856 scan = reginput;
5857 if (got_int)
5858 break;
5859 }
5860 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5861 ++scan;
5862 else
5863 break;
5864 ++count;
5865 }
5866 break;
5867
5868 case FNAME:
5869 case FNAME + ADD_NL:
5870 testval = TRUE;
5871 /*FALLTHROUGH*/
5872 case SFNAME:
5873 case SFNAME + ADD_NL:
5874 while (count < maxcount)
5875 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005876 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005877 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005878 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005879 }
5880 else if (*scan == NUL)
5881 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005882 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5883 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005884 break;
5885 reg_nextline();
5886 scan = reginput;
5887 if (got_int)
5888 break;
5889 }
5890 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5891 ++scan;
5892 else
5893 break;
5894 ++count;
5895 }
5896 break;
5897
5898 case PRINT:
5899 case PRINT + ADD_NL:
5900 testval = TRUE;
5901 /*FALLTHROUGH*/
5902 case SPRINT:
5903 case SPRINT + ADD_NL:
5904 while (count < maxcount)
5905 {
5906 if (*scan == NUL)
5907 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005908 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5909 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005910 break;
5911 reg_nextline();
5912 scan = reginput;
5913 if (got_int)
5914 break;
5915 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005916 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5917 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005918 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005919 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005920 }
5921 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5922 ++scan;
5923 else
5924 break;
5925 ++count;
5926 }
5927 break;
5928
5929 case WHITE:
5930 case WHITE + ADD_NL:
5931 testval = mask = RI_WHITE;
5932do_class:
5933 while (count < maxcount)
5934 {
5935#ifdef FEAT_MBYTE
5936 int l;
5937#endif
5938 if (*scan == NUL)
5939 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005940 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5941 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005942 break;
5943 reg_nextline();
5944 scan = reginput;
5945 if (got_int)
5946 break;
5947 }
5948#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005949 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005950 {
5951 if (testval != 0)
5952 break;
5953 scan += l;
5954 }
5955#endif
5956 else if ((class_tab[*scan] & mask) == testval)
5957 ++scan;
5958 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5959 ++scan;
5960 else
5961 break;
5962 ++count;
5963 }
5964 break;
5965
5966 case NWHITE:
5967 case NWHITE + ADD_NL:
5968 mask = RI_WHITE;
5969 goto do_class;
5970 case DIGIT:
5971 case DIGIT + ADD_NL:
5972 testval = mask = RI_DIGIT;
5973 goto do_class;
5974 case NDIGIT:
5975 case NDIGIT + ADD_NL:
5976 mask = RI_DIGIT;
5977 goto do_class;
5978 case HEX:
5979 case HEX + ADD_NL:
5980 testval = mask = RI_HEX;
5981 goto do_class;
5982 case NHEX:
5983 case NHEX + ADD_NL:
5984 mask = RI_HEX;
5985 goto do_class;
5986 case OCTAL:
5987 case OCTAL + ADD_NL:
5988 testval = mask = RI_OCTAL;
5989 goto do_class;
5990 case NOCTAL:
5991 case NOCTAL + ADD_NL:
5992 mask = RI_OCTAL;
5993 goto do_class;
5994 case WORD:
5995 case WORD + ADD_NL:
5996 testval = mask = RI_WORD;
5997 goto do_class;
5998 case NWORD:
5999 case NWORD + ADD_NL:
6000 mask = RI_WORD;
6001 goto do_class;
6002 case HEAD:
6003 case HEAD + ADD_NL:
6004 testval = mask = RI_HEAD;
6005 goto do_class;
6006 case NHEAD:
6007 case NHEAD + ADD_NL:
6008 mask = RI_HEAD;
6009 goto do_class;
6010 case ALPHA:
6011 case ALPHA + ADD_NL:
6012 testval = mask = RI_ALPHA;
6013 goto do_class;
6014 case NALPHA:
6015 case NALPHA + ADD_NL:
6016 mask = RI_ALPHA;
6017 goto do_class;
6018 case LOWER:
6019 case LOWER + ADD_NL:
6020 testval = mask = RI_LOWER;
6021 goto do_class;
6022 case NLOWER:
6023 case NLOWER + ADD_NL:
6024 mask = RI_LOWER;
6025 goto do_class;
6026 case UPPER:
6027 case UPPER + ADD_NL:
6028 testval = mask = RI_UPPER;
6029 goto do_class;
6030 case NUPPER:
6031 case NUPPER + ADD_NL:
6032 mask = RI_UPPER;
6033 goto do_class;
6034
6035 case EXACTLY:
6036 {
6037 int cu, cl;
6038
6039 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006040 * would have been used for it. It does handle single-byte
6041 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006042 if (ireg_ic)
6043 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006044 cu = MB_TOUPPER(*opnd);
6045 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006046 while (count < maxcount && (*scan == cu || *scan == cl))
6047 {
6048 count++;
6049 scan++;
6050 }
6051 }
6052 else
6053 {
6054 cu = *opnd;
6055 while (count < maxcount && *scan == cu)
6056 {
6057 count++;
6058 scan++;
6059 }
6060 }
6061 break;
6062 }
6063
6064#ifdef FEAT_MBYTE
6065 case MULTIBYTECODE:
6066 {
6067 int i, len, cf = 0;
6068
6069 /* Safety check (just in case 'encoding' was changed since
6070 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006071 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006072 {
6073 if (ireg_ic && enc_utf8)
6074 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006075 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006076 {
6077 for (i = 0; i < len; ++i)
6078 if (opnd[i] != scan[i])
6079 break;
6080 if (i < len && (!ireg_ic || !enc_utf8
6081 || utf_fold(utf_ptr2char(scan)) != cf))
6082 break;
6083 scan += len;
6084 ++count;
6085 }
6086 }
6087 }
6088 break;
6089#endif
6090
6091 case ANYOF:
6092 case ANYOF + ADD_NL:
6093 testval = TRUE;
6094 /*FALLTHROUGH*/
6095
6096 case ANYBUT:
6097 case ANYBUT + ADD_NL:
6098 while (count < maxcount)
6099 {
6100#ifdef FEAT_MBYTE
6101 int len;
6102#endif
6103 if (*scan == NUL)
6104 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006105 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6106 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006107 break;
6108 reg_nextline();
6109 scan = reginput;
6110 if (got_int)
6111 break;
6112 }
6113 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6114 ++scan;
6115#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006116 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006117 {
6118 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6119 break;
6120 scan += len;
6121 }
6122#endif
6123 else
6124 {
6125 if ((cstrchr(opnd, *scan) == NULL) == testval)
6126 break;
6127 ++scan;
6128 }
6129 ++count;
6130 }
6131 break;
6132
6133 case NEWL:
6134 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006135 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6136 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006137 {
6138 count++;
6139 if (reg_line_lbr)
6140 ADVANCE_REGINPUT();
6141 else
6142 reg_nextline();
6143 scan = reginput;
6144 if (got_int)
6145 break;
6146 }
6147 break;
6148
6149 default: /* Oh dear. Called inappropriately. */
6150 EMSG(_(e_re_corr));
6151#ifdef DEBUG
6152 printf("Called regrepeat with op code %d\n", OP(p));
6153#endif
6154 break;
6155 }
6156
6157 reginput = scan;
6158
6159 return (int)count;
6160}
6161
6162/*
6163 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006164 * Returns NULL when calculating size, when there is no next item and when
6165 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006166 */
6167 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006168regnext(char_u *p)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006169{
6170 int offset;
6171
Bram Moolenaard3005802009-11-25 17:21:32 +00006172 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006173 return NULL;
6174
6175 offset = NEXT(p);
6176 if (offset == 0)
6177 return NULL;
6178
Bram Moolenaar582fd852005-03-28 20:58:01 +00006179 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006180 return p - offset;
6181 else
6182 return p + offset;
6183}
6184
6185/*
6186 * Check the regexp program for its magic number.
6187 * Return TRUE if it's wrong.
6188 */
6189 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006190prog_magic_wrong(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006191{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006192 regprog_T *prog;
6193
6194 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6195 if (prog->engine == &nfa_regengine)
6196 /* For NFA matcher we don't check the magic */
6197 return FALSE;
6198
6199 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006200 {
6201 EMSG(_(e_re_corr));
6202 return TRUE;
6203 }
6204 return FALSE;
6205}
6206
6207/*
6208 * Cleanup the subexpressions, if this wasn't done yet.
6209 * This construction is used to clear the subexpressions only when they are
6210 * used (to increase speed).
6211 */
6212 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006213cleanup_subexpr(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006214{
6215 if (need_clear_subexpr)
6216 {
6217 if (REG_MULTI)
6218 {
6219 /* Use 0xff to set lnum to -1 */
6220 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6221 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6222 }
6223 else
6224 {
6225 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6226 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6227 }
6228 need_clear_subexpr = FALSE;
6229 }
6230}
6231
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end info, unless this was already done.
 */
    static void
cleanup_zsubexpr(void)
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff sets lnum to -1 */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6253
6254/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006255 * Save the current subexpr to "bp", so that they can be restored
6256 * later by restore_subexpr().
6257 */
6258 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006259save_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006260{
6261 int i;
6262
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006263 /* When "need_clear_subexpr" is set we don't need to save the values, only
6264 * remember that this flag needs to be set again when restoring. */
6265 bp->save_need_clear_subexpr = need_clear_subexpr;
6266 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006267 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006268 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006269 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006270 if (REG_MULTI)
6271 {
6272 bp->save_start[i].se_u.pos = reg_startpos[i];
6273 bp->save_end[i].se_u.pos = reg_endpos[i];
6274 }
6275 else
6276 {
6277 bp->save_start[i].se_u.ptr = reg_startp[i];
6278 bp->save_end[i].se_u.ptr = reg_endp[i];
6279 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006280 }
6281 }
6282}
6283
6284/*
6285 * Restore the subexpr from "bp".
6286 */
6287 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006288restore_subexpr(regbehind_T *bp)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006289{
6290 int i;
6291
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006292 /* Only need to restore saved values when they are not to be cleared. */
6293 need_clear_subexpr = bp->save_need_clear_subexpr;
6294 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006295 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006296 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006297 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006298 if (REG_MULTI)
6299 {
6300 reg_startpos[i] = bp->save_start[i].se_u.pos;
6301 reg_endpos[i] = bp->save_end[i].se_u.pos;
6302 }
6303 else
6304 {
6305 reg_startp[i] = bp->save_start[i].se_u.ptr;
6306 reg_endp[i] = bp->save_end[i].se_u.ptr;
6307 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006308 }
6309 }
6310}
6311
6312/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006313 * Advance reglnum, regline and reginput to the next line.
6314 */
6315 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006316reg_nextline(void)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006317{
6318 regline = reg_getline(++reglnum);
6319 reginput = regline;
6320 fast_breakcheck();
6321}
6322
6323/*
6324 * Save the input line and position in a regsave_T.
6325 */
6326 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006327reg_save(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006328{
6329 if (REG_MULTI)
6330 {
6331 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6332 save->rs_u.pos.lnum = reglnum;
6333 }
6334 else
6335 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006336 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006337}
6338
6339/*
6340 * Restore the input line and position from a regsave_T.
6341 */
6342 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006343reg_restore(regsave_T *save, garray_T *gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006344{
6345 if (REG_MULTI)
6346 {
6347 if (reglnum != save->rs_u.pos.lnum)
6348 {
6349 /* only call reg_getline() when the line number changed to save
6350 * a bit of time */
6351 reglnum = save->rs_u.pos.lnum;
6352 regline = reg_getline(reglnum);
6353 }
6354 reginput = regline + save->rs_u.pos.col;
6355 }
6356 else
6357 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006358 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006359}
6360
6361/*
6362 * Return TRUE if current position is equal to saved position.
6363 */
6364 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006365reg_save_equal(regsave_T *save)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006366{
6367 if (REG_MULTI)
6368 return reglnum == save->rs_u.pos.lnum
6369 && reginput == regline + save->rs_u.pos.col;
6370 return reginput == save->rs_u.ptr;
6371}
6372
6373/*
6374 * Tentatively set the sub-expression start to the current position (after
6375 * calling regmatch() they will have changed). Need to save the existing
6376 * values for when there is no match.
6377 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6378 * depending on REG_MULTI.
6379 */
6380 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006381save_se_multi(save_se_T *savep, lpos_T *posp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006382{
6383 savep->se_u.pos = *posp;
6384 posp->lnum = reglnum;
6385 posp->col = (colnr_T)(reginput - regline);
6386}
6387
6388 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006389save_se_one(save_se_T *savep, char_u **pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006390{
6391 savep->se_u.ptr = *pp;
6392 *pp = reginput;
6393}
6394
6395/*
6396 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6397 */
6398 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006399re_num_cmp(long_u val, char_u *scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006400{
6401 long_u n = OPERAND_MIN(scan);
6402
6403 if (OPERAND_CMP(scan) == '>')
6404 return val > n;
6405 if (OPERAND_CMP(scan) == '<')
6406 return val < n;
6407 return val == n;
6408}
6409
Bram Moolenaar580abea2013-06-14 20:31:28 +02006410/*
6411 * Check whether a backreference matches.
6412 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006413 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6414 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006415 */
6416 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006417match_with_backref(
6418 linenr_T start_lnum,
6419 colnr_T start_col,
6420 linenr_T end_lnum,
6421 colnr_T end_col,
6422 int *bytelen)
Bram Moolenaar580abea2013-06-14 20:31:28 +02006423{
6424 linenr_T clnum = start_lnum;
6425 colnr_T ccol = start_col;
6426 int len;
6427 char_u *p;
6428
6429 if (bytelen != NULL)
6430 *bytelen = 0;
6431 for (;;)
6432 {
6433 /* Since getting one line may invalidate the other, need to make copy.
6434 * Slow! */
6435 if (regline != reg_tofree)
6436 {
6437 len = (int)STRLEN(regline);
6438 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6439 {
6440 len += 50; /* get some extra */
6441 vim_free(reg_tofree);
6442 reg_tofree = alloc(len);
6443 if (reg_tofree == NULL)
6444 return RA_FAIL; /* out of memory!*/
6445 reg_tofreelen = len;
6446 }
6447 STRCPY(reg_tofree, regline);
6448 reginput = reg_tofree + (reginput - regline);
6449 regline = reg_tofree;
6450 }
6451
6452 /* Get the line to compare with. */
6453 p = reg_getline(clnum);
6454 if (clnum == end_lnum)
6455 len = end_col - ccol;
6456 else
6457 len = (int)STRLEN(p + ccol);
6458
6459 if (cstrncmp(p + ccol, reginput, &len) != 0)
6460 return RA_NOMATCH; /* doesn't match */
6461 if (bytelen != NULL)
6462 *bytelen += len;
6463 if (clnum == end_lnum)
6464 break; /* match and at end! */
6465 if (reglnum >= reg_maxline)
6466 return RA_NOMATCH; /* text too short */
6467
6468 /* Advance to next line. */
6469 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006470 if (bytelen != NULL)
6471 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006472 ++clnum;
6473 ccol = 0;
6474 if (got_int)
6475 return RA_FAIL;
6476 }
6477
6478 /* found a match! Note that regline may now point to a copy of the line,
6479 * that should not matter. */
6480 return RA_MATCH;
6481}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006482
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006483#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006484
6485/*
6486 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6487 */
6488 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01006489regdump(char_u *pattern, bt_regprog_T *r)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006490{
6491 char_u *s;
6492 int op = EXACTLY; /* Arbitrary non-END op. */
6493 char_u *next;
6494 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006495 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006496
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006497#ifdef BT_REGEXP_LOG
6498 f = fopen("bt_regexp_log.log", "a");
6499#else
6500 f = stdout;
6501#endif
6502 if (f == NULL)
6503 return;
6504 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006505
6506 s = r->program + 1;
6507 /*
6508 * Loop until we find the END that isn't before a referred next (an END
6509 * can also appear in a NOMATCH operand).
6510 */
6511 while (op != END || s <= end)
6512 {
6513 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006514 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006515 next = regnext(s);
6516 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006517 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006518 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006519 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006520 if (end < next)
6521 end = next;
6522 if (op == BRACE_LIMITS)
6523 {
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006524 /* Two ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006525 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006526 s += 8;
6527 }
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006528 else if (op == BEHIND || op == NOBEHIND)
6529 {
6530 /* one int */
6531 fprintf(f, " count %ld", OPERAND_MIN(s));
6532 s += 4;
6533 }
Bram Moolenaar6d3a5d72013-06-06 18:04:51 +02006534 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6535 {
6536 /* one int plus comperator */
6537 fprintf(f, " count %ld", OPERAND_MIN(s));
6538 s += 5;
6539 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006540 s += 3;
6541 if (op == ANYOF || op == ANYOF + ADD_NL
6542 || op == ANYBUT || op == ANYBUT + ADD_NL
6543 || op == EXACTLY)
6544 {
6545 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006546 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006547 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006548 fprintf(f, "%c", *s++);
6549 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006550 s++;
6551 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006552 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006553 }
6554
6555 /* Header fields of interest. */
6556 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006557 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006558 ? (char *)transchar(r->regstart)
6559 : "multibyte", r->regstart);
6560 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006561 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006562 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006563 fprintf(f, "must have \"%s\"", r->regmust);
6564 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006565
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006566#ifdef BT_REGEXP_LOG
6567 fclose(f);
6568#endif
6569}
6570#endif /* BT_REGEXP_DUMP */
6571
6572#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006573/*
6574 * regprop - printable representation of opcode
6575 */
6576 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01006577regprop(char_u *op)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006578{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006579 char *p;
6580 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006581
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006582 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006583
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006584 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006585 {
6586 case BOL:
6587 p = "BOL";
6588 break;
6589 case EOL:
6590 p = "EOL";
6591 break;
6592 case RE_BOF:
6593 p = "BOF";
6594 break;
6595 case RE_EOF:
6596 p = "EOF";
6597 break;
6598 case CURSOR:
6599 p = "CURSOR";
6600 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006601 case RE_VISUAL:
6602 p = "RE_VISUAL";
6603 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006604 case RE_LNUM:
6605 p = "RE_LNUM";
6606 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006607 case RE_MARK:
6608 p = "RE_MARK";
6609 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006610 case RE_COL:
6611 p = "RE_COL";
6612 break;
6613 case RE_VCOL:
6614 p = "RE_VCOL";
6615 break;
6616 case BOW:
6617 p = "BOW";
6618 break;
6619 case EOW:
6620 p = "EOW";
6621 break;
6622 case ANY:
6623 p = "ANY";
6624 break;
6625 case ANY + ADD_NL:
6626 p = "ANY+NL";
6627 break;
6628 case ANYOF:
6629 p = "ANYOF";
6630 break;
6631 case ANYOF + ADD_NL:
6632 p = "ANYOF+NL";
6633 break;
6634 case ANYBUT:
6635 p = "ANYBUT";
6636 break;
6637 case ANYBUT + ADD_NL:
6638 p = "ANYBUT+NL";
6639 break;
6640 case IDENT:
6641 p = "IDENT";
6642 break;
6643 case IDENT + ADD_NL:
6644 p = "IDENT+NL";
6645 break;
6646 case SIDENT:
6647 p = "SIDENT";
6648 break;
6649 case SIDENT + ADD_NL:
6650 p = "SIDENT+NL";
6651 break;
6652 case KWORD:
6653 p = "KWORD";
6654 break;
6655 case KWORD + ADD_NL:
6656 p = "KWORD+NL";
6657 break;
6658 case SKWORD:
6659 p = "SKWORD";
6660 break;
6661 case SKWORD + ADD_NL:
6662 p = "SKWORD+NL";
6663 break;
6664 case FNAME:
6665 p = "FNAME";
6666 break;
6667 case FNAME + ADD_NL:
6668 p = "FNAME+NL";
6669 break;
6670 case SFNAME:
6671 p = "SFNAME";
6672 break;
6673 case SFNAME + ADD_NL:
6674 p = "SFNAME+NL";
6675 break;
6676 case PRINT:
6677 p = "PRINT";
6678 break;
6679 case PRINT + ADD_NL:
6680 p = "PRINT+NL";
6681 break;
6682 case SPRINT:
6683 p = "SPRINT";
6684 break;
6685 case SPRINT + ADD_NL:
6686 p = "SPRINT+NL";
6687 break;
6688 case WHITE:
6689 p = "WHITE";
6690 break;
6691 case WHITE + ADD_NL:
6692 p = "WHITE+NL";
6693 break;
6694 case NWHITE:
6695 p = "NWHITE";
6696 break;
6697 case NWHITE + ADD_NL:
6698 p = "NWHITE+NL";
6699 break;
6700 case DIGIT:
6701 p = "DIGIT";
6702 break;
6703 case DIGIT + ADD_NL:
6704 p = "DIGIT+NL";
6705 break;
6706 case NDIGIT:
6707 p = "NDIGIT";
6708 break;
6709 case NDIGIT + ADD_NL:
6710 p = "NDIGIT+NL";
6711 break;
6712 case HEX:
6713 p = "HEX";
6714 break;
6715 case HEX + ADD_NL:
6716 p = "HEX+NL";
6717 break;
6718 case NHEX:
6719 p = "NHEX";
6720 break;
6721 case NHEX + ADD_NL:
6722 p = "NHEX+NL";
6723 break;
6724 case OCTAL:
6725 p = "OCTAL";
6726 break;
6727 case OCTAL + ADD_NL:
6728 p = "OCTAL+NL";
6729 break;
6730 case NOCTAL:
6731 p = "NOCTAL";
6732 break;
6733 case NOCTAL + ADD_NL:
6734 p = "NOCTAL+NL";
6735 break;
6736 case WORD:
6737 p = "WORD";
6738 break;
6739 case WORD + ADD_NL:
6740 p = "WORD+NL";
6741 break;
6742 case NWORD:
6743 p = "NWORD";
6744 break;
6745 case NWORD + ADD_NL:
6746 p = "NWORD+NL";
6747 break;
6748 case HEAD:
6749 p = "HEAD";
6750 break;
6751 case HEAD + ADD_NL:
6752 p = "HEAD+NL";
6753 break;
6754 case NHEAD:
6755 p = "NHEAD";
6756 break;
6757 case NHEAD + ADD_NL:
6758 p = "NHEAD+NL";
6759 break;
6760 case ALPHA:
6761 p = "ALPHA";
6762 break;
6763 case ALPHA + ADD_NL:
6764 p = "ALPHA+NL";
6765 break;
6766 case NALPHA:
6767 p = "NALPHA";
6768 break;
6769 case NALPHA + ADD_NL:
6770 p = "NALPHA+NL";
6771 break;
6772 case LOWER:
6773 p = "LOWER";
6774 break;
6775 case LOWER + ADD_NL:
6776 p = "LOWER+NL";
6777 break;
6778 case NLOWER:
6779 p = "NLOWER";
6780 break;
6781 case NLOWER + ADD_NL:
6782 p = "NLOWER+NL";
6783 break;
6784 case UPPER:
6785 p = "UPPER";
6786 break;
6787 case UPPER + ADD_NL:
6788 p = "UPPER+NL";
6789 break;
6790 case NUPPER:
6791 p = "NUPPER";
6792 break;
6793 case NUPPER + ADD_NL:
6794 p = "NUPPER+NL";
6795 break;
6796 case BRANCH:
6797 p = "BRANCH";
6798 break;
6799 case EXACTLY:
6800 p = "EXACTLY";
6801 break;
6802 case NOTHING:
6803 p = "NOTHING";
6804 break;
6805 case BACK:
6806 p = "BACK";
6807 break;
6808 case END:
6809 p = "END";
6810 break;
6811 case MOPEN + 0:
6812 p = "MATCH START";
6813 break;
6814 case MOPEN + 1:
6815 case MOPEN + 2:
6816 case MOPEN + 3:
6817 case MOPEN + 4:
6818 case MOPEN + 5:
6819 case MOPEN + 6:
6820 case MOPEN + 7:
6821 case MOPEN + 8:
6822 case MOPEN + 9:
6823 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6824 p = NULL;
6825 break;
6826 case MCLOSE + 0:
6827 p = "MATCH END";
6828 break;
6829 case MCLOSE + 1:
6830 case MCLOSE + 2:
6831 case MCLOSE + 3:
6832 case MCLOSE + 4:
6833 case MCLOSE + 5:
6834 case MCLOSE + 6:
6835 case MCLOSE + 7:
6836 case MCLOSE + 8:
6837 case MCLOSE + 9:
6838 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6839 p = NULL;
6840 break;
6841 case BACKREF + 1:
6842 case BACKREF + 2:
6843 case BACKREF + 3:
6844 case BACKREF + 4:
6845 case BACKREF + 5:
6846 case BACKREF + 6:
6847 case BACKREF + 7:
6848 case BACKREF + 8:
6849 case BACKREF + 9:
6850 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6851 p = NULL;
6852 break;
6853 case NOPEN:
6854 p = "NOPEN";
6855 break;
6856 case NCLOSE:
6857 p = "NCLOSE";
6858 break;
6859#ifdef FEAT_SYN_HL
6860 case ZOPEN + 1:
6861 case ZOPEN + 2:
6862 case ZOPEN + 3:
6863 case ZOPEN + 4:
6864 case ZOPEN + 5:
6865 case ZOPEN + 6:
6866 case ZOPEN + 7:
6867 case ZOPEN + 8:
6868 case ZOPEN + 9:
6869 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6870 p = NULL;
6871 break;
6872 case ZCLOSE + 1:
6873 case ZCLOSE + 2:
6874 case ZCLOSE + 3:
6875 case ZCLOSE + 4:
6876 case ZCLOSE + 5:
6877 case ZCLOSE + 6:
6878 case ZCLOSE + 7:
6879 case ZCLOSE + 8:
6880 case ZCLOSE + 9:
6881 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6882 p = NULL;
6883 break;
6884 case ZREF + 1:
6885 case ZREF + 2:
6886 case ZREF + 3:
6887 case ZREF + 4:
6888 case ZREF + 5:
6889 case ZREF + 6:
6890 case ZREF + 7:
6891 case ZREF + 8:
6892 case ZREF + 9:
6893 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6894 p = NULL;
6895 break;
6896#endif
6897 case STAR:
6898 p = "STAR";
6899 break;
6900 case PLUS:
6901 p = "PLUS";
6902 break;
6903 case NOMATCH:
6904 p = "NOMATCH";
6905 break;
6906 case MATCH:
6907 p = "MATCH";
6908 break;
6909 case BEHIND:
6910 p = "BEHIND";
6911 break;
6912 case NOBEHIND:
6913 p = "NOBEHIND";
6914 break;
6915 case SUBPAT:
6916 p = "SUBPAT";
6917 break;
6918 case BRACE_LIMITS:
6919 p = "BRACE_LIMITS";
6920 break;
6921 case BRACE_SIMPLE:
6922 p = "BRACE_SIMPLE";
6923 break;
6924 case BRACE_COMPLEX + 0:
6925 case BRACE_COMPLEX + 1:
6926 case BRACE_COMPLEX + 2:
6927 case BRACE_COMPLEX + 3:
6928 case BRACE_COMPLEX + 4:
6929 case BRACE_COMPLEX + 5:
6930 case BRACE_COMPLEX + 6:
6931 case BRACE_COMPLEX + 7:
6932 case BRACE_COMPLEX + 8:
6933 case BRACE_COMPLEX + 9:
6934 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6935 p = NULL;
6936 break;
6937#ifdef FEAT_MBYTE
6938 case MULTIBYTECODE:
6939 p = "MULTIBYTECODE";
6940 break;
6941#endif
6942 case NEWL:
6943 p = "NEWL";
6944 break;
6945 default:
6946 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6947 p = NULL;
6948 break;
6949 }
6950 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006951 STRCAT(buf, p);
6952 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006953}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006954#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006955
Bram Moolenaarfb031402014-09-09 17:18:49 +02006956/*
6957 * Used in a place where no * or \+ can follow.
6958 */
6959 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01006960re_mult_next(char *what)
Bram Moolenaarfb031402014-09-09 17:18:49 +02006961{
6962 if (re_multi_type(peekchr()) == MULTI_MULT)
6963 EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
6964 return OK;
6965}
6966
Bram Moolenaar071d4272004-06-13 20:20:40 +00006967#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01006968static void mb_decompose(int c, int *c1, int *c2, int *c3);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006969
/*
 * One entry of the decomposition table: a base character "a" followed by up
 * to two more characters "b" and "c".  Zero means "not used".
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the Hebrew presentation forms 0xfb20 - 0xfb4f, indexed
 * by "character - 0xfb20".  Code points marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5bc, 0},          /* 0xfb30       alef+mapiq (was wrongly
                                 *              listed as alef+hiriq) */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into the base character "*c1" and up to two
 * following characters "*c2" and "*c3" (zero when not used).
 * Only the range 0xfb20 - 0xfb4f is known, any other character is returned
 * unmodified in "*c1" with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    decomp_T    *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        /* Not in the table: the character is its own decomposition. */
        *c1 = c;
        *c2 = *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
7047#endif
7048
7049/*
7050 * Compare two strings, ignore case if ireg_ic set.
7051 * Return 0 if strings match, non-zero otherwise.
7052 * Correct the length "*n" when composing characters are ignored.
7053 */
7054 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01007055cstrncmp(char_u *s1, char_u *s2, int *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007056{
7057 int result;
7058
7059 if (!ireg_ic)
7060 result = STRNCMP(s1, s2, *n);
7061 else
7062 result = MB_STRNICMP(s1, s2, *n);
7063
7064#ifdef FEAT_MBYTE
7065 /* if it failed and it's utf8 and we want to combineignore: */
7066 if (result != 0 && enc_utf8 && ireg_icombine)
7067 {
7068 char_u *str1, *str2;
7069 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007070 int junk;
7071
7072 /* we have to handle the strcmp ourselves, since it is necessary to
7073 * deal with the composing characters by ignoring them: */
7074 str1 = s1;
7075 str2 = s2;
7076 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007077 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007078 {
7079 c1 = mb_ptr2char_adv(&str1);
7080 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007081
7082 /* decompose the character if necessary, into 'base' characters
7083 * because I don't care about Arabic, I will hard-code the Hebrew
7084 * which I *do* care about! So sue me... */
7085 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7086 {
7087 /* decomposition necessary? */
7088 mb_decompose(c1, &c11, &junk, &junk);
7089 mb_decompose(c2, &c12, &junk, &junk);
7090 c1 = c11;
7091 c2 = c12;
7092 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7093 break;
7094 }
7095 }
7096 result = c2 - c1;
7097 if (result == 0)
7098 *n = (int)(str2 - s2);
7099 }
7100#endif
7101
7102 return result;
7103}
7104
7105/*
7106 * cstrchr: This function is used a lot for simple searches, keep it fast!
7107 */
7108 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007109cstrchr(char_u *s, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007110{
7111 char_u *p;
7112 int cc;
7113
7114 if (!ireg_ic
7115#ifdef FEAT_MBYTE
7116 || (!enc_utf8 && mb_char2len(c) > 1)
7117#endif
7118 )
7119 return vim_strchr(s, c);
7120
7121 /* tolower() and toupper() can be slow, comparing twice should be a lot
7122 * faster (esp. when using MS Visual C++!).
7123 * For UTF-8 need to use folded case. */
7124#ifdef FEAT_MBYTE
7125 if (enc_utf8 && c > 0x80)
7126 cc = utf_fold(c);
7127 else
7128#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007129 if (MB_ISUPPER(c))
7130 cc = MB_TOLOWER(c);
7131 else if (MB_ISLOWER(c))
7132 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007133 else
7134 return vim_strchr(s, c);
7135
7136#ifdef FEAT_MBYTE
7137 if (has_mbyte)
7138 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007139 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007140 {
7141 if (enc_utf8 && c > 0x80)
7142 {
7143 if (utf_fold(utf_ptr2char(p)) == cc)
7144 return p;
7145 }
7146 else if (*p == c || *p == cc)
7147 return p;
7148 }
7149 }
7150 else
7151#endif
7152 /* Faster version for when there are no multi-byte characters. */
7153 for (p = s; *p != NUL; ++p)
7154 if (*p == c || *p == cc)
7155 return p;
7156
7157 return NULL;
7158}
7159
7160/***************************************************************
7161 * regsub stuff *
7162 ***************************************************************/
7163
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
typedef void (*(*fptr_T)(int *, int))();

/* Case conversion callbacks.  Each stores the converted character through
 * its first argument.  The lower case named ones return NULL, the upper case
 * named ones return themselves. */
static fptr_T do_upper(int *, int);
static fptr_T do_Upper(int *, int);
static fptr_T do_lower(int *, int);
static fptr_T do_Lower(int *, int);

/* Common part of vim_regsub() and vim_regsub_multi(). */
static int vim_regsub_both(char_u *source, char_u *dest, int copy, int magic, int backslash);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007178
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007179 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007180do_upper(int *d, int c)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007181{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007182 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007183
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007184 return (fptr_T)NULL;
7185}
7186
7187 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007188do_Upper(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007189{
7190 *d = MB_TOUPPER(c);
7191
7192 return (fptr_T)do_Upper;
7193}
7194
7195 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007196do_lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007197{
7198 *d = MB_TOLOWER(c);
7199
7200 return (fptr_T)NULL;
7201}
7202
7203 static fptr_T
Bram Moolenaar05540972016-01-30 20:31:25 +01007204do_Lower(int *d, int c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007205{
7206 *d = MB_TOLOWER(c);
7207
7208 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007209}
7210
7211/*
7212 * regtilde(): Replace tildes in the pattern by the old pattern.
7213 *
7214 * Short explanation of the tilde: It stands for the previous replacement
7215 * pattern. If that previous pattern also contains a ~ we should go back a
7216 * step further... But we insert the previous pattern into the current one
7217 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007218 * This still does not handle the case where "magic" changes. So require the
7219 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007220 *
7221 * The tildes are parsed once before the first call to vim_regsub().
7222 */
7223 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007224regtilde(char_u *source, int magic)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007225{
7226 char_u *newsub = source;
7227 char_u *tmpsub;
7228 char_u *p;
7229 int len;
7230 int prevlen;
7231
7232 for (p = newsub; *p; ++p)
7233 {
7234 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7235 {
7236 if (reg_prev_sub != NULL)
7237 {
7238 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7239 prevlen = (int)STRLEN(reg_prev_sub);
7240 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7241 if (tmpsub != NULL)
7242 {
7243 /* copy prefix */
7244 len = (int)(p - newsub); /* not including ~ */
7245 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007246 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007247 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7248 /* copy postfix */
7249 if (!magic)
7250 ++p; /* back off \ */
7251 STRCPY(tmpsub + len + prevlen, p + 1);
7252
7253 if (newsub != source) /* already allocated newsub */
7254 vim_free(newsub);
7255 newsub = tmpsub;
7256 p = newsub + len + prevlen;
7257 }
7258 }
7259 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007260 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007261 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007262 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007263 --p;
7264 }
7265 else
7266 {
7267 if (*p == '\\' && p[1]) /* skip escaped characters */
7268 ++p;
7269#ifdef FEAT_MBYTE
7270 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007271 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007272#endif
7273 }
7274 }
7275
7276 vim_free(reg_prev_sub);
7277 if (newsub != source) /* newsub was allocated, just keep it */
7278 reg_prev_sub = newsub;
7279 else /* no ~ found, need to save newsub */
7280 reg_prev_sub = vim_strsave(newsub);
7281 return newsub;
7282}
7283
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used instead of reg_match and reg_mmatch for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch(). */
static regmatch_T       *submatch_match;
static regmmatch_T      *submatch_mmatch;
/* NOTE(review): the three below presumably hold copies of reg_firstlnum,
 * reg_maxline and reg_line_lbr for the same purpose - confirm where they
 * are set. */
static linenr_T         submatch_firstlnum;
static linenr_T         submatch_maxline;
static int              submatch_line_lbr;
#endif
7296
7297#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7298/*
7299 * vim_regsub() - perform substitutions after a vim_regexec() or
7300 * vim_regexec_multi() match.
7301 *
7302 * If "copy" is TRUE really copy into "dest".
7303 * If "copy" is FALSE nothing is copied, this is just to find out the length
7304 * of the result.
7305 *
7306 * If "backslash" is TRUE, a backslash will be removed later, need to double
7307 * them to keep them, and insert a backslash before a CR to avoid it being
7308 * replaced with a line break later.
7309 *
7310 * Note: The matched text must not change between the call of
7311 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7312 * references invalid!
7313 *
7314 * Returns the size of the replacement, including terminating NUL.
7315 */
7316 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007317vim_regsub(
7318 regmatch_T *rmp,
7319 char_u *source,
7320 char_u *dest,
7321 int copy,
7322 int magic,
7323 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007324{
7325 reg_match = rmp;
7326 reg_mmatch = NULL;
7327 reg_maxline = 0;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01007328 reg_buf = curbuf;
Bram Moolenaar93fc4812014-04-23 18:48:47 +02007329 reg_line_lbr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007330 return vim_regsub_both(source, dest, copy, magic, backslash);
7331}
7332#endif
7333
7334 int
Bram Moolenaar05540972016-01-30 20:31:25 +01007335vim_regsub_multi(
7336 regmmatch_T *rmp,
7337 linenr_T lnum,
7338 char_u *source,
7339 char_u *dest,
7340 int copy,
7341 int magic,
7342 int backslash)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007343{
7344 reg_match = NULL;
7345 reg_mmatch = rmp;
7346 reg_buf = curbuf; /* always works on the current buffer! */
7347 reg_firstlnum = lnum;
7348 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
Bram Moolenaar93fc4812014-04-23 18:48:47 +02007349 reg_line_lbr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007350 return vim_regsub_both(source, dest, copy, magic, backslash);
7351}
7352
/*
 * Common part of vim_regsub() and vim_regsub_multi(): expand the substitute
 * string "source" using the match that the caller stored in "reg_match" or
 * "reg_mmatch".
 *
 * "source"     the substitute string
 * "dest"       where the result goes; must not be NULL, also when "copy" is
 *              FALSE
 * "copy"       TRUE: write the result into "dest"; FALSE: only advance the
 *              write position to find out the length
 * "magic"      TRUE: '&' stands for the whole match; FALSE: "\&" does
 * "backslash"  TRUE: double backslashes and put a backslash before a CR in
 *              the result, because one level of backslashes is removed later
 *
 * When "source" starts with "\=" (and FEAT_EVAL is defined) the rest is
 * evaluated as an expression.  The evaluation happens in the call with
 * "copy" FALSE; the result is kept in a static variable and handed out by the
 * following call with "copy" TRUE.
 *
 * Returns the number of bytes of the result plus one for the terminating NUL.
 * Returns zero when "source" or "dest" is NULL or prog_magic_wrong() returns
 * non-zero.  When a NUL is found inside the matched text the function stops
 * early and "dest" is not NUL-terminated.
 */
    static int
vim_regsub_both(
    char_u      *source,
    char_u      *dest,
    int         copy,
    int         magic,
    int         backslash)
{
    char_u      *src;
    char_u      *dst;
    char_u      *s;
    int         c;
    int         cc;
    int         no = -1;        /* back reference number, -1 for none */
    fptr_T      func_all = (fptr_T)NULL;    /* set by \U and \L */
    fptr_T      func_one = (fptr_T)NULL;    /* set by \u and \l */
    linenr_T    clnum = 0;      /* init for GCC */
    int         len = 0;        /* init for GCC */
#ifdef FEAT_EVAL
    /* Result of the "\=" expression, carried over from the call with "copy"
     * FALSE to the call with "copy" TRUE. */
    static char_u *eval_result = NULL;
#endif

    /* Be paranoid... */
    if (source == NULL || dest == NULL)
    {
        EMSG(_(e_null));
        return 0;
    }
    if (prog_magic_wrong())
        return 0;
    src = source;
    dst = dest;

    /*
     * When the substitute part starts with "\=" evaluate it as an expression.
     */
    if (source[0] == '\\' && source[1] == '='
#ifdef FEAT_EVAL
            && !can_f_submatch      /* can't do this recursively */
#endif
            )
    {
#ifdef FEAT_EVAL
        /* To make sure that the length doesn't change between checking the
         * length and copying the string, and to speed up things, the
         * resulting string is saved from the call with "copy" == FALSE to the
         * call with "copy" == TRUE. */
        if (copy)
        {
            if (eval_result != NULL)
            {
                /* Hand out the saved result and forget about it. */
                STRCPY(dest, eval_result);
                dst += STRLEN(eval_result);
                vim_free(eval_result);
                eval_result = NULL;
            }
        }
        else
        {
            win_T       *save_reg_win;
            int         save_ireg_ic;

            /* Drop a result that was never fetched. */
            vim_free(eval_result);

            /* The expression may contain substitute(), which calls us
             * recursively.  Make sure submatch() gets the text from the first
             * level.  Don't need to save "reg_buf", because
             * vim_regexec_multi() can't be called recursively. */
            submatch_match = reg_match;
            submatch_mmatch = reg_mmatch;
            submatch_firstlnum = reg_firstlnum;
            submatch_maxline = reg_maxline;
            submatch_line_lbr = reg_line_lbr;
            save_reg_win = reg_win;
            save_ireg_ic = ireg_ic;
            can_f_submatch = TRUE;

            /* Skip over the "\=" prefix. */
            eval_result = eval_to_string(source + 2, NULL, TRUE);
            if (eval_result != NULL)
            {
                int had_backslash = FALSE;

                for (s = eval_result; *s != NUL; mb_ptr_adv(s))
                {
                    /* Change NL to CR, so that it becomes a line break,
                     * unless called from vim_regexec_nl().
                     * Skip over a backslashed character. */
                    if (*s == NL && !submatch_line_lbr)
                        *s = CAR;
                    else if (*s == '\\' && s[1] != NUL)
                    {
                        ++s;
                        /* Change NL to CR here too, so that this works:
                         * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
                         *   abc\
                         *   def
                         * Not when called from vim_regexec_nl().
                         */
                        if (*s == NL && !submatch_line_lbr)
                            *s = CAR;
                        had_backslash = TRUE;
                    }
                }
                if (had_backslash && backslash)
                {
                    /* Backslashes will be consumed, need to double them. */
                    s = vim_strsave_escaped(eval_result, (char_u *)"\\");
                    if (s != NULL)
                    {
                        vim_free(eval_result);
                        eval_result = s;
                    }
                }

                dst += STRLEN(eval_result);
            }

            /* Put back the match state that evaluating the expression may
             * have overwritten. */
            reg_match = submatch_match;
            reg_mmatch = submatch_mmatch;
            reg_firstlnum = submatch_firstlnum;
            reg_maxline = submatch_maxline;
            reg_line_lbr = submatch_line_lbr;
            reg_win = save_reg_win;
            ireg_ic = save_ireg_ic;
            can_f_submatch = FALSE;
        }
#endif
    }
    else
      while ((c = *src++) != NUL)
      {
        /* First find out if this is a back reference ("no" >= 0) or a case
         * conversion item. */
        if (c == '&' && magic)
            no = 0;
        else if (c == '\\' && *src != NUL)
        {
            if (*src == '&' && !magic)
            {
                ++src;
                no = 0;
            }
            else if ('0' <= *src && *src <= '9')
            {
                no = *src++ - '0';
            }
            else if (vim_strchr((char_u *)"uUlLeE", *src))
            {
                /* Only remember the conversion, it is applied to the
                 * characters that follow. */
                switch (*src++)
                {
                case 'u':   func_one = (fptr_T)do_upper;
                            continue;
                case 'U':   func_all = (fptr_T)do_Upper;
                            continue;
                case 'l':   func_one = (fptr_T)do_lower;
                            continue;
                case 'L':   func_all = (fptr_T)do_Lower;
                            continue;
                case 'e':
                case 'E':   func_one = func_all = (fptr_T)NULL;
                            continue;
                }
            }
        }
        if (no < 0)           /* Ordinary character. */
        {
            if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
            {
                /* Copy a special key as-is. */
                if (copy)
                {
                    *dst++ = c;
                    *dst++ = *src++;
                    *dst++ = *src++;
                }
                else
                {
                    dst += 3;
                    src += 2;
                }
                continue;
            }

            if (c == '\\' && *src != NUL)
            {
                /* Check for abbreviations -- webb */
                switch (*src)
                {
                    case 'r':   c = CAR;        ++src;  break;
                    case 'n':   c = NL;         ++src;  break;
                    case 't':   c = TAB;        ++src;  break;
                 /* Oh no!  \e already has meaning in subst pat :-( */
                 /* case 'e':   c = ESC;        ++src;  break; */
                    case 'b':   c = Ctrl_H;     ++src;  break;

                    /* If "backslash" is TRUE the backslash will be removed
                     * later.  Used to insert a literal CR. */
                    default:    if (backslash)
                                {
                                    if (copy)
                                        *dst = '\\';
                                    ++dst;
                                }
                                c = *src++;
                }
            }
#ifdef FEAT_MBYTE
            else if (has_mbyte)
                /* "src" was already advanced past the first byte. */
                c = mb_ptr2char(src - 1);
#endif

            /* Write to buffer, if copy is set. */
            /* The conversion function stores the character to use in "cc";
             * what it returns becomes the function for the next character
             * (NOTE(review): presumably NULL for the one-character variants,
             * they are defined outside this part of the file). */
            if (func_one != (fptr_T)NULL)
                /* Turbo C complains without the typecast */
                func_one = (fptr_T)(func_one(&cc, c));
            else if (func_all != (fptr_T)NULL)
                /* Turbo C complains without the typecast */
                func_all = (fptr_T)(func_all(&cc, c));
            else /* just copy */
                cc = c;

#ifdef FEAT_MBYTE
            if (has_mbyte)
            {
                int totlen = mb_ptr2len(src - 1);

                if (copy)
                    mb_char2bytes(cc, dst);
                /* One less, "dst" is incremented once more below. */
                dst += mb_char2len(cc) - 1;
                if (enc_utf8)
                {
                    int clen = utf_ptr2len(src - 1);

                    /* If the character length is shorter than "totlen", there
                     * are composing characters; copy them as-is. */
                    if (clen < totlen)
                    {
                        if (copy)
                            mch_memmove(dst + 1, src - 1 + clen,
                                                 (size_t)(totlen - clen));
                        dst += totlen - clen;
                    }
                }
                src += totlen - 1;
            }
            else
#endif
                if (copy)
                    *dst = cc;
            dst++;
        }
        else
        {
            /* Back reference: find the start "s" and byte length "len" of
             * submatch "no"; "s" stays NULL when it did not match. */
            if (REG_MULTI)
            {
                clnum = reg_mmatch->startpos[no].lnum;
                if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
                    s = NULL;
                else
                {
                    s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
                    if (reg_mmatch->endpos[no].lnum == clnum)
                        len = reg_mmatch->endpos[no].col
                                               - reg_mmatch->startpos[no].col;
                    else
                        len = (int)STRLEN(s);
                }
            }
            else
            {
                s = reg_match->startp[no];
                if (reg_match->endp[no] == NULL)
                    s = NULL;
                else
                    len = (int)(reg_match->endp[no] - s);
            }
            if (s != NULL)
            {
                for (;;)
                {
                    if (len == 0)
                    {
                        if (REG_MULTI)
                        {
                            /* At the end of a line: done when this was the
                             * last line of the submatch, otherwise insert a
                             * CR and continue in the next line. */
                            if (reg_mmatch->endpos[no].lnum == clnum)
                                break;
                            if (copy)
                                *dst = CAR;
                            ++dst;
                            s = reg_getline(++clnum);
                            if (reg_mmatch->endpos[no].lnum == clnum)
                                len = reg_mmatch->endpos[no].col;
                            else
                                len = (int)STRLEN(s);
                        }
                        else
                            break;
                    }
                    else if (*s == NUL) /* we hit NUL. */
                    {
                        if (copy)
                            EMSG(_(e_re_damg));
                        goto exit;
                    }
                    else
                    {
                        if (backslash && (*s == CAR || *s == '\\'))
                        {
                            /*
                             * Insert a backslash in front of a CR, otherwise
                             * it will be replaced by a line break.
                             * Number of backslashes will be halved later,
                             * double them here.
                             */
                            if (copy)
                            {
                                dst[0] = '\\';
                                dst[1] = *s;
                            }
                            dst += 2;
                        }
                        else
                        {
#ifdef FEAT_MBYTE
                            if (has_mbyte)
                                c = mb_ptr2char(s);
                            else
#endif
                                c = *s;

                            if (func_one != (fptr_T)NULL)
                                /* Turbo C complains without the typecast */
                                func_one = (fptr_T)(func_one(&cc, c));
                            else if (func_all != (fptr_T)NULL)
                                /* Turbo C complains without the typecast */
                                func_all = (fptr_T)(func_all(&cc, c));
                            else /* just copy */
                                cc = c;

#ifdef FEAT_MBYTE
                            if (has_mbyte)
                            {
                                int l;

                                /* Copy composing characters separately, one
                                 * at a time. */
                                if (enc_utf8)
                                    l = utf_ptr2len(s) - 1;
                                else
                                    l = mb_ptr2len(s) - 1;

                                /* Skip all but one byte of the character,
                                 * the last one is skipped below. */
                                s += l;
                                len -= l;
                                if (copy)
                                    mb_char2bytes(cc, dst);
                                dst += mb_char2len(cc) - 1;
                            }
                            else
#endif
                                if (copy)
                                    *dst = cc;
                            dst++;
                        }

                        ++s;
                        --len;
                    }
                }
            }
            no = -1;
        }
      }
    if (copy)
        *dst = NUL;

exit:
    /* Also count the terminating NUL. */
    return (int)((dst - dest) + 1);
}
7729
7730#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01007731static char_u *reg_getline_submatch(linenr_T lnum);
Bram Moolenaard32a3192009-11-26 19:40:49 +00007732
Bram Moolenaar071d4272004-06-13 20:20:40 +00007733/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007734 * Call reg_getline() with the line numbers from the submatch. If a
7735 * substitute() was used the reg_maxline and other values have been
7736 * overwritten.
7737 */
7738 static char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007739reg_getline_submatch(linenr_T lnum)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007740{
7741 char_u *s;
7742 linenr_T save_first = reg_firstlnum;
7743 linenr_T save_max = reg_maxline;
7744
7745 reg_firstlnum = submatch_firstlnum;
7746 reg_maxline = submatch_maxline;
7747
7748 s = reg_getline(lnum);
7749
7750 reg_firstlnum = save_first;
7751 reg_maxline = save_max;
7752 return s;
7753}
7754
7755/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007756 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007757 * allocated memory.
7758 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7759 */
7760 char_u *
Bram Moolenaar05540972016-01-30 20:31:25 +01007761reg_submatch(int no)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007762{
7763 char_u *retval = NULL;
7764 char_u *s;
7765 int len;
7766 int round;
7767 linenr_T lnum;
7768
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007769 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007770 return NULL;
7771
7772 if (submatch_match == NULL)
7773 {
7774 /*
7775 * First round: compute the length and allocate memory.
7776 * Second round: copy the text.
7777 */
7778 for (round = 1; round <= 2; ++round)
7779 {
7780 lnum = submatch_mmatch->startpos[no].lnum;
7781 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7782 return NULL;
7783
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007784 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007785 if (s == NULL) /* anti-crash check, cannot happen? */
7786 break;
7787 if (submatch_mmatch->endpos[no].lnum == lnum)
7788 {
7789 /* Within one line: take form start to end col. */
7790 len = submatch_mmatch->endpos[no].col
7791 - submatch_mmatch->startpos[no].col;
7792 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007793 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007794 ++len;
7795 }
7796 else
7797 {
7798 /* Multiple lines: take start line from start col, middle
7799 * lines completely and end line up to end col. */
7800 len = (int)STRLEN(s);
7801 if (round == 2)
7802 {
7803 STRCPY(retval, s);
7804 retval[len] = '\n';
7805 }
7806 ++len;
7807 ++lnum;
7808 while (lnum < submatch_mmatch->endpos[no].lnum)
7809 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007810 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007811 if (round == 2)
7812 STRCPY(retval + len, s);
7813 len += (int)STRLEN(s);
7814 if (round == 2)
7815 retval[len] = '\n';
7816 ++len;
7817 }
7818 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007819 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007820 submatch_mmatch->endpos[no].col);
7821 len += submatch_mmatch->endpos[no].col;
7822 if (round == 2)
7823 retval[len] = NUL;
7824 ++len;
7825 }
7826
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007827 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007828 {
7829 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007830 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007831 return NULL;
7832 }
7833 }
7834 }
7835 else
7836 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007837 s = submatch_match->startp[no];
7838 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007839 retval = NULL;
7840 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007841 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007842 }
7843
7844 return retval;
7845}
Bram Moolenaar41571762014-04-02 19:00:58 +02007846
7847/*
7848 * Used for the submatch() function with the optional non-zero argument: get
7849 * the list of strings from the n'th submatch in allocated memory with NULs
7850 * represented in NLs.
7851 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7852 * command, for a non-existing submatch and for any error.
7853 */
7854 list_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007855reg_submatch_list(int no)
Bram Moolenaar41571762014-04-02 19:00:58 +02007856{
7857 char_u *s;
7858 linenr_T slnum;
7859 linenr_T elnum;
7860 colnr_T scol;
7861 colnr_T ecol;
7862 int i;
7863 list_T *list;
7864 int error = FALSE;
7865
7866 if (!can_f_submatch || no < 0)
7867 return NULL;
7868
7869 if (submatch_match == NULL)
7870 {
7871 slnum = submatch_mmatch->startpos[no].lnum;
7872 elnum = submatch_mmatch->endpos[no].lnum;
7873 if (slnum < 0 || elnum < 0)
7874 return NULL;
7875
7876 scol = submatch_mmatch->startpos[no].col;
7877 ecol = submatch_mmatch->endpos[no].col;
7878
7879 list = list_alloc();
7880 if (list == NULL)
7881 return NULL;
7882
7883 s = reg_getline_submatch(slnum) + scol;
7884 if (slnum == elnum)
7885 {
7886 if (list_append_string(list, s, ecol - scol) == FAIL)
7887 error = TRUE;
7888 }
7889 else
7890 {
7891 if (list_append_string(list, s, -1) == FAIL)
7892 error = TRUE;
7893 for (i = 1; i < elnum - slnum; i++)
7894 {
7895 s = reg_getline_submatch(slnum + i);
7896 if (list_append_string(list, s, -1) == FAIL)
7897 error = TRUE;
7898 }
7899 s = reg_getline_submatch(elnum);
7900 if (list_append_string(list, s, ecol) == FAIL)
7901 error = TRUE;
7902 }
7903 }
7904 else
7905 {
7906 s = submatch_match->startp[no];
7907 if (s == NULL || submatch_match->endp[no] == NULL)
7908 return NULL;
7909 list = list_alloc();
7910 if (list == NULL)
7911 return NULL;
7912 if (list_append_string(list, s,
7913 (int)(submatch_match->endp[no] - s)) == FAIL)
7914 error = TRUE;
7915 }
7916
7917 if (error)
7918 {
7919 list_free(list, TRUE);
7920 return NULL;
7921 }
7922 return list;
7923}
Bram Moolenaar071d4272004-06-13 20:20:40 +00007924#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007925
/*
 * Descriptor of the backtracking regexp engine: its compile, free,
 * single-line exec and multi-line exec functions, followed by a string that
 * starts out empty.
 * In this file the members are accessed as "regcomp", "regfree",
 * "regexec_nl", "regexec_multi" and "expr"; vim_regcomp() points "expr" at
 * the pattern being compiled.
 * NOTE(review): regengine_T is declared elsewhere, the member order is
 * assumed to match the initializer order.
 */
static regengine_T bt_regengine =
{
    bt_regcomp,
    bt_regfree,
    bt_regexec_nl,
    bt_regexec_multi,
    (char_u *)""
};
7934
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007935#include "regexp_nfa.c"
7936
/*
 * Descriptor of the NFA regexp engine, built from the functions that the
 * "regexp_nfa.c" include above provides.  Same layout as the descriptor of
 * the backtracking engine: compile, free, single-line exec and multi-line
 * exec functions, followed by a string that starts out empty;
 * vim_regcomp() points the "expr" member at the pattern being compiled.
 */
static regengine_T nfa_regengine =
{
    nfa_regcomp,
    nfa_regfree,
    nfa_regexec_nl,
    nfa_regexec_multi,
    (char_u *)""
};
7945
/* Which regexp engine to use? Needed for vim_regcomp().
 * Must match with 'regexpengine'.
 * vim_regcomp() sets it from "p_re" on every call; a "\%#=" prefix in the
 * pattern overrules that. */
static int regexp_engine = 0;

#ifdef DEBUG
/* Names of the engines for the debug message in vim_regcomp(), indexed by
 * engine number. */
static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
                    "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                            };
#endif
7957
7958/*
7959 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02007960 * Returns the program in allocated memory.
7961 * Use vim_regfree() to free the memory.
7962 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007963 */
7964 regprog_T *
Bram Moolenaar05540972016-01-30 20:31:25 +01007965vim_regcomp(char_u *expr_arg, int re_flags)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007966{
7967 regprog_T *prog = NULL;
7968 char_u *expr = expr_arg;
7969
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007970 regexp_engine = p_re;
7971
7972 /* Check for prefix "\%#=", that sets the regexp engine */
7973 if (STRNCMP(expr, "\\%#=", 4) == 0)
7974 {
7975 int newengine = expr[4] - '0';
7976
7977 if (newengine == AUTOMATIC_ENGINE
7978 || newengine == BACKTRACKING_ENGINE
7979 || newengine == NFA_ENGINE)
7980 {
7981 regexp_engine = expr[4] - '0';
7982 expr += 5;
7983#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02007984 smsg((char_u *)"New regexp mode selected (%d): %s",
7985 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007986#endif
7987 }
7988 else
7989 {
7990 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
7991 regexp_engine = AUTOMATIC_ENGINE;
7992 }
7993 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007994 bt_regengine.expr = expr;
7995 nfa_regengine.expr = expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007996
7997 /*
7998 * First try the NFA engine, unless backtracking was requested.
7999 */
8000 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008001 prog = nfa_regengine.regcomp(expr,
8002 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008003 else
8004 prog = bt_regengine.regcomp(expr, re_flags);
8005
Bram Moolenaarfda37292014-11-05 14:27:36 +01008006 /* Check for error compiling regexp with initial engine. */
8007 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008008 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008009#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008010 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8011 {
8012 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008013 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008014 if (f)
8015 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008016 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008017 fclose(f);
8018 }
8019 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008020 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8021 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008022 }
8023#endif
8024 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008025 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008026 * The NFA engine also fails for patterns that it can't handle well
8027 * but are still valid patterns, thus a retry should work.
8028 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008029 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008030 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008031 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008032 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008033 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008034 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008035
Bram Moolenaarfda37292014-11-05 14:27:36 +01008036 if (prog != NULL)
8037 {
8038 /* Store the info needed to call regcomp() again when the engine turns
8039 * out to be very slow when executing it. */
8040 prog->re_engine = regexp_engine;
8041 prog->re_flags = re_flags;
8042 }
8043
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008044 return prog;
8045}
8046
8047/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008048 * Free a compiled regexp program, returned by vim_regcomp().
8049 */
8050 void
Bram Moolenaar05540972016-01-30 20:31:25 +01008051vim_regfree(regprog_T *prog)
Bram Moolenaar473de612013-06-08 18:19:48 +02008052{
8053 if (prog != NULL)
8054 prog->engine->regfree(prog);
8055}
8056
Bram Moolenaarfda37292014-11-05 14:27:36 +01008057#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01008058static void report_re_switch(char_u *pat);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008059
8060 static void
Bram Moolenaar05540972016-01-30 20:31:25 +01008061report_re_switch(char_u *pat)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008062{
8063 if (p_verbose > 0)
8064 {
8065 verbose_enter();
8066 MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
8067 MSG_PUTS(pat);
8068 verbose_leave();
8069 }
8070}
8071#endif
8072
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01008073static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, int nl);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008074
Bram Moolenaar473de612013-06-08 18:19:48 +02008075/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008076 * Match a regexp against a string.
8077 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008078 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008079 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008080 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008081 *
8082 * Return TRUE if there is a match, FALSE if not.
8083 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008084 static int
Bram Moolenaar05540972016-01-30 20:31:25 +01008085vim_regexec_both(
8086 regmatch_T *rmp,
8087 char_u *line, /* string to match against */
8088 colnr_T col, /* column to start looking for match */
8089 int nl)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008090{
8091 int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8092
8093 /* NFA engine aborted because it's very slow. */
8094 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8095 && result == NFA_TOO_EXPENSIVE)
8096 {
8097 int save_p_re = p_re;
8098 int re_flags = rmp->regprog->re_flags;
8099 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8100
8101 p_re = BACKTRACKING_ENGINE;
8102 vim_regfree(rmp->regprog);
8103 if (pat != NULL)
8104 {
8105#ifdef FEAT_EVAL
8106 report_re_switch(pat);
8107#endif
8108 rmp->regprog = vim_regcomp(pat, re_flags);
8109 if (rmp->regprog != NULL)
8110 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8111 vim_free(pat);
8112 }
8113
8114 p_re = save_p_re;
8115 }
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008116 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008117}
8118
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008119/*
8120 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008121 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008122 */
8123 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008124vim_regexec_prog(
8125 regprog_T **prog,
8126 int ignore_case,
8127 char_u *line,
8128 colnr_T col)
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008129{
8130 int r;
8131 regmatch_T regmatch;
8132
8133 regmatch.regprog = *prog;
8134 regmatch.rm_ic = ignore_case;
8135 r = vim_regexec_both(&regmatch, line, col, FALSE);
8136 *prog = regmatch.regprog;
8137 return r;
8138}
8139
8140/*
8141 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008142 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008143 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008144 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008145vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008146{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008147 return vim_regexec_both(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008148}
8149
8150#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
8151 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
8152/*
8153 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008154 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008155 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008156 */
8157 int
Bram Moolenaar05540972016-01-30 20:31:25 +01008158vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008159{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008160 return vim_regexec_both(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008161}
8162#endif
8163
8164/*
8165 * Match a regexp against multiple lines.
8166 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008167 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008168 * Uses curbuf for line count and 'iskeyword'.
8169 *
8170 * Return zero if there is no match. Return number of lines contained in the
8171 * match otherwise.
8172 */
8173 long
Bram Moolenaar05540972016-01-30 20:31:25 +01008174vim_regexec_multi(
8175 regmmatch_T *rmp,
8176 win_T *win, /* window in which to search or NULL */
8177 buf_T *buf, /* buffer in which to search */
8178 linenr_T lnum, /* nr of line to start looking for match */
8179 colnr_T col, /* column to start looking for match */
8180 proftime_T *tm) /* timeout limit or NULL */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008181{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008182 int result = rmp->regprog->engine->regexec_multi(
8183 rmp, win, buf, lnum, col, tm);
8184
8185 /* NFA engine aborted because it's very slow. */
8186 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8187 && result == NFA_TOO_EXPENSIVE)
8188 {
8189 int save_p_re = p_re;
8190 int re_flags = rmp->regprog->re_flags;
8191 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8192
8193 p_re = BACKTRACKING_ENGINE;
8194 vim_regfree(rmp->regprog);
8195 if (pat != NULL)
8196 {
8197#ifdef FEAT_EVAL
8198 report_re_switch(pat);
8199#endif
8200 rmp->regprog = vim_regcomp(pat, re_flags);
8201 if (rmp->regprog != NULL)
8202 result = rmp->regprog->engine->regexec_multi(
8203 rmp, win, buf, lnum, col, tm);
8204 vim_free(pat);
8205 }
8206 p_re = save_p_re;
8207 }
8208
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008209 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008210}