blob: 85e0458de0cd1d5065ada9007861b5247c7ae0ea [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
/*
 * Structure for regexp "program".  This is essentially a linear encoding
 * of a nondeterministic finite-state machine (aka syntax charts or
 * "railroad normal form" in parsing technology).  Each node is an opcode
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
 * The operand of some types of node is a literal string; for others, it is a
 * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
 * is the first node of the branch.
 * (NB this is *not* a tree structure: the tail of the branch connects to the
 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character class opcodes: 20-48 are the normal ones, 50-78 are the same
 * opcodes with ADD_NL added and also match a line-break.
 *
 * FIRST_NL and LAST_NL are the first and last opcode of the "with line-break"
 * range.  Their replacement lists are parenthesized so that they expand
 * correctly inside a larger expression, e.g. "op - FIRST_NL".
 * WITH_NL(op) is non-zero when "op" lies in that range; "op" is evaluated
 * twice.
 */
#define ADD_NL		30
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
#define LAST_NL		(NUPPER + ADD_NL)
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249/*
250 * Magic characters have a special meaning, they don't match literally.
251 * Magic characters are negative. This separates them from literal characters
252 * (possibly multi-byte). Only ASCII characters can be Magic.
253 */
254#define Magic(x) ((int)(x) - 256)
255#define un_Magic(x) ((x) + 256)
256#define is_Magic(x) ((x) < 0)
257
258static int no_Magic __ARGS((int x));
259static int toggle_Magic __ARGS((int x));
260
/*
 * Strip the magicness from "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int		x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
269
/*
 * Flip the magicness of "x": a Magic (negative) value is converted back with
 * un_Magic(), any other value is made Magic with Magic().
 */
    static int
toggle_Magic(x)
    int		x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
278
279/*
280 * The first byte of the regexp internal "program" is actually this magic
281 * number; the start node begins in the second byte. It's used to catch the
282 * most severe mutilation of the program by the caller.
283 */
284
285#define REGMAGIC 0234
286
287/*
288 * Opcode notes:
289 *
290 * BRANCH The set of branches constituting a single choice are hooked
291 * together with their "next" pointers, since precedence prevents
292 * anything being concatenated to any individual branch. The
293 * "next" pointer of the last BRANCH in a choice points to the
294 * thing following the whole choice. This is also where the
295 * final "next" pointer of each individual branch points; each
296 * branch starts with the operand node of a BRANCH node.
297 *
298 * BACK Normal "next" pointers all implicitly point forward; BACK
299 * exists to make loop structures possible.
300 *
301 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
302 * BRANCH structures using BACK. Simple cases (one character
303 * per match) are implemented with STAR and PLUS for speed
304 * and to minimize recursive plunges.
305 *
306 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
307 * node, and defines the min and max limits to be used for that
308 * node.
309 *
310 * MOPEN,MCLOSE ...are numbered at compile time.
311 * ZOPEN,ZCLOSE ...ditto
312 */
313
314/*
315 * A node is one char of opcode followed by two chars of "next" pointer.
316 * "Next" pointers are stored as two 8-bit bytes, high order first. The
317 * value is a positive offset from the opcode of the node containing it.
318 * An operand, if any, simply follows the node. (Note that much of the
319 * code generation knows about this implicit relationship.)
320 *
321 * Using two bytes for the "next" pointer is vast overkill for most things,
322 * but allows patterns to get big without disasters.
323 */
324#define OP(p) ((int)*(p))
325#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
326#define OPERAND(p) ((p) + 3)
327/* Obtain an operand that was stored as four bytes, MSB first. */
328#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
329 + ((long)(p)[5] << 8) + (long)(p)[6])
330/* Obtain a second operand stored as four bytes. */
331#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
332/* Obtain a second single-byte operand stored after a four bytes operand. */
333#define OPERAND_CMP(p) (p)[7]
334
335/*
336 * Utility definitions.
337 */
338#define UCHARAT(p) ((int)*(char_u *)(p))
339
340/* Used for an error (down from) vim_regcomp(): give the error message, set
341 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000342#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000343#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200344#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
345#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
346#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347
348#define MAX_LIMIT (32767L << 16L)
349
350static int re_multi_type __ARGS((int));
351static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
352static char_u *cstrchr __ARGS((char_u *, int));
353
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200354#ifdef BT_REGEXP_DUMP
355static void regdump __ARGS((char_u *, bt_regprog_T *));
356#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358static char_u *regprop __ARGS((char_u *));
359#endif
360
Bram Moolenaarfb031402014-09-09 17:18:49 +0200361static int re_mult_next __ARGS((char *what));
362
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200363static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
364static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
365static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
366static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200367#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200368static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
369static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200370#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200371static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200372static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000373#define NOT_MULTI 0
374#define MULTI_ONE 1
375#define MULTI_MULT 2
376/*
377 * Return NOT_MULTI if c is not a "multi" operator.
378 * Return MULTI_ONE if c is a single "multi" operator.
379 * Return MULTI_MULT if c is a multi "multi" operator.
380 */
381 static int
382re_multi_type(c)
383 int c;
384{
385 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
386 return MULTI_ONE;
387 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
388 return MULTI_MULT;
389 return NOT_MULTI;
390}
391
392/*
393 * Flags to be passed up and down.
394 */
395#define HASWIDTH 0x1 /* Known never to match null string. */
396#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
397#define SPSTART 0x4 /* Starts with * or +. */
398#define HASNL 0x8 /* Contains some \n. */
399#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
400#define WORST 0 /* Worst case. */
401
402/*
403 * When regcode is set to this value, code is not emitted and size is computed
404 * instead.
405 */
406#define JUST_CALC_SIZE ((char_u *) -1)
407
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000408static char_u *reg_prev_sub = NULL;
409
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n	- New line (NL).
 *  \r	- Carriage Return (CR).
 *  \t	- Tab (TAB).
 *  \e	- Escape (ESC).
 *  \b	- Backspace (Ctrl_H).
 *  \d	- Character code in decimal, e.g. \d123
 *  \o	- Character code in octal, e.g. \o40
 *  \x	- Character code in hex, e.g. \x4a
 *  \u	- Multibyte character code, e.g. \u20ac
 *  \U	- Long multibyte character code, e.g. \U12345678
 */
426static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000427static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000428
429static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000430static int get_char_class __ARGS((char_u **pp));
431static int get_equi_class __ARGS((char_u **pp));
432static void reg_equi_class __ARGS((int c));
433static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000434static char_u *skip_anyof __ARGS((char_u *p));
435static void init_class_tab __ARGS((void));
436
437/*
438 * Translate '\x' to its control character, except "\n", which is Magic.
439 */
440 static int
441backslash_trans(c)
442 int c;
443{
444 switch (c)
445 {
446 case 'r': return CAR;
447 case 't': return TAB;
448 case 'e': return ESC;
449 case 'b': return BS;
450 }
451 return c;
452}
453
454/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000455 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000456 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
457 * recognized. Otherwise "pp" is advanced to after the item.
458 */
459 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000460get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000461 char_u **pp;
462{
463 static const char *(class_names[]) =
464 {
465 "alnum:]",
466#define CLASS_ALNUM 0
467 "alpha:]",
468#define CLASS_ALPHA 1
469 "blank:]",
470#define CLASS_BLANK 2
471 "cntrl:]",
472#define CLASS_CNTRL 3
473 "digit:]",
474#define CLASS_DIGIT 4
475 "graph:]",
476#define CLASS_GRAPH 5
477 "lower:]",
478#define CLASS_LOWER 6
479 "print:]",
480#define CLASS_PRINT 7
481 "punct:]",
482#define CLASS_PUNCT 8
483 "space:]",
484#define CLASS_SPACE 9
485 "upper:]",
486#define CLASS_UPPER 10
487 "xdigit:]",
488#define CLASS_XDIGIT 11
489 "tab:]",
490#define CLASS_TAB 12
491 "return:]",
492#define CLASS_RETURN 13
493 "backspace:]",
494#define CLASS_BACKSPACE 14
495 "escape:]",
496#define CLASS_ESCAPE 15
497 };
498#define CLASS_NONE 99
499 int i;
500
501 if ((*pp)[1] == ':')
502 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000503 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
505 {
506 *pp += STRLEN(class_names[i]) + 2;
507 return i;
508 }
509 }
510 return CLASS_NONE;
511}
512
513/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000514 * Specific version of character class functions.
515 * Using a table to keep this fast.
516 */
517static short class_tab[256];
518
519#define RI_DIGIT 0x01
520#define RI_HEX 0x02
521#define RI_OCTAL 0x04
522#define RI_WORD 0x08
523#define RI_HEAD 0x10
524#define RI_ALPHA 0x20
525#define RI_LOWER 0x40
526#define RI_UPPER 0x80
527#define RI_WHITE 0x100
528
529 static void
530init_class_tab()
531{
532 int i;
533 static int done = FALSE;
534
535 if (done)
536 return;
537
538 for (i = 0; i < 256; ++i)
539 {
540 if (i >= '0' && i <= '7')
541 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
542 else if (i >= '8' && i <= '9')
543 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
544 else if (i >= 'a' && i <= 'f')
545 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
546#ifdef EBCDIC
547 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
548 || (i >= 's' && i <= 'z'))
549#else
550 else if (i >= 'g' && i <= 'z')
551#endif
552 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
553 else if (i >= 'A' && i <= 'F')
554 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
555#ifdef EBCDIC
556 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
557 || (i >= 'S' && i <= 'Z'))
558#else
559 else if (i >= 'G' && i <= 'Z')
560#endif
561 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
562 else if (i == '_')
563 class_tab[i] = RI_WORD + RI_HEAD;
564 else
565 class_tab[i] = 0;
566 }
567 class_tab[' '] |= RI_WHITE;
568 class_tab['\t'] |= RI_WHITE;
569 done = TRUE;
570}
571
/*
 * Character class tests that look up the RI_ flags in class_tab[].  Each one
 * evaluates to non-zero when "c" has the flag, zero otherwise.
 *
 * With FEAT_MBYTE "c" may be a character value of 0x100 or more, which is
 * never in any of these classes; that is checked before indexing the
 * 256-entry table.  In that variant "c" is evaluated twice, so do not pass an
 * expression with side effects.  A negative "c" is not checked for.
 *
 * The argument is parenthesized so that an expression such as
 * ri_digit(x & 0xff) is compared and indexed as a whole.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[(c)] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[(c)] & RI_DIGIT)
# define ri_hex(c)	(class_tab[(c)] & RI_HEX)
# define ri_octal(c)	(class_tab[(c)] & RI_OCTAL)
# define ri_word(c)	(class_tab[(c)] & RI_WORD)
# define ri_head(c)	(class_tab[(c)] & RI_HEAD)
# define ri_alpha(c)	(class_tab[(c)] & RI_ALPHA)
# define ri_lower(c)	(class_tab[(c)] & RI_LOWER)
# define ri_upper(c)	(class_tab[(c)] & RI_UPPER)
# define ri_white(c)	(class_tab[(c)] & RI_WHITE)
#endif
593
594/* flags for regflags */
595#define RF_ICASE 1 /* ignore case */
596#define RF_NOICASE 2 /* don't ignore case */
597#define RF_HASNL 4 /* can match a NL */
598#define RF_ICOMBINE 8 /* ignore combining characters */
599#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
600
601/*
602 * Global work variables for vim_regcomp().
603 */
604
605static char_u *regparse; /* Input-scan pointer. */
606static int prevchr_len; /* byte length of previous char */
607static int num_complex_braces; /* Complex \{...} count */
608static int regnpar; /* () count. */
609#ifdef FEAT_SYN_HL
610static int regnzpar; /* \z() count. */
611static int re_has_z; /* \z item detected */
612#endif
613static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
614static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000615static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000616static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
617static unsigned regflags; /* RF_ flags for prog */
618static long brace_min[10]; /* Minimums for complex brace repeats */
619static long brace_max[10]; /* Maximums for complex brace repeats */
620static int brace_count[10]; /* Current counts for complex brace repeats */
621#if defined(FEAT_SYN_HL) || defined(PROTO)
622static int had_eol; /* TRUE when EOL found by vim_regcomp() */
623#endif
624static int one_exactly = FALSE; /* only do one char for EXACTLY */
625
626static int reg_magic; /* magicness of the pattern: */
627#define MAGIC_NONE 1 /* "\V" very unmagic */
628#define MAGIC_OFF 2 /* "\M" or 'magic' off */
629#define MAGIC_ON 3 /* "\m" or 'magic' */
630#define MAGIC_ALL 4 /* "\v" very magic */
631
632static int reg_string; /* matching with a string instead of a buffer
633 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000634static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000635
636/*
637 * META contains all characters that may be magic, except '^' and '$'.
638 */
639
640#ifdef EBCDIC
641static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
642#else
643/* META[] is used often enough to justify turning it into a table. */
644static char_u META_flags[] = {
645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647/* % & ( ) * + . */
648 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
649/* 1 2 3 4 5 6 7 8 9 < = > ? */
650 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
651/* @ A C D F H I K L M O */
652 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
653/* P S U V W X Z [ _ */
654 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
655/* a c d f h i k l m n o */
656 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
657/* p s u v w x z { | ~ */
658 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
659};
660#endif
661
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200662static int curchr; /* currently parsed character */
663/* Previous character. Note: prevchr is sometimes -1 when we are not at the
664 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
665 * because ^ was taken to be magic -- webb */
666static int prevchr;
667static int prevprevchr; /* previous-previous character */
668static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000669
670/* arguments for reg() */
671#define REG_NOPAREN 0 /* toplevel reg() */
672#define REG_PAREN 1 /* \(\) */
673#define REG_ZPAREN 2 /* \z(\) */
674#define REG_NPAREN 3 /* \%(\) */
675
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200676typedef struct
677{
678 char_u *regparse;
679 int prevchr_len;
680 int curchr;
681 int prevchr;
682 int prevprevchr;
683 int nextchr;
684 int at_start;
685 int prev_at_start;
686 int regnpar;
687} parse_state_T;
688
Bram Moolenaar071d4272004-06-13 20:20:40 +0000689/*
690 * Forward declarations for vim_regcomp()'s friends.
691 */
692static void initchr __ARGS((char_u *));
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200693static void save_parse_state __ARGS((parse_state_T *ps));
694static void restore_parse_state __ARGS((parse_state_T *ps));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000695static int getchr __ARGS((void));
696static void skipchr_keepstart __ARGS((void));
697static int peekchr __ARGS((void));
698static void skipchr __ARGS((void));
699static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000700static int gethexchrs __ARGS((int maxinputlen));
701static int getoctchrs __ARGS((void));
702static int getdecchrs __ARGS((void));
703static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000704static void regcomp_start __ARGS((char_u *expr, int flags));
705static char_u *reg __ARGS((int, int *));
706static char_u *regbranch __ARGS((int *flagp));
707static char_u *regconcat __ARGS((int *flagp));
708static char_u *regpiece __ARGS((int *));
709static char_u *regatom __ARGS((int *));
710static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000711#ifdef FEAT_MBYTE
712static int use_multibytecode __ARGS((int c));
713#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714static int prog_magic_wrong __ARGS((void));
715static char_u *regnext __ARGS((char_u *));
716static void regc __ARGS((int b));
717#ifdef FEAT_MBYTE
718static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200719# define REGMBC(x) regmbc(x);
720# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000721#else
722# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200723# define REGMBC(x)
724# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000725#endif
726static void reginsert __ARGS((int, char_u *));
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200727static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000728static void reginsert_limits __ARGS((int, long, long, char_u *));
729static char_u *re_put_long __ARGS((char_u *pr, long_u val));
730static int read_limits __ARGS((long *, long *));
731static void regtail __ARGS((char_u *, char_u *));
732static void regoptail __ARGS((char_u *, char_u *));
733
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200734static regengine_T bt_regengine;
735static regengine_T nfa_regengine;
736
Bram Moolenaar071d4272004-06-13 20:20:40 +0000737/*
738 * Return TRUE if compiled regular expression "prog" can match a line break.
739 */
740 int
741re_multiline(prog)
742 regprog_T *prog;
743{
744 return (prog->regflags & RF_HASNL);
745}
746
747/*
748 * Return TRUE if compiled regular expression "prog" looks before the start
749 * position (pattern contains "\@<=" or "\@<!").
750 */
751 int
752re_lookbehind(prog)
753 regprog_T *prog;
754{
755 return (prog->regflags & RF_LOOKBH);
756}
757
758/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000759 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
760 * Returns a character representing the class. Zero means that no item was
761 * recognized. Otherwise "pp" is advanced to after the item.
762 */
763 static int
764get_equi_class(pp)
765 char_u **pp;
766{
767 int c;
768 int l = 1;
769 char_u *p = *pp;
770
771 if (p[1] == '=')
772 {
773#ifdef FEAT_MBYTE
774 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000775 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000776#endif
777 if (p[l + 2] == '=' && p[l + 3] == ']')
778 {
779#ifdef FEAT_MBYTE
780 if (has_mbyte)
781 c = mb_ptr2char(p + 2);
782 else
783#endif
784 c = p[2];
785 *pp += l + 4;
786 return c;
787 }
788 }
789 return 0;
790}
791
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200792#ifdef EBCDIC
793/*
794 * Table for equivalence class "c". (IBM-1047)
795 */
796char *EQUIVAL_CLASS_C[16] = {
797 "A\x62\x63\x64\x65\x66\x67",
798 "C\x68",
799 "E\x71\x72\x73\x74",
800 "I\x75\x76\x77\x78",
801 "N\x69",
802 "O\xEB\xEC\xED\xEE\xEF",
803 "U\xFB\xFC\xFD\xFE",
804 "Y\xBA",
805 "a\x42\x43\x44\x45\x46\x47",
806 "c\x48",
807 "e\x51\x52\x53\x54",
808 "i\x55\x56\x57\x58",
809 "n\x49",
810 "o\xCB\xCC\xCD\xCE\xCF",
811 "u\xDB\xDC\xDD\xDE",
812 "y\x8D\xDF",
813};
814#endif
815
Bram Moolenaardf177f62005-02-22 08:39:57 +0000816/*
817 * Produce the bytes for equivalence class "c".
818 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200819 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000820 */
821 static void
822reg_equi_class(c)
823 int c;
824{
825#ifdef FEAT_MBYTE
826 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000827 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000828#endif
829 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200830#ifdef EBCDIC
831 int i;
832
833 /* This might be slower than switch/case below. */
834 for (i = 0; i < 16; i++)
835 {
836 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
837 {
838 char *p = EQUIVAL_CLASS_C[i];
839
840 while (*p != 0)
841 regmbc(*p++);
842 return;
843 }
844 }
845#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000846 switch (c)
847 {
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200848 /* Do not use '\300' style, it results in a negative number. */
849 case 'A': case 0xc0: case 0xc1: case 0xc2:
850 case 0xc3: case 0xc4: case 0xc5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200851 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
852 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200853 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
854 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
855 regmbc(0xc5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200856 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
857 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
858 REGMBC(0x1ea2)
859 return;
860 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
861 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000862 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200863 case 'C': case 0xc7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200864 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200865 regmbc('C'); regmbc(0xc7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200866 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
867 REGMBC(0x10c)
868 return;
869 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
870 CASEMBC(0x1e0e) CASEMBC(0x1e10)
871 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
872 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000873 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200874 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200875 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
876 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200877 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
878 regmbc(0xca); regmbc(0xcb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200879 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
880 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
881 REGMBC(0x1ebc)
882 return;
883 case 'F': CASEMBC(0x1e1e)
884 regmbc('F'); REGMBC(0x1e1e)
885 return;
886 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
887 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
888 CASEMBC(0x1e20)
889 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
890 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
891 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
892 return;
893 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
894 CASEMBC(0x1e26) CASEMBC(0x1e28)
895 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
896 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000897 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200898 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200899 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
900 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200901 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
902 regmbc(0xce); regmbc(0xcf);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200903 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
904 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
905 REGMBC(0x1ec8)
906 return;
907 case 'J': CASEMBC(0x134)
908 regmbc('J'); REGMBC(0x134)
909 return;
910 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
911 CASEMBC(0x1e34)
912 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
913 REGMBC(0x1e30) REGMBC(0x1e34)
914 return;
915 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
916 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
917 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
918 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
919 REGMBC(0x1e3a)
920 return;
921 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
922 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000923 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200924 case 'N': case 0xd1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200925 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
926 CASEMBC(0x1e48)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200927 regmbc('N'); regmbc(0xd1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200928 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
929 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000930 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200931 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
932 case 0xd6: case 0xd8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200933 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
934 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200935 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
936 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
937 regmbc(0xd8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200938 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
939 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
940 REGMBC(0x1ec) REGMBC(0x1ece)
941 return;
942 case 'P': case 0x1e54: case 0x1e56:
943 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
944 return;
945 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
946 CASEMBC(0x1e58) CASEMBC(0x1e5e)
947 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
948 REGMBC(0x1e58) REGMBC(0x1e5e)
949 return;
950 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
951 CASEMBC(0x160) CASEMBC(0x1e60)
952 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
953 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
954 return;
955 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
956 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
957 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
958 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000959 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200960 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200961 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
962 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
963 CASEMBC(0x1ee6)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200964 regmbc('U'); regmbc(0xd9); regmbc(0xda);
965 regmbc(0xdb); regmbc(0xdc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200966 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
967 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
968 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
969 return;
970 case 'V': CASEMBC(0x1e7c)
971 regmbc('V'); REGMBC(0x1e7c)
972 return;
973 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
974 CASEMBC(0x1e84) CASEMBC(0x1e86)
975 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
976 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
977 return;
978 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
979 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000980 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200981 case 'Y': case 0xdd:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200982 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
983 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200984 regmbc('Y'); regmbc(0xdd);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200985 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
986 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
987 return;
988 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
989 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
990 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
991 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
992 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000993 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200994 case 'a': case 0xe0: case 0xe1: case 0xe2:
995 case 0xe3: case 0xe4: case 0xe5:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200996 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
997 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaard82a2a92015-04-21 14:02:35 +0200998 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
999 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
1000 regmbc(0xe5);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001001 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
1002 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
1003 REGMBC(0x1ea3)
1004 return;
1005 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
1006 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001007 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001008 case 'c': case 0xe7:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001009 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001010 regmbc('c'); regmbc(0xe7);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001011 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1012 REGMBC(0x10d)
1013 return;
1014 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
1015 CASEMBC(0x1e11)
1016 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
1017 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001018 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001019 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001020 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1021 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001022 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1023 regmbc(0xea); regmbc(0xeb);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001024 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1025 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1026 REGMBC(0x1ebd)
1027 return;
1028 case 'f': CASEMBC(0x1e1f)
1029 regmbc('f'); REGMBC(0x1e1f)
1030 return;
1031 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1032 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1033 CASEMBC(0x1e21)
1034 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1035 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1036 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1037 return;
1038 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1039 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1040 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1041 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1042 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001043 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001044 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001045 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1046 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001047 regmbc('i'); regmbc(0xec); regmbc(0xed);
1048 regmbc(0xee); regmbc(0xef);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001049 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1050 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1051 return;
1052 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1053 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1054 return;
1055 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1056 CASEMBC(0x1e35)
1057 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1058 REGMBC(0x1e31) REGMBC(0x1e35)
1059 return;
1060 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1061 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1062 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1063 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1064 REGMBC(0x1e3b)
1065 return;
1066 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1067 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001068 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001069 case 'n': case 0xf1:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001070 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1071 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001072 regmbc('n'); regmbc(0xf1);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001073 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1074 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001075 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001076 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1077 case 0xf6: case 0xf8:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001078 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1079 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001080 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1081 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1082 regmbc(0xf8);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001083 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1084 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1085 REGMBC(0x1ed) REGMBC(0x1ecf)
1086 return;
1087 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1088 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1089 return;
1090 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1091 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1092 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1093 REGMBC(0x1e59) REGMBC(0x1e5f)
1094 return;
1095 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1096 CASEMBC(0x161) CASEMBC(0x1e61)
1097 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1098 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1099 return;
1100 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1101 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1102 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1103 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001104 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001105 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001106 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1107 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1108 CASEMBC(0x1ee7)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001109 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1110 regmbc(0xfb); regmbc(0xfc);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001111 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1112 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1113 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1114 return;
1115 case 'v': CASEMBC(0x1e7d)
1116 regmbc('v'); REGMBC(0x1e7d)
1117 return;
1118 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1119 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1120 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1121 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1122 REGMBC(0x1e98)
1123 return;
1124 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1125 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001126 return;
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001127 case 'y': case 0xfd: case 0xff:
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001128 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1129 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaard82a2a92015-04-21 14:02:35 +02001130 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001131 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1132 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1133 return;
1134 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1135 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1136 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1137 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1138 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001139 return;
1140 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001141#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001142 }
1143 regmbc(c);
1144}
1145
1146/*
1147 * Check for a collating element "[.a.]". "pp" points to the '['.
1148 * Returns a character. Zero means that no item was recognized. Otherwise
1149 * "pp" is advanced to after the item.
1150 * Currently only single characters are recognized!
1151 */
1152 static int
1153get_coll_element(pp)
1154 char_u **pp;
1155{
1156 int c;
1157 int l = 1;
1158 char_u *p = *pp;
1159
1160 if (p[1] == '.')
1161 {
1162#ifdef FEAT_MBYTE
1163 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001164 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001165#endif
1166 if (p[l + 2] == '.' && p[l + 3] == ']')
1167 {
1168#ifdef FEAT_MBYTE
1169 if (has_mbyte)
1170 c = mb_ptr2char(p + 2);
1171 else
1172#endif
1173 c = p[2];
1174 *pp += l + 4;
1175 return c;
1176 }
1177 }
1178 return 0;
1179}
1180
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001181static void get_cpo_flags __ARGS((void));
1182static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1183static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1184
1185 static void
1186get_cpo_flags()
1187{
1188 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1189 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1190}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001191
1192/*
1193 * Skip over a "[]" range.
1194 * "p" must point to the character after the '['.
1195 * The returned pointer is on the matching ']', or the terminating NUL.
1196 */
1197 static char_u *
1198skip_anyof(p)
1199 char_u *p;
1200{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001201#ifdef FEAT_MBYTE
1202 int l;
1203#endif
1204
Bram Moolenaardf177f62005-02-22 08:39:57 +00001205 if (*p == '^') /* Complement of range. */
1206 ++p;
1207 if (*p == ']' || *p == '-')
1208 ++p;
1209 while (*p != NUL && *p != ']')
1210 {
1211#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001212 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001213 p += l;
1214 else
1215#endif
1216 if (*p == '-')
1217 {
1218 ++p;
1219 if (*p != ']' && *p != NUL)
1220 mb_ptr_adv(p);
1221 }
1222 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001223 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001224 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001225 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001226 p += 2;
1227 else if (*p == '[')
1228 {
1229 if (get_char_class(&p) == CLASS_NONE
1230 && get_equi_class(&p) == 0
1231 && get_coll_element(&p) == 0)
1232 ++p; /* It was not a class name */
1233 }
1234 else
1235 ++p;
1236 }
1237
1238 return p;
1239}
1240
1241/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001242 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001243 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001244 * Take care of characters with a backslash in front of it.
1245 * Skip strings inside [ and ].
1246 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1247 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1248 * is changed in-place.
1249 */
1250 char_u *
1251skip_regexp(startp, dirc, magic, newp)
1252 char_u *startp;
1253 int dirc;
1254 int magic;
1255 char_u **newp;
1256{
1257 int mymagic;
1258 char_u *p = startp;
1259
1260 if (magic)
1261 mymagic = MAGIC_ON;
1262 else
1263 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001264 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001265
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001266 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 {
1268 if (p[0] == dirc) /* found end of regexp */
1269 break;
1270 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1271 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1272 {
1273 p = skip_anyof(p + 1);
1274 if (p[0] == NUL)
1275 break;
1276 }
1277 else if (p[0] == '\\' && p[1] != NUL)
1278 {
1279 if (dirc == '?' && newp != NULL && p[1] == '?')
1280 {
1281 /* change "\?" to "?", make a copy first. */
1282 if (*newp == NULL)
1283 {
1284 *newp = vim_strsave(startp);
1285 if (*newp != NULL)
1286 p = *newp + (p - startp);
1287 }
1288 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001289 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001290 else
1291 ++p;
1292 }
1293 else
1294 ++p; /* skip next character */
1295 if (*p == 'v')
1296 mymagic = MAGIC_ALL;
1297 else if (*p == 'V')
1298 mymagic = MAGIC_NONE;
1299 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300 }
1301 return p;
1302}
1303
Bram Moolenaar473de612013-06-08 18:19:48 +02001304static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
1305static void bt_regfree __ARGS((regprog_T *prog));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001306
Bram Moolenaar071d4272004-06-13 20:20:40 +00001307/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001308 * bt_regcomp() - compile a regular expression into internal code for the
1309 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001310 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001311 *
1312 * We can't allocate space until we know how big the compiled form will be,
1313 * but we can't compile it (and thus know how big it is) until we've got a
1314 * place to put the code. So we cheat: we compile it twice, once with code
1315 * generation turned off and size counting turned on, and once "for real".
1316 * This also means that we don't allocate space until we are sure that the
1317 * thing really will compile successfully, and we never have to move the
1318 * code and thus invalidate pointers into it. (Note that it has to be in
1319 * one piece because vim_free() must be able to free it all.)
1320 *
1321 * Whether upper/lower case is to be ignored is decided when executing the
1322 * program, it does not matter here.
1323 *
1324 * Beware that the optimization-preparation code in here knows about some
1325 * of the structure of the compiled regexp.
1326 * "re_flags": RE_MAGIC and/or RE_STRING.
1327 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001328 static regprog_T *
1329bt_regcomp(expr, re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001330 char_u *expr;
1331 int re_flags;
1332{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001333 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001334 char_u *scan;
1335 char_u *longest;
1336 int len;
1337 int flags;
1338
1339 if (expr == NULL)
1340 EMSG_RET_NULL(_(e_null));
1341
1342 init_class_tab();
1343
1344 /*
1345 * First pass: determine size, legality.
1346 */
1347 regcomp_start(expr, re_flags);
1348 regcode = JUST_CALC_SIZE;
1349 regc(REGMAGIC);
1350 if (reg(REG_NOPAREN, &flags) == NULL)
1351 return NULL;
1352
1353 /* Small enough for pointer-storage convention? */
1354#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1355 if (regsize >= 65536L - 256L)
1356 EMSG_RET_NULL(_("E339: Pattern too long"));
1357#endif
1358
1359 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001360 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001361 if (r == NULL)
1362 return NULL;
1363
1364 /*
1365 * Second pass: emit code.
1366 */
1367 regcomp_start(expr, re_flags);
1368 regcode = r->program;
1369 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001370 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001371 {
1372 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001373 if (reg_toolong)
1374 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001375 return NULL;
1376 }
1377
1378 /* Dig out information for optimizations. */
1379 r->regstart = NUL; /* Worst-case defaults. */
1380 r->reganch = 0;
1381 r->regmust = NULL;
1382 r->regmlen = 0;
1383 r->regflags = regflags;
1384 if (flags & HASNL)
1385 r->regflags |= RF_HASNL;
1386 if (flags & HASLOOKBH)
1387 r->regflags |= RF_LOOKBH;
1388#ifdef FEAT_SYN_HL
1389 /* Remember whether this pattern has any \z specials in it. */
1390 r->reghasz = re_has_z;
1391#endif
1392 scan = r->program + 1; /* First BRANCH. */
1393 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1394 {
1395 scan = OPERAND(scan);
1396
1397 /* Starting-point info. */
1398 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1399 {
1400 r->reganch++;
1401 scan = regnext(scan);
1402 }
1403
1404 if (OP(scan) == EXACTLY)
1405 {
1406#ifdef FEAT_MBYTE
1407 if (has_mbyte)
1408 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1409 else
1410#endif
1411 r->regstart = *OPERAND(scan);
1412 }
1413 else if ((OP(scan) == BOW
1414 || OP(scan) == EOW
1415 || OP(scan) == NOTHING
1416 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1417 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1418 && OP(regnext(scan)) == EXACTLY)
1419 {
1420#ifdef FEAT_MBYTE
1421 if (has_mbyte)
1422 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1423 else
1424#endif
1425 r->regstart = *OPERAND(regnext(scan));
1426 }
1427
1428 /*
1429 * If there's something expensive in the r.e., find the longest
1430 * literal string that must appear and make it the regmust. Resolve
1431 * ties in favor of later strings, since the regstart check works
1432 * with the beginning of the r.e. and avoiding duplication
1433 * strengthens checking. Not a strong reason, but sufficient in the
1434 * absence of others.
1435 */
1436 /*
1437 * When the r.e. starts with BOW, it is faster to look for a regmust
1438 * first. Used a lot for "#" and "*" commands. (Added by mool).
1439 */
1440 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1441 && !(flags & HASNL))
1442 {
1443 longest = NULL;
1444 len = 0;
1445 for (; scan != NULL; scan = regnext(scan))
1446 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1447 {
1448 longest = OPERAND(scan);
1449 len = (int)STRLEN(OPERAND(scan));
1450 }
1451 r->regmust = longest;
1452 r->regmlen = len;
1453 }
1454 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001455#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001456 regdump(expr, r);
1457#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001458 r->engine = &bt_regengine;
1459 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001460}
1461
1462/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001463 * Free a compiled regexp program, returned by bt_regcomp().
1464 */
1465 static void
1466bt_regfree(prog)
1467 regprog_T *prog;
1468{
1469 vim_free(prog);
1470}
1471
1472/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001473 * Setup to parse the regexp. Used once to get the length and once to do it.
1474 */
1475 static void
1476regcomp_start(expr, re_flags)
1477 char_u *expr;
1478 int re_flags; /* see vim_regcomp() */
1479{
1480 initchr(expr);
1481 if (re_flags & RE_MAGIC)
1482 reg_magic = MAGIC_ON;
1483 else
1484 reg_magic = MAGIC_OFF;
1485 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001486 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001487 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001488
1489 num_complex_braces = 0;
1490 regnpar = 1;
1491 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1492#ifdef FEAT_SYN_HL
1493 regnzpar = 1;
1494 re_has_z = 0;
1495#endif
1496 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001497 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001498 regflags = 0;
1499#if defined(FEAT_SYN_HL) || defined(PROTO)
1500 had_eol = FALSE;
1501#endif
1502}
1503
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: whether the EOL item "$" was found during the
 * previous call to vim_regcomp().  The flag is reset when compiling starts.
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1515
1516/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001517 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001518 *
1519 * Caller must absorb opening parenthesis.
1520 *
1521 * Combining parenthesis handling with the base level of regular expression
1522 * is a trifle forced, but the need to tie the tails of the branches to what
1523 * follows makes it hard to avoid.
1524 */
1525 static char_u *
1526reg(paren, flagp)
1527 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1528 int *flagp;
1529{
1530 char_u *ret;
1531 char_u *br;
1532 char_u *ender;
1533 int parno = 0;
1534 int flags;
1535
1536 *flagp = HASWIDTH; /* Tentatively. */
1537
1538#ifdef FEAT_SYN_HL
1539 if (paren == REG_ZPAREN)
1540 {
1541 /* Make a ZOPEN node. */
1542 if (regnzpar >= NSUBEXP)
1543 EMSG_RET_NULL(_("E50: Too many \\z("));
1544 parno = regnzpar;
1545 regnzpar++;
1546 ret = regnode(ZOPEN + parno);
1547 }
1548 else
1549#endif
1550 if (paren == REG_PAREN)
1551 {
1552 /* Make a MOPEN node. */
1553 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001554 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001555 parno = regnpar;
1556 ++regnpar;
1557 ret = regnode(MOPEN + parno);
1558 }
1559 else if (paren == REG_NPAREN)
1560 {
1561 /* Make a NOPEN node. */
1562 ret = regnode(NOPEN);
1563 }
1564 else
1565 ret = NULL;
1566
1567 /* Pick up the branches, linking them together. */
1568 br = regbranch(&flags);
1569 if (br == NULL)
1570 return NULL;
1571 if (ret != NULL)
1572 regtail(ret, br); /* [MZ]OPEN -> first. */
1573 else
1574 ret = br;
1575 /* If one of the branches can be zero-width, the whole thing can.
1576 * If one of the branches has * at start or matches a line-break, the
1577 * whole thing can. */
1578 if (!(flags & HASWIDTH))
1579 *flagp &= ~HASWIDTH;
1580 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1581 while (peekchr() == Magic('|'))
1582 {
1583 skipchr();
1584 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001585 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001586 return NULL;
1587 regtail(ret, br); /* BRANCH -> BRANCH. */
1588 if (!(flags & HASWIDTH))
1589 *flagp &= ~HASWIDTH;
1590 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1591 }
1592
1593 /* Make a closing node, and hook it on the end. */
1594 ender = regnode(
1595#ifdef FEAT_SYN_HL
1596 paren == REG_ZPAREN ? ZCLOSE + parno :
1597#endif
1598 paren == REG_PAREN ? MCLOSE + parno :
1599 paren == REG_NPAREN ? NCLOSE : END);
1600 regtail(ret, ender);
1601
1602 /* Hook the tails of the branches to the closing node. */
1603 for (br = ret; br != NULL; br = regnext(br))
1604 regoptail(br, ender);
1605
1606 /* Check for proper termination. */
1607 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1608 {
1609#ifdef FEAT_SYN_HL
1610 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001611 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001612 else
1613#endif
1614 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001615 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001616 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001617 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001618 }
1619 else if (paren == REG_NOPAREN && peekchr() != NUL)
1620 {
1621 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001622 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001624 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001625 /* NOTREACHED */
1626 }
1627 /*
1628 * Here we set the flag allowing back references to this set of
1629 * parentheses.
1630 */
1631 if (paren == REG_PAREN)
1632 had_endbrace[parno] = TRUE; /* have seen the close paren */
1633 return ret;
1634}
1635
1636/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001637 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001638 * Implements the & operator.
1639 */
1640 static char_u *
1641regbranch(flagp)
1642 int *flagp;
1643{
1644 char_u *ret;
1645 char_u *chain = NULL;
1646 char_u *latest;
1647 int flags;
1648
1649 *flagp = WORST | HASNL; /* Tentatively. */
1650
1651 ret = regnode(BRANCH);
1652 for (;;)
1653 {
1654 latest = regconcat(&flags);
1655 if (latest == NULL)
1656 return NULL;
1657 /* If one of the branches has width, the whole thing has. If one of
1658 * the branches anchors at start-of-line, the whole thing does.
1659 * If one of the branches uses look-behind, the whole thing does. */
1660 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1661 /* If one of the branches doesn't match a line-break, the whole thing
1662 * doesn't. */
1663 *flagp &= ~HASNL | (flags & HASNL);
1664 if (chain != NULL)
1665 regtail(chain, latest);
1666 if (peekchr() != Magic('&'))
1667 break;
1668 skipchr();
1669 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001670 if (reg_toolong)
1671 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001672 reginsert(MATCH, latest);
1673 chain = latest;
1674 }
1675
1676 return ret;
1677}
1678
1679/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001680 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001681 * Implements the concatenation operator.
1682 */
1683 static char_u *
1684regconcat(flagp)
1685 int *flagp;
1686{
1687 char_u *first = NULL;
1688 char_u *chain = NULL;
1689 char_u *latest;
1690 int flags;
1691 int cont = TRUE;
1692
1693 *flagp = WORST; /* Tentatively. */
1694
1695 while (cont)
1696 {
1697 switch (peekchr())
1698 {
1699 case NUL:
1700 case Magic('|'):
1701 case Magic('&'):
1702 case Magic(')'):
1703 cont = FALSE;
1704 break;
1705 case Magic('Z'):
1706#ifdef FEAT_MBYTE
1707 regflags |= RF_ICOMBINE;
1708#endif
1709 skipchr_keepstart();
1710 break;
1711 case Magic('c'):
1712 regflags |= RF_ICASE;
1713 skipchr_keepstart();
1714 break;
1715 case Magic('C'):
1716 regflags |= RF_NOICASE;
1717 skipchr_keepstart();
1718 break;
1719 case Magic('v'):
1720 reg_magic = MAGIC_ALL;
1721 skipchr_keepstart();
1722 curchr = -1;
1723 break;
1724 case Magic('m'):
1725 reg_magic = MAGIC_ON;
1726 skipchr_keepstart();
1727 curchr = -1;
1728 break;
1729 case Magic('M'):
1730 reg_magic = MAGIC_OFF;
1731 skipchr_keepstart();
1732 curchr = -1;
1733 break;
1734 case Magic('V'):
1735 reg_magic = MAGIC_NONE;
1736 skipchr_keepstart();
1737 curchr = -1;
1738 break;
1739 default:
1740 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001741 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001742 return NULL;
1743 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1744 if (chain == NULL) /* First piece. */
1745 *flagp |= flags & SPSTART;
1746 else
1747 regtail(chain, latest);
1748 chain = latest;
1749 if (first == NULL)
1750 first = latest;
1751 break;
1752 }
1753 }
1754 if (first == NULL) /* Loop ran zero times. */
1755 first = regnode(NOTHING);
1756 return first;
1757}
1758
1759/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001760 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001761 *
1762 * Note that the branching code sequences used for = and the general cases
1763 * of * and + are somewhat optimized: they use the same NOTHING node as
1764 * both the endmarker for their branch list and the body of the last branch.
1765 * It might seem that this node could be dispensed with entirely, but the
1766 * endmarker role is not redundant.
1767 */
1768 static char_u *
1769regpiece(flagp)
1770 int *flagp;
1771{
1772 char_u *ret;
1773 int op;
1774 char_u *next;
1775 int flags;
1776 long minval;
1777 long maxval;
1778
1779 ret = regatom(&flags);
1780 if (ret == NULL)
1781 return NULL;
1782
1783 op = peekchr();
1784 if (re_multi_type(op) == NOT_MULTI)
1785 {
1786 *flagp = flags;
1787 return ret;
1788 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001789 /* default flags */
1790 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1791
1792 skipchr();
1793 switch (op)
1794 {
1795 case Magic('*'):
1796 if (flags & SIMPLE)
1797 reginsert(STAR, ret);
1798 else
1799 {
1800 /* Emit x* as (x&|), where & means "self". */
1801 reginsert(BRANCH, ret); /* Either x */
1802 regoptail(ret, regnode(BACK)); /* and loop */
1803 regoptail(ret, ret); /* back */
1804 regtail(ret, regnode(BRANCH)); /* or */
1805 regtail(ret, regnode(NOTHING)); /* null. */
1806 }
1807 break;
1808
1809 case Magic('+'):
1810 if (flags & SIMPLE)
1811 reginsert(PLUS, ret);
1812 else
1813 {
1814 /* Emit x+ as x(&|), where & means "self". */
1815 next = regnode(BRANCH); /* Either */
1816 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001817 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001818 regtail(next, regnode(BRANCH)); /* or */
1819 regtail(ret, regnode(NOTHING)); /* null. */
1820 }
1821 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1822 break;
1823
1824 case Magic('@'):
1825 {
1826 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001827 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001828
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001829 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001830 switch (no_Magic(getchr()))
1831 {
1832 case '=': lop = MATCH; break; /* \@= */
1833 case '!': lop = NOMATCH; break; /* \@! */
1834 case '>': lop = SUBPAT; break; /* \@> */
1835 case '<': switch (no_Magic(getchr()))
1836 {
1837 case '=': lop = BEHIND; break; /* \@<= */
1838 case '!': lop = NOBEHIND; break; /* \@<! */
1839 }
1840 }
1841 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001842 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001843 reg_magic == MAGIC_ALL);
1844 /* Look behind must match with behind_pos. */
1845 if (lop == BEHIND || lop == NOBEHIND)
1846 {
1847 regtail(ret, regnode(BHPOS));
1848 *flagp |= HASLOOKBH;
1849 }
1850 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001851 if (lop == BEHIND || lop == NOBEHIND)
1852 {
1853 if (nr < 0)
1854 nr = 0; /* no limit is same as zero limit */
1855 reginsert_nr(lop, nr, ret);
1856 }
1857 else
1858 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001859 break;
1860 }
1861
1862 case Magic('?'):
1863 case Magic('='):
1864 /* Emit x= as (x|) */
1865 reginsert(BRANCH, ret); /* Either x */
1866 regtail(ret, regnode(BRANCH)); /* or */
1867 next = regnode(NOTHING); /* null. */
1868 regtail(ret, next);
1869 regoptail(ret, next);
1870 break;
1871
1872 case Magic('{'):
1873 if (!read_limits(&minval, &maxval))
1874 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001875 if (flags & SIMPLE)
1876 {
1877 reginsert(BRACE_SIMPLE, ret);
1878 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1879 }
1880 else
1881 {
1882 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001883 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001884 reg_magic == MAGIC_ALL);
1885 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1886 regoptail(ret, regnode(BACK));
1887 regoptail(ret, ret);
1888 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1889 ++num_complex_braces;
1890 }
1891 if (minval > 0 && maxval > 0)
1892 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1893 break;
1894 }
1895 if (re_multi_type(peekchr()) != NOT_MULTI)
1896 {
1897 /* Can't have a multi follow a multi. */
1898 if (peekchr() == Magic('*'))
1899 sprintf((char *)IObuff, _("E61: Nested %s*"),
1900 reg_magic >= MAGIC_ON ? "" : "\\");
1901 else
1902 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1903 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1904 EMSG_RET_NULL(IObuff);
1905 }
1906
1907 return ret;
1908}
1909
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001910/* When making changes to classchars also change nfa_classcodes. */
1911static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1912static int classcodes[] = {
1913 ANY, IDENT, SIDENT, KWORD, SKWORD,
1914 FNAME, SFNAME, PRINT, SPRINT,
1915 WHITE, NWHITE, DIGIT, NDIGIT,
1916 HEX, NHEX, OCTAL, NOCTAL,
1917 WORD, NWORD, HEAD, NHEAD,
1918 ALPHA, NALPHA, LOWER, NLOWER,
1919 UPPER, NUPPER
1920};
1921
Bram Moolenaar071d4272004-06-13 20:20:40 +00001922/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001923 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001924 *
1925 * Optimization: gobbles an entire sequence of ordinary characters so that
1926 * it can turn them into a single node, which is smaller to store and
1927 * faster to run. Don't do this when one_exactly is set.
1928 */
1929 static char_u *
1930regatom(flagp)
1931 int *flagp;
1932{
1933 char_u *ret;
1934 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001935 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001936 char_u *p;
1937 int extra = 0;
1938
1939 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001940
1941 c = getchr();
1942 switch (c)
1943 {
1944 case Magic('^'):
1945 ret = regnode(BOL);
1946 break;
1947
1948 case Magic('$'):
1949 ret = regnode(EOL);
1950#if defined(FEAT_SYN_HL) || defined(PROTO)
1951 had_eol = TRUE;
1952#endif
1953 break;
1954
1955 case Magic('<'):
1956 ret = regnode(BOW);
1957 break;
1958
1959 case Magic('>'):
1960 ret = regnode(EOW);
1961 break;
1962
1963 case Magic('_'):
1964 c = no_Magic(getchr());
1965 if (c == '^') /* "\_^" is start-of-line */
1966 {
1967 ret = regnode(BOL);
1968 break;
1969 }
1970 if (c == '$') /* "\_$" is end-of-line */
1971 {
1972 ret = regnode(EOL);
1973#if defined(FEAT_SYN_HL) || defined(PROTO)
1974 had_eol = TRUE;
1975#endif
1976 break;
1977 }
1978
1979 extra = ADD_NL;
1980 *flagp |= HASNL;
1981
1982 /* "\_[" is character range plus newline */
1983 if (c == '[')
1984 goto collection;
1985
1986 /* "\_x" is character class plus newline */
1987 /*FALLTHROUGH*/
1988
1989 /*
1990 * Character classes.
1991 */
1992 case Magic('.'):
1993 case Magic('i'):
1994 case Magic('I'):
1995 case Magic('k'):
1996 case Magic('K'):
1997 case Magic('f'):
1998 case Magic('F'):
1999 case Magic('p'):
2000 case Magic('P'):
2001 case Magic('s'):
2002 case Magic('S'):
2003 case Magic('d'):
2004 case Magic('D'):
2005 case Magic('x'):
2006 case Magic('X'):
2007 case Magic('o'):
2008 case Magic('O'):
2009 case Magic('w'):
2010 case Magic('W'):
2011 case Magic('h'):
2012 case Magic('H'):
2013 case Magic('a'):
2014 case Magic('A'):
2015 case Magic('l'):
2016 case Magic('L'):
2017 case Magic('u'):
2018 case Magic('U'):
2019 p = vim_strchr(classchars, no_Magic(c));
2020 if (p == NULL)
2021 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002022#ifdef FEAT_MBYTE
2023 /* When '.' is followed by a composing char ignore the dot, so that
2024 * the composing char is matched here. */
2025 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2026 {
2027 c = getchr();
2028 goto do_multibyte;
2029 }
2030#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002031 ret = regnode(classcodes[p - classchars] + extra);
2032 *flagp |= HASWIDTH | SIMPLE;
2033 break;
2034
2035 case Magic('n'):
2036 if (reg_string)
2037 {
2038 /* In a string "\n" matches a newline character. */
2039 ret = regnode(EXACTLY);
2040 regc(NL);
2041 regc(NUL);
2042 *flagp |= HASWIDTH | SIMPLE;
2043 }
2044 else
2045 {
2046 /* In buffer text "\n" matches the end of a line. */
2047 ret = regnode(NEWL);
2048 *flagp |= HASWIDTH | HASNL;
2049 }
2050 break;
2051
2052 case Magic('('):
2053 if (one_exactly)
2054 EMSG_ONE_RET_NULL;
2055 ret = reg(REG_PAREN, &flags);
2056 if (ret == NULL)
2057 return NULL;
2058 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2059 break;
2060
2061 case NUL:
2062 case Magic('|'):
2063 case Magic('&'):
2064 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002065 if (one_exactly)
2066 EMSG_ONE_RET_NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002067 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
2068 /* NOTREACHED */
2069
2070 case Magic('='):
2071 case Magic('?'):
2072 case Magic('+'):
2073 case Magic('@'):
2074 case Magic('{'):
2075 case Magic('*'):
2076 c = no_Magic(c);
2077 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2078 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2079 ? "" : "\\", c);
2080 EMSG_RET_NULL(IObuff);
2081 /* NOTREACHED */
2082
2083 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002084 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002085 {
2086 char_u *lp;
2087
2088 ret = regnode(EXACTLY);
2089 lp = reg_prev_sub;
2090 while (*lp != NUL)
2091 regc(*lp++);
2092 regc(NUL);
2093 if (*reg_prev_sub != NUL)
2094 {
2095 *flagp |= HASWIDTH;
2096 if ((lp - reg_prev_sub) == 1)
2097 *flagp |= SIMPLE;
2098 }
2099 }
2100 else
2101 EMSG_RET_NULL(_(e_nopresub));
2102 break;
2103
2104 case Magic('1'):
2105 case Magic('2'):
2106 case Magic('3'):
2107 case Magic('4'):
2108 case Magic('5'):
2109 case Magic('6'):
2110 case Magic('7'):
2111 case Magic('8'):
2112 case Magic('9'):
2113 {
2114 int refnum;
2115
2116 refnum = c - Magic('0');
2117 /*
2118 * Check if the back reference is legal. We must have seen the
2119 * close brace.
2120 * TODO: Should also check that we don't refer to something
2121 * that is repeated (+*=): what instance of the repetition
2122 * should we match?
2123 */
2124 if (!had_endbrace[refnum])
2125 {
2126 /* Trick: check if "@<=" or "@<!" follows, in which case
2127 * the \1 can appear before the referenced match. */
2128 for (p = regparse; *p != NUL; ++p)
2129 if (p[0] == '@' && p[1] == '<'
2130 && (p[2] == '!' || p[2] == '='))
2131 break;
2132 if (*p == NUL)
2133 EMSG_RET_NULL(_("E65: Illegal back reference"));
2134 }
2135 ret = regnode(BACKREF + refnum);
2136 }
2137 break;
2138
Bram Moolenaar071d4272004-06-13 20:20:40 +00002139 case Magic('z'):
2140 {
2141 c = no_Magic(getchr());
2142 switch (c)
2143 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002144#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002145 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002146 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002147 if (one_exactly)
2148 EMSG_ONE_RET_NULL;
2149 ret = reg(REG_ZPAREN, &flags);
2150 if (ret == NULL)
2151 return NULL;
2152 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2153 re_has_z = REX_SET;
2154 break;
2155
2156 case '1':
2157 case '2':
2158 case '3':
2159 case '4':
2160 case '5':
2161 case '6':
2162 case '7':
2163 case '8':
2164 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002165 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166 ret = regnode(ZREF + c - '0');
2167 re_has_z = REX_USE;
2168 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002169#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002170
2171 case 's': ret = regnode(MOPEN + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002172 if (re_mult_next("\\zs") == FAIL)
2173 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002174 break;
2175
2176 case 'e': ret = regnode(MCLOSE + 0);
Bram Moolenaarfb031402014-09-09 17:18:49 +02002177 if (re_mult_next("\\ze") == FAIL)
2178 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002179 break;
2180
2181 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2182 }
2183 }
2184 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002185
2186 case Magic('%'):
2187 {
2188 c = no_Magic(getchr());
2189 switch (c)
2190 {
2191 /* () without a back reference */
2192 case '(':
2193 if (one_exactly)
2194 EMSG_ONE_RET_NULL;
2195 ret = reg(REG_NPAREN, &flags);
2196 if (ret == NULL)
2197 return NULL;
2198 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2199 break;
2200
2201 /* Catch \%^ and \%$ regardless of where they appear in the
2202 * pattern -- regardless of whether or not it makes sense. */
2203 case '^':
2204 ret = regnode(RE_BOF);
2205 break;
2206
2207 case '$':
2208 ret = regnode(RE_EOF);
2209 break;
2210
2211 case '#':
2212 ret = regnode(CURSOR);
2213 break;
2214
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002215 case 'V':
2216 ret = regnode(RE_VISUAL);
2217 break;
2218
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002219 case 'C':
2220 ret = regnode(RE_COMPOSING);
2221 break;
2222
Bram Moolenaar071d4272004-06-13 20:20:40 +00002223 /* \%[abc]: Emit as a list of branches, all ending at the last
2224 * branch which matches nothing. */
2225 case '[':
2226 if (one_exactly) /* doesn't nest */
2227 EMSG_ONE_RET_NULL;
2228 {
2229 char_u *lastbranch;
2230 char_u *lastnode = NULL;
2231 char_u *br;
2232
2233 ret = NULL;
2234 while ((c = getchr()) != ']')
2235 {
2236 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002237 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002238 reg_magic == MAGIC_ALL);
2239 br = regnode(BRANCH);
2240 if (ret == NULL)
2241 ret = br;
2242 else
2243 regtail(lastnode, br);
2244
2245 ungetchr();
2246 one_exactly = TRUE;
2247 lastnode = regatom(flagp);
2248 one_exactly = FALSE;
2249 if (lastnode == NULL)
2250 return NULL;
2251 }
2252 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002253 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002254 reg_magic == MAGIC_ALL);
2255 lastbranch = regnode(BRANCH);
2256 br = regnode(NOTHING);
2257 if (ret != JUST_CALC_SIZE)
2258 {
2259 regtail(lastnode, br);
2260 regtail(lastbranch, br);
2261 /* connect all branches to the NOTHING
2262 * branch at the end */
2263 for (br = ret; br != lastnode; )
2264 {
2265 if (OP(br) == BRANCH)
2266 {
2267 regtail(br, lastbranch);
2268 br = OPERAND(br);
2269 }
2270 else
2271 br = regnext(br);
2272 }
2273 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002274 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002275 break;
2276 }
2277
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002278 case 'd': /* %d123 decimal */
2279 case 'o': /* %o123 octal */
2280 case 'x': /* %xab hex 2 */
2281 case 'u': /* %uabcd hex 4 */
2282 case 'U': /* %U1234abcd hex 8 */
2283 {
2284 int i;
2285
2286 switch (c)
2287 {
2288 case 'd': i = getdecchrs(); break;
2289 case 'o': i = getoctchrs(); break;
2290 case 'x': i = gethexchrs(2); break;
2291 case 'u': i = gethexchrs(4); break;
2292 case 'U': i = gethexchrs(8); break;
2293 default: i = -1; break;
2294 }
2295
2296 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002297 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002298 _("E678: Invalid character after %s%%[dxouU]"),
2299 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002300#ifdef FEAT_MBYTE
2301 if (use_multibytecode(i))
2302 ret = regnode(MULTIBYTECODE);
2303 else
2304#endif
2305 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002306 if (i == 0)
2307 regc(0x0a);
2308 else
2309#ifdef FEAT_MBYTE
2310 regmbc(i);
2311#else
2312 regc(i);
2313#endif
2314 regc(NUL);
2315 *flagp |= HASWIDTH;
2316 break;
2317 }
2318
Bram Moolenaar071d4272004-06-13 20:20:40 +00002319 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002320 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2321 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002322 {
2323 long_u n = 0;
2324 int cmp;
2325
2326 cmp = c;
2327 if (cmp == '<' || cmp == '>')
2328 c = getchr();
2329 while (VIM_ISDIGIT(c))
2330 {
2331 n = n * 10 + (c - '0');
2332 c = getchr();
2333 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002334 if (c == '\'' && n == 0)
2335 {
2336 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2337 c = getchr();
2338 ret = regnode(RE_MARK);
2339 if (ret == JUST_CALC_SIZE)
2340 regsize += 2;
2341 else
2342 {
2343 *regcode++ = c;
2344 *regcode++ = cmp;
2345 }
2346 break;
2347 }
2348 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002349 {
2350 if (c == 'l')
2351 ret = regnode(RE_LNUM);
2352 else if (c == 'c')
2353 ret = regnode(RE_COL);
2354 else
2355 ret = regnode(RE_VCOL);
2356 if (ret == JUST_CALC_SIZE)
2357 regsize += 5;
2358 else
2359 {
2360 /* put the number and the optional
2361 * comparator after the opcode */
2362 regcode = re_put_long(regcode, n);
2363 *regcode++ = cmp;
2364 }
2365 break;
2366 }
2367 }
2368
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002369 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002370 reg_magic == MAGIC_ALL);
2371 }
2372 }
2373 break;
2374
2375 case Magic('['):
2376collection:
2377 {
2378 char_u *lp;
2379
2380 /*
2381 * If there is no matching ']', we assume the '[' is a normal
2382 * character. This makes 'incsearch' and ":help [" work.
2383 */
2384 lp = skip_anyof(regparse);
2385 if (*lp == ']') /* there is a matching ']' */
2386 {
2387 int startc = -1; /* > 0 when next '-' is a range */
2388 int endc;
2389
2390 /*
2391 * In a character class, different parsing rules apply.
2392 * Not even \ is special anymore, nothing is.
2393 */
2394 if (*regparse == '^') /* Complement of range. */
2395 {
2396 ret = regnode(ANYBUT + extra);
2397 regparse++;
2398 }
2399 else
2400 ret = regnode(ANYOF + extra);
2401
2402 /* At the start ']' and '-' mean the literal character. */
2403 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002404 {
2405 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002406 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002407 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002408
2409 while (*regparse != NUL && *regparse != ']')
2410 {
2411 if (*regparse == '-')
2412 {
2413 ++regparse;
2414 /* The '-' is not used for a range at the end and
2415 * after or before a '\n'. */
2416 if (*regparse == ']' || *regparse == NUL
2417 || startc == -1
2418 || (regparse[0] == '\\' && regparse[1] == 'n'))
2419 {
2420 regc('-');
2421 startc = '-'; /* [--x] is a range */
2422 }
2423 else
2424 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002425 /* Also accept "a-[.z.]" */
2426 endc = 0;
2427 if (*regparse == '[')
2428 endc = get_coll_element(&regparse);
2429 if (endc == 0)
2430 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002431#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002432 if (has_mbyte)
2433 endc = mb_ptr2char_adv(&regparse);
2434 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002435#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002436 endc = *regparse++;
2437 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002438
2439 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002440 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002441 endc = coll_get_char();
2442
Bram Moolenaar071d4272004-06-13 20:20:40 +00002443 if (startc > endc)
2444 EMSG_RET_NULL(_(e_invrange));
2445#ifdef FEAT_MBYTE
2446 if (has_mbyte && ((*mb_char2len)(startc) > 1
2447 || (*mb_char2len)(endc) > 1))
2448 {
2449 /* Limit to a range of 256 chars */
2450 if (endc > startc + 256)
2451 EMSG_RET_NULL(_(e_invrange));
2452 while (++startc <= endc)
2453 regmbc(startc);
2454 }
2455 else
2456#endif
2457 {
2458#ifdef EBCDIC
2459 int alpha_only = FALSE;
2460
2461 /* for alphabetical range skip the gaps
2462 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2463 if (isalpha(startc) && isalpha(endc))
2464 alpha_only = TRUE;
2465#endif
2466 while (++startc <= endc)
2467#ifdef EBCDIC
2468 if (!alpha_only || isalpha(startc))
2469#endif
2470 regc(startc);
2471 }
2472 startc = -1;
2473 }
2474 }
2475 /*
2476 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2477 * accepts "\t", "\e", etc., but only when the 'l' flag in
2478 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002479 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002480 */
2481 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002482 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002483 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002484 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002485 && vim_strchr(REGEXP_ABBR,
2486 regparse[1]) != NULL)))
2487 {
2488 regparse++;
2489 if (*regparse == 'n')
2490 {
2491 /* '\n' in range: also match NL */
2492 if (ret != JUST_CALC_SIZE)
2493 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002494 /* Using \n inside [^] does not change what
2495 * matches. "[^\n]" is the same as ".". */
2496 if (*ret == ANYOF)
2497 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002498 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002499 *flagp |= HASNL;
2500 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002501 /* else: must have had a \n already */
2502 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002503 regparse++;
2504 startc = -1;
2505 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002506 else if (*regparse == 'd'
2507 || *regparse == 'o'
2508 || *regparse == 'x'
2509 || *regparse == 'u'
2510 || *regparse == 'U')
2511 {
2512 startc = coll_get_char();
2513 if (startc == 0)
2514 regc(0x0a);
2515 else
2516#ifdef FEAT_MBYTE
2517 regmbc(startc);
2518#else
2519 regc(startc);
2520#endif
2521 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002522 else
2523 {
2524 startc = backslash_trans(*regparse++);
2525 regc(startc);
2526 }
2527 }
2528 else if (*regparse == '[')
2529 {
2530 int c_class;
2531 int cu;
2532
Bram Moolenaardf177f62005-02-22 08:39:57 +00002533 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002534 startc = -1;
2535 /* Characters assumed to be 8 bits! */
2536 switch (c_class)
2537 {
2538 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002539 c_class = get_equi_class(&regparse);
2540 if (c_class != 0)
2541 {
2542 /* produce equivalence class */
2543 reg_equi_class(c_class);
2544 }
2545 else if ((c_class =
2546 get_coll_element(&regparse)) != 0)
2547 {
2548 /* produce a collating element */
2549 regmbc(c_class);
2550 }
2551 else
2552 {
2553 /* literal '[', allow [[-x] as a range */
2554 startc = *regparse++;
2555 regc(startc);
2556 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002557 break;
2558 case CLASS_ALNUM:
2559 for (cu = 1; cu <= 255; cu++)
2560 if (isalnum(cu))
2561 regc(cu);
2562 break;
2563 case CLASS_ALPHA:
2564 for (cu = 1; cu <= 255; cu++)
2565 if (isalpha(cu))
2566 regc(cu);
2567 break;
2568 case CLASS_BLANK:
2569 regc(' ');
2570 regc('\t');
2571 break;
2572 case CLASS_CNTRL:
2573 for (cu = 1; cu <= 255; cu++)
2574 if (iscntrl(cu))
2575 regc(cu);
2576 break;
2577 case CLASS_DIGIT:
2578 for (cu = 1; cu <= 255; cu++)
2579 if (VIM_ISDIGIT(cu))
2580 regc(cu);
2581 break;
2582 case CLASS_GRAPH:
2583 for (cu = 1; cu <= 255; cu++)
2584 if (isgraph(cu))
2585 regc(cu);
2586 break;
2587 case CLASS_LOWER:
2588 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002589 if (MB_ISLOWER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002590 regc(cu);
2591 break;
2592 case CLASS_PRINT:
2593 for (cu = 1; cu <= 255; cu++)
2594 if (vim_isprintc(cu))
2595 regc(cu);
2596 break;
2597 case CLASS_PUNCT:
2598 for (cu = 1; cu <= 255; cu++)
2599 if (ispunct(cu))
2600 regc(cu);
2601 break;
2602 case CLASS_SPACE:
2603 for (cu = 9; cu <= 13; cu++)
2604 regc(cu);
2605 regc(' ');
2606 break;
2607 case CLASS_UPPER:
2608 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002609 if (MB_ISUPPER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002610 regc(cu);
2611 break;
2612 case CLASS_XDIGIT:
2613 for (cu = 1; cu <= 255; cu++)
2614 if (vim_isxdigit(cu))
2615 regc(cu);
2616 break;
2617 case CLASS_TAB:
2618 regc('\t');
2619 break;
2620 case CLASS_RETURN:
2621 regc('\r');
2622 break;
2623 case CLASS_BACKSPACE:
2624 regc('\b');
2625 break;
2626 case CLASS_ESCAPE:
2627 regc('\033');
2628 break;
2629 }
2630 }
2631 else
2632 {
2633#ifdef FEAT_MBYTE
2634 if (has_mbyte)
2635 {
2636 int len;
2637
2638 /* produce a multibyte character, including any
2639 * following composing characters */
2640 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002641 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002642 if (enc_utf8 && utf_char2len(startc) != len)
2643 startc = -1; /* composing chars */
2644 while (--len >= 0)
2645 regc(*regparse++);
2646 }
2647 else
2648#endif
2649 {
2650 startc = *regparse++;
2651 regc(startc);
2652 }
2653 }
2654 }
2655 regc(NUL);
2656 prevchr_len = 1; /* last char was the ']' */
2657 if (*regparse != ']')
2658 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2659 skipchr(); /* let's be friends with the lexer again */
2660 *flagp |= HASWIDTH | SIMPLE;
2661 break;
2662 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002663 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002664 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002665 }
2666 /* FALLTHROUGH */
2667
2668 default:
2669 {
2670 int len;
2671
2672#ifdef FEAT_MBYTE
2673 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002674 * before a multi and when it's a composing char. */
2675 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002676 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002677do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002678 ret = regnode(MULTIBYTECODE);
2679 regmbc(c);
2680 *flagp |= HASWIDTH | SIMPLE;
2681 break;
2682 }
2683#endif
2684
2685 ret = regnode(EXACTLY);
2686
2687 /*
2688 * Append characters as long as:
2689 * - there is no following multi, we then need the character in
2690 * front of it as a single character operand
2691 * - not running into a Magic character
2692 * - "one_exactly" is not set
2693 * But always emit at least one character. Might be a Multi,
2694 * e.g., a "[" without matching "]".
2695 */
2696 for (len = 0; c != NUL && (len == 0
2697 || (re_multi_type(peekchr()) == NOT_MULTI
2698 && !one_exactly
2699 && !is_Magic(c))); ++len)
2700 {
2701 c = no_Magic(c);
2702#ifdef FEAT_MBYTE
2703 if (has_mbyte)
2704 {
2705 regmbc(c);
2706 if (enc_utf8)
2707 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002708 int l;
2709
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002710 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002711 for (;;)
2712 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002713 l = utf_ptr2len(regparse);
2714 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002715 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002716 regmbc(utf_ptr2char(regparse));
2717 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002718 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002719 }
2720 }
2721 else
2722#endif
2723 regc(c);
2724 c = getchr();
2725 }
2726 ungetchr();
2727
2728 regc(NUL);
2729 *flagp |= HASWIDTH;
2730 if (len == 1)
2731 *flagp |= SIMPLE;
2732 }
2733 break;
2734 }
2735
2736 return ret;
2737}
2738
#ifdef FEAT_MBYTE
/*
 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
 * character "c".
 *
 * That is the case for a character that takes more than one byte and is
 * either followed by a multi or, with UTF-8, is a composing character.
 */
    static int
use_multibytecode(c)
    int c;
{
    /* Only characters that really are multi-byte qualify. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
	return FALSE;
    /* Followed by a multi: must be a separate atom. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
	return TRUE;
    return enc_utf8 && utf_iscomposing(c);
}
#endif
2753
Bram Moolenaar071d4272004-06-13 20:20:40 +00002754/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002755 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002756 * Return pointer to generated code.
2757 */
2758 static char_u *
2759regnode(op)
2760 int op;
2761{
2762 char_u *ret;
2763
2764 ret = regcode;
2765 if (ret == JUST_CALC_SIZE)
2766 regsize += 3;
2767 else
2768 {
2769 *regcode++ = op;
2770 *regcode++ = NUL; /* Null "next" pointer. */
2771 *regcode++ = NUL;
2772 }
2773 return ret;
2774}
2775
2776/*
2777 * Emit (if appropriate) a byte of code
2778 */
2779 static void
2780regc(b)
2781 int b;
2782{
2783 if (regcode == JUST_CALC_SIZE)
2784 regsize++;
2785 else
2786 *regcode++ = b;
2787}
2788
#ifdef FEAT_MBYTE
/*
 * Emit (if appropriate) a multi-byte character of code.
 * When only calculating the size add the byte length of "c" to "regsize".
 * Nothing is emitted for a character above 0xff when "has_mbyte" is not set.
 */
    static void
regmbc(c)
    int c;
{
    if (has_mbyte || c <= 0xff)
    {
        if (regcode != JUST_CALC_SIZE)
            regcode += (*mb_char2bytes)(c, regcode);
        else
            regsize += (*mb_char2len)(c);
    }
}
#endif
2805
2806/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002807 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002808 *
2809 * Means relocating the operand.
2810 */
2811 static void
2812reginsert(op, opnd)
2813 int op;
2814 char_u *opnd;
2815{
2816 char_u *src;
2817 char_u *dst;
2818 char_u *place;
2819
2820 if (regcode == JUST_CALC_SIZE)
2821 {
2822 regsize += 3;
2823 return;
2824 }
2825 src = regcode;
2826 regcode += 3;
2827 dst = regcode;
2828 while (src > opnd)
2829 *--dst = *--src;
2830
2831 place = opnd; /* Op node, where operand used to be. */
2832 *place++ = op;
2833 *place++ = NUL;
2834 *place = NUL;
2835}
2836
2837/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002838 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002839 * Add a number to the operator.
2840 */
2841 static void
2842reginsert_nr(op, val, opnd)
2843 int op;
2844 long val;
2845 char_u *opnd;
2846{
2847 char_u *src;
2848 char_u *dst;
2849 char_u *place;
2850
2851 if (regcode == JUST_CALC_SIZE)
2852 {
2853 regsize += 7;
2854 return;
2855 }
2856 src = regcode;
2857 regcode += 7;
2858 dst = regcode;
2859 while (src > opnd)
2860 *--dst = *--src;
2861
2862 place = opnd; /* Op node, where operand used to be. */
2863 *place++ = op;
2864 *place++ = NUL;
2865 *place++ = NUL;
2866 place = re_put_long(place, (long_u)val);
2867}
2868
2869/*
2870 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002871 * The operator has the given limit values as operands. Also set next pointer.
2872 *
2873 * Means relocating the operand.
2874 */
2875 static void
2876reginsert_limits(op, minval, maxval, opnd)
2877 int op;
2878 long minval;
2879 long maxval;
2880 char_u *opnd;
2881{
2882 char_u *src;
2883 char_u *dst;
2884 char_u *place;
2885
2886 if (regcode == JUST_CALC_SIZE)
2887 {
2888 regsize += 11;
2889 return;
2890 }
2891 src = regcode;
2892 regcode += 11;
2893 dst = regcode;
2894 while (src > opnd)
2895 *--dst = *--src;
2896
2897 place = opnd; /* Op node, where operand used to be. */
2898 *place++ = op;
2899 *place++ = NUL;
2900 *place++ = NUL;
2901 place = re_put_long(place, (long_u)minval);
2902 place = re_put_long(place, (long_u)maxval);
2903 regtail(opnd, place);
2904}
2905
2906/*
2907 * Write a long as four bytes at "p" and return pointer to the next char.
2908 */
2909 static char_u *
2910re_put_long(p, val)
2911 char_u *p;
2912 long_u val;
2913{
2914 *p++ = (char_u) ((val >> 24) & 0377);
2915 *p++ = (char_u) ((val >> 16) & 0377);
2916 *p++ = (char_u) ((val >> 8) & 0377);
2917 *p++ = (char_u) (val & 0377);
2918 return p;
2919}
2920
2921/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002922 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002923 */
2924 static void
2925regtail(p, val)
2926 char_u *p;
2927 char_u *val;
2928{
2929 char_u *scan;
2930 char_u *temp;
2931 int offset;
2932
2933 if (p == JUST_CALC_SIZE)
2934 return;
2935
2936 /* Find last node. */
2937 scan = p;
2938 for (;;)
2939 {
2940 temp = regnext(scan);
2941 if (temp == NULL)
2942 break;
2943 scan = temp;
2944 }
2945
Bram Moolenaar582fd852005-03-28 20:58:01 +00002946 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002947 offset = (int)(scan - val);
2948 else
2949 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002950 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002951 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002952 * values in too many places. */
2953 if (offset > 0xffff)
2954 reg_toolong = TRUE;
2955 else
2956 {
2957 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2958 *(scan + 2) = (char_u) (offset & 0377);
2959 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002960}
2961
2962/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002963 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002964 */
2965 static void
2966regoptail(p, val)
2967 char_u *p;
2968 char_u *val;
2969{
2970 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2971 if (p == NULL || p == JUST_CALC_SIZE
2972 || (OP(p) != BRANCH
2973 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2974 return;
2975 regtail(OPERAND(p), val);
2976}
2977
2978/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002979 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002980 */
2981
Bram Moolenaar071d4272004-06-13 20:20:40 +00002982static int at_start; /* True when on the first character */
2983static int prev_at_start; /* True when on the second character */
2984
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002985/*
2986 * Start parsing at "str".
2987 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002988 static void
2989initchr(str)
2990 char_u *str;
2991{
2992 regparse = str;
2993 prevchr_len = 0;
2994 curchr = prevprevchr = prevchr = nextchr = -1;
2995 at_start = TRUE;
2996 prev_at_start = FALSE;
2997}
2998
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002999/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02003000 * Save the current parse state, so that it can be restored and parsing
3001 * starts in the same state again.
3002 */
3003 static void
3004save_parse_state(ps)
3005 parse_state_T *ps;
3006{
3007 ps->regparse = regparse;
3008 ps->prevchr_len = prevchr_len;
3009 ps->curchr = curchr;
3010 ps->prevchr = prevchr;
3011 ps->prevprevchr = prevprevchr;
3012 ps->nextchr = nextchr;
3013 ps->at_start = at_start;
3014 ps->prev_at_start = prev_at_start;
3015 ps->regnpar = regnpar;
3016}
3017
3018/*
3019 * Restore a previously saved parse state.
3020 */
3021 static void
3022restore_parse_state(ps)
3023 parse_state_T *ps;
3024{
3025 regparse = ps->regparse;
3026 prevchr_len = ps->prevchr_len;
3027 curchr = ps->curchr;
3028 prevchr = ps->prevchr;
3029 prevprevchr = ps->prevprevchr;
3030 nextchr = ps->nextchr;
3031 at_start = ps->at_start;
3032 prev_at_start = ps->prev_at_start;
3033 regnpar = ps->regnpar;
3034}
3035
3036
3037/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003038 * Get the next character without advancing.
3039 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003040 static int
3041peekchr()
3042{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003043 static int after_slash = FALSE;
3044
Bram Moolenaar071d4272004-06-13 20:20:40 +00003045 if (curchr == -1)
3046 {
3047 switch (curchr = regparse[0])
3048 {
3049 case '.':
3050 case '[':
3051 case '~':
3052 /* magic when 'magic' is on */
3053 if (reg_magic >= MAGIC_ON)
3054 curchr = Magic(curchr);
3055 break;
3056 case '(':
3057 case ')':
3058 case '{':
3059 case '%':
3060 case '+':
3061 case '=':
3062 case '?':
3063 case '@':
3064 case '!':
3065 case '&':
3066 case '|':
3067 case '<':
3068 case '>':
3069 case '#': /* future ext. */
3070 case '"': /* future ext. */
3071 case '\'': /* future ext. */
3072 case ',': /* future ext. */
3073 case '-': /* future ext. */
3074 case ':': /* future ext. */
3075 case ';': /* future ext. */
3076 case '`': /* future ext. */
3077 case '/': /* Can't be used in / command */
3078 /* magic only after "\v" */
3079 if (reg_magic == MAGIC_ALL)
3080 curchr = Magic(curchr);
3081 break;
3082 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003083 /* * is not magic as the very first character, eg "?*ptr", when
3084 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3085 * "\(\*" is not magic, thus must be magic if "after_slash" */
3086 if (reg_magic >= MAGIC_ON
3087 && !at_start
3088 && !(prev_at_start && prevchr == Magic('^'))
3089 && (after_slash
3090 || (prevchr != Magic('(')
3091 && prevchr != Magic('&')
3092 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003093 curchr = Magic('*');
3094 break;
3095 case '^':
3096 /* '^' is only magic as the very first character and if it's after
3097 * "\(", "\|", "\&' or "\n" */
3098 if (reg_magic >= MAGIC_OFF
3099 && (at_start
3100 || reg_magic == MAGIC_ALL
3101 || prevchr == Magic('(')
3102 || prevchr == Magic('|')
3103 || prevchr == Magic('&')
3104 || prevchr == Magic('n')
3105 || (no_Magic(prevchr) == '('
3106 && prevprevchr == Magic('%'))))
3107 {
3108 curchr = Magic('^');
3109 at_start = TRUE;
3110 prev_at_start = FALSE;
3111 }
3112 break;
3113 case '$':
3114 /* '$' is only magic as the very last char and if it's in front of
3115 * either "\|", "\)", "\&", or "\n" */
3116 if (reg_magic >= MAGIC_OFF)
3117 {
3118 char_u *p = regparse + 1;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003119 int is_magic_all = (reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003120
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003121 /* ignore \c \C \m \M \v \V and \Z after '$' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003122 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003123 || p[1] == 'm' || p[1] == 'M'
3124 || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
3125 {
3126 if (p[1] == 'v')
3127 is_magic_all = TRUE;
3128 else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
3129 is_magic_all = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003130 p += 2;
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003131 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003132 if (p[0] == NUL
3133 || (p[0] == '\\'
3134 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3135 || p[1] == 'n'))
Bram Moolenaarff65ac82014-07-09 19:32:34 +02003136 || (is_magic_all
3137 && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003138 || reg_magic == MAGIC_ALL)
3139 curchr = Magic('$');
3140 }
3141 break;
3142 case '\\':
3143 {
3144 int c = regparse[1];
3145
3146 if (c == NUL)
3147 curchr = '\\'; /* trailing '\' */
3148 else if (
3149#ifdef EBCDIC
3150 vim_strchr(META, c)
3151#else
3152 c <= '~' && META_flags[c]
3153#endif
3154 )
3155 {
3156 /*
3157 * META contains everything that may be magic sometimes,
3158 * except ^ and $ ("\^" and "\$" are only magic after
3159 * "\v"). We now fetch the next character and toggle its
3160 * magicness. Therefore, \ is so meta-magic that it is
3161 * not in META.
3162 */
3163 curchr = -1;
3164 prev_at_start = at_start;
3165 at_start = FALSE; /* be able to say "/\*ptr" */
3166 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003167 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003168 peekchr();
3169 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003170 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003171 curchr = toggle_Magic(curchr);
3172 }
3173 else if (vim_strchr(REGEXP_ABBR, c))
3174 {
3175 /*
3176 * Handle abbreviations, like "\t" for TAB -- webb
3177 */
3178 curchr = backslash_trans(c);
3179 }
3180 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3181 curchr = toggle_Magic(c);
3182 else
3183 {
3184 /*
3185 * Next character can never be (made) magic?
3186 * Then backslashing it won't do anything.
3187 */
3188#ifdef FEAT_MBYTE
3189 if (has_mbyte)
3190 curchr = (*mb_ptr2char)(regparse + 1);
3191 else
3192#endif
3193 curchr = c;
3194 }
3195 break;
3196 }
3197
3198#ifdef FEAT_MBYTE
3199 default:
3200 if (has_mbyte)
3201 curchr = (*mb_ptr2char)(regparse);
3202#endif
3203 }
3204 }
3205
3206 return curchr;
3207}
3208
3209/*
3210 * Eat one lexed character. Do this in a way that we can undo it.
3211 */
3212 static void
3213skipchr()
3214{
3215 /* peekchr() eats a backslash, do the same here */
3216 if (*regparse == '\\')
3217 prevchr_len = 1;
3218 else
3219 prevchr_len = 0;
3220 if (regparse[prevchr_len] != NUL)
3221 {
3222#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003223 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003224 /* exclude composing chars that mb_ptr2len does include */
3225 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003226 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003227 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003228 else
3229#endif
3230 ++prevchr_len;
3231 }
3232 regparse += prevchr_len;
3233 prev_at_start = at_start;
3234 at_start = FALSE;
3235 prevprevchr = prevchr;
3236 prevchr = curchr;
3237 curchr = nextchr; /* use previously unget char, or -1 */
3238 nextchr = -1;
3239}
3240
3241/*
3242 * Skip a character while keeping the value of prev_at_start for at_start.
3243 * prevchr and prevprevchr are also kept.
3244 */
3245 static void
3246skipchr_keepstart()
3247{
3248 int as = prev_at_start;
3249 int pr = prevchr;
3250 int prpr = prevprevchr;
3251
3252 skipchr();
3253 at_start = as;
3254 prevchr = pr;
3255 prevprevchr = prpr;
3256}
3257
/*
 * Get the next character from the pattern.  We know about magic and such, so
 * therefore we need a lexical analyzer.
 * This is peekchr() followed by skipchr().
 */
    static int
getchr()
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3270
3271/*
3272 * put character back. Works only once!
3273 */
3274 static void
3275ungetchr()
3276{
3277 nextchr = curchr;
3278 curchr = prevchr;
3279 prevchr = prevprevchr;
3280 at_start = prev_at_start;
3281 prev_at_start = FALSE;
3282
3283 /* Backup regparse, so that it's at the same position as before the
3284 * getchr(). */
3285 regparse -= prevchr_len;
3286}
3287
3288/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003289 * Get and return the value of the hex string at the current position.
3290 * Return -1 if there is no valid hex number.
3291 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003292 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003293 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003294 * The parameter controls the maximum number of input characters. This will be
3295 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3296 */
3297 static int
3298gethexchrs(maxinputlen)
3299 int maxinputlen;
3300{
3301 int nr = 0;
3302 int c;
3303 int i;
3304
3305 for (i = 0; i < maxinputlen; ++i)
3306 {
3307 c = regparse[0];
3308 if (!vim_isxdigit(c))
3309 break;
3310 nr <<= 4;
3311 nr |= hex2nr(c);
3312 ++regparse;
3313 }
3314
3315 if (i == 0)
3316 return -1;
3317 return nr;
3318}
3319
3320/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003321 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003322 * current position. Return -1 for invalid. Consumes all digits.
3323 */
3324 static int
3325getdecchrs()
3326{
3327 int nr = 0;
3328 int c;
3329 int i;
3330
3331 for (i = 0; ; ++i)
3332 {
3333 c = regparse[0];
3334 if (c < '0' || c > '9')
3335 break;
3336 nr *= 10;
3337 nr += c - '0';
3338 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003339 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003340 }
3341
3342 if (i == 0)
3343 return -1;
3344 return nr;
3345}
3346
3347/*
3348 * get and return the value of the octal string immediately after the current
3349 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3350 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3351 * treat 8 or 9 as recognised characters. Position is updated:
3352 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003353 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003354 */
3355 static int
3356getoctchrs()
3357{
3358 int nr = 0;
3359 int c;
3360 int i;
3361
3362 for (i = 0; i < 3 && nr < 040; ++i)
3363 {
3364 c = regparse[0];
3365 if (c < '0' || c > '7')
3366 break;
3367 nr <<= 3;
3368 nr |= hex2nr(c);
3369 ++regparse;
3370 }
3371
3372 if (i == 0)
3373 return -1;
3374 return nr;
3375}
3376
3377/*
3378 * Get a number after a backslash that is inside [].
3379 * When nothing is recognized return a backslash.
3380 */
3381 static int
3382coll_get_char()
3383{
3384 int nr = -1;
3385
3386 switch (*regparse++)
3387 {
3388 case 'd': nr = getdecchrs(); break;
3389 case 'o': nr = getoctchrs(); break;
3390 case 'x': nr = gethexchrs(2); break;
3391 case 'u': nr = gethexchrs(4); break;
3392 case 'U': nr = gethexchrs(8); break;
3393 }
3394 if (nr < 0)
3395 {
3396 /* If getting the number fails be backwards compatible: the character
3397 * is a backslash. */
3398 --regparse;
3399 nr = '\\';
3400 }
3401 return nr;
3402}
3403
3404/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003405 * read_limits - Read two integers to be taken as a minimum and maximum.
3406 * If the first character is '-', then the range is reversed.
3407 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3408 * missing, a very big number is the default.
3409 */
3410 static int
3411read_limits(minval, maxval)
3412 long *minval;
3413 long *maxval;
3414{
3415 int reverse = FALSE;
3416 char_u *first_char;
3417 long tmp;
3418
3419 if (*regparse == '-')
3420 {
3421 /* Starts with '-', so reverse the range later */
3422 regparse++;
3423 reverse = TRUE;
3424 }
3425 first_char = regparse;
3426 *minval = getdigits(&regparse);
3427 if (*regparse == ',') /* There is a comma */
3428 {
3429 if (vim_isdigit(*++regparse))
3430 *maxval = getdigits(&regparse);
3431 else
3432 *maxval = MAX_LIMIT;
3433 }
3434 else if (VIM_ISDIGIT(*first_char))
3435 *maxval = *minval; /* It was \{n} or \{-n} */
3436 else
3437 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3438 if (*regparse == '\\')
3439 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003440 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003441 {
3442 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3443 reg_magic == MAGIC_ALL ? "" : "\\");
3444 EMSG_RET_FAIL(IObuff);
3445 }
3446
3447 /*
3448 * Reverse the range if there was a '-', or make sure it is in the right
3449 * order otherwise.
3450 */
3451 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3452 {
3453 tmp = *minval;
3454 *minval = *maxval;
3455 *maxval = tmp;
3456 }
3457 skipchr(); /* let's be friends with the lexer again */
3458 return OK;
3459}
3460
3461/*
3462 * vim_regexec and friends
3463 */
3464
3465/*
3466 * Global work variables for vim_regexec().
3467 */
3468
3469/* The current match-position is remembered with these variables: */
3470static linenr_T reglnum; /* line number, relative to first line */
3471static char_u *regline; /* start of current line */
3472static char_u *reginput; /* current input, points into "regline" */
3473
3474static int need_clear_subexpr; /* subexpressions still need to be
3475 * cleared */
3476#ifdef FEAT_SYN_HL
3477static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3478 * still need to be cleared */
3479#endif
3480
Bram Moolenaar071d4272004-06-13 20:20:40 +00003481/*
3482 * Structure used to save the current input state, when it needs to be
3483 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003484 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003485 */
3486typedef struct
3487{
3488 union
3489 {
3490 char_u *ptr; /* reginput pointer, for single-line regexp */
3491 lpos_T pos; /* reginput pos, for multi-line regexp */
3492 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003493 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003494} regsave_T;
3495
3496/* struct to save start/end pointer/position in for \(\) */
3497typedef struct
3498{
3499 union
3500 {
3501 char_u *ptr;
3502 lpos_T pos;
3503 } se_u;
3504} save_se_T;
3505
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003506/* used for BEHIND and NOBEHIND matching */
3507typedef struct regbehind_S
3508{
3509 regsave_T save_after;
3510 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003511 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003512 save_se_T save_start[NSUBEXP];
3513 save_se_T save_end[NSUBEXP];
3514} regbehind_T;
3515
Bram Moolenaar071d4272004-06-13 20:20:40 +00003516static char_u *reg_getline __ARGS((linenr_T lnum));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003517static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3518static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003519static void cleanup_subexpr __ARGS((void));
3520#ifdef FEAT_SYN_HL
3521static void cleanup_zsubexpr __ARGS((void));
3522#endif
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003523static void save_subexpr __ARGS((regbehind_T *bp));
3524static void restore_subexpr __ARGS((regbehind_T *bp));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003525static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003526static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3527static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003528static int reg_save_equal __ARGS((regsave_T *save));
3529static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3530static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3531
/*
 * Save the sub-expressions before attempting a match.
 * For a multi-line regexp the position "*posp" is handled by
 * save_se_multi(), otherwise the pointer "*pp" by save_se_one().
 * The expansion is in parentheses so that it stays one expression wherever
 * the macro is used.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/*
 * After a failed match restore the sub-expressions.
 * Wrapped in "do { } while (0)" to make it a single statement: it must be
 * followed by a semicolon and can be used in an if/else without braces.
 */
#define restore_se(savep, posp, pp) \
    do { \
        if (REG_MULTI) \
            *(posp) = (savep)->se_u.pos; \
        else \
            *(pp) = (savep)->se_u.ptr; \
    } while (0)
3542
3543static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaar580abea2013-06-14 20:31:28 +02003544static int match_with_backref __ARGS((linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003545static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003546static int regrepeat __ARGS((char_u *p, long maxcount));
3547
3548#ifdef DEBUG
3549int regnarrate = 0;
3550#endif
3551
3552/*
3553 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3554 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3555 * contains '\c' or '\C' the value is overruled.
3556 */
3557static int ireg_ic;
3558
3559#ifdef FEAT_MBYTE
3560/*
3561 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3562 * in the regexp. Defaults to false, always.
3563 */
3564static int ireg_icombine;
3565#endif
3566
3567/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003568 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3569 * there is no maximum.
3570 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003571static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003572
3573/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003574 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3575 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003576 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003577 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003578static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003579static unsigned reg_tofreelen;
3580
3581/*
3582 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003583 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003584 * done:
3585 * single-line multi-line
3586 * reg_match &regmatch_T NULL
3587 * reg_mmatch NULL &regmmatch_T
3588 * reg_startp reg_match->startp <invalid>
3589 * reg_endp reg_match->endp <invalid>
3590 * reg_startpos <invalid> reg_mmatch->startpos
3591 * reg_endpos <invalid> reg_mmatch->endpos
3592 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003593 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003594 * reg_firstlnum <invalid> first line in which to search
3595 * reg_maxline 0 last line nr
3596 * reg_line_lbr FALSE or TRUE FALSE
3597 */
3598static regmatch_T *reg_match;
3599static regmmatch_T *reg_mmatch;
3600static char_u **reg_startp = NULL;
3601static char_u **reg_endp = NULL;
3602static lpos_T *reg_startpos = NULL;
3603static lpos_T *reg_endpos = NULL;
3604static win_T *reg_win;
3605static buf_T *reg_buf;
3606static linenr_T reg_firstlnum;
3607static linenr_T reg_maxline;
3608static int reg_line_lbr; /* "\n" in string is line break */
3609
/*
 * Values for rs_state in regitem_T: what the item on the regstack is for.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3630
3631/*
3632 * When there are alternatives a regstate_T is put on the regstack to remember
3633 * what we are doing.
3634 * Before it may be another type of item, depending on rs_state, to remember
3635 * more things.
3636 */
3637typedef struct regitem_S
3638{
3639 regstate_T rs_state; /* what we are doing, one of RS_ above */
3640 char_u *rs_scan; /* current node in program */
3641 union
3642 {
3643 save_se_T sesave;
3644 regsave_T regsave;
3645 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003646 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003647} regitem_T;
3648
3649static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3650static void regstack_pop __ARGS((char_u **scan));
3651
/*
 * Used for STAR, PLUS and BRACE_SIMPLE matching.
 */
typedef struct regstar_S
{
    /* next byte */
    int         nextb;
    /* next byte reverse case */
    int         nextb_ic;
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3661
3662/* used to store input position when a BACK was encountered, so that we now if
3663 * we made any progress since the last time. */
3664typedef struct backpos_S
3665{
3666 char_u *bp_scan; /* "scan" where BACK was encountered */
3667 regsave_T bp_pos; /* last input position */
3668} backpos_T;
3669
3670/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003671 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3672 * to avoid invoking malloc() and free() often.
3673 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3674 * or regbehind_T.
 * "backpos" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003676 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003677static garray_T regstack = {0, 0, 0, 0, NULL};
3678static garray_T backpos = {0, 0, 0, 0, NULL};
3679
3680/*
3681 * Both for regstack and backpos tables we use the following strategy of
3682 * allocation (to reduce malloc/free calls):
3683 * - Initial size is fairly small.
3684 * - When needed, the tables are grown bigger (8 times at first, double after
3685 * that).
3686 * - After executing the match we free the memory only if the array has grown.
3687 * Thus the memory is kept allocated when it's at the initial size.
3688 * This makes it fast while not keeping a lot of memory allocated.
3689 * A three times speed increase was observed when using many simple patterns.
3690 */
3691#define REGSTACK_INITIAL 2048
3692#define BACKPOS_INITIAL 64
3693
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the regexp code keeps allocated between calls: the
 * "regstack" and "backpos" growarrays, "reg_tofree" and "reg_prev_sub".
 */
    void
free_regexp_stuff()
{
    /* the two growarrays used while matching */
    ga_clear(&backpos);
    ga_clear(&regstack);

    /* separately allocated strings */
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003704
Bram Moolenaar071d4272004-06-13 20:20:40 +00003705/*
3706 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3707 */
3708 static char_u *
3709reg_getline(lnum)
3710 linenr_T lnum;
3711{
3712 /* when looking behind for a match/no-match lnum is negative. But we
3713 * can't go before line 1 */
3714 if (reg_firstlnum + lnum < 1)
3715 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003716 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003717 /* Must have matched the "\n" in the last line. */
3718 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003719 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3720}
3721
3722static regsave_T behind_pos;
3723
3724#ifdef FEAT_SYN_HL
3725static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3726static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3727static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3728static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3729#endif
3730
3731/* TRUE if using multi-line regexp. */
3732#define REG_MULTI (reg_match == NULL)
3733
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003734static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr));
3735
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003736
Bram Moolenaar071d4272004-06-13 20:20:40 +00003737/*
3738 * Match a regexp against a string.
3739 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3740 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003741 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003742 *
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003743 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003744 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003745 static int
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003746bt_regexec_nl(rmp, line, col, line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003747 regmatch_T *rmp;
3748 char_u *line; /* string to match against */
3749 colnr_T col; /* column to start looking for match */
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003750 int line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003751{
3752 reg_match = rmp;
3753 reg_mmatch = NULL;
3754 reg_maxline = 0;
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003755 reg_line_lbr = line_lbr;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003756 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003757 reg_win = NULL;
3758 ireg_ic = rmp->rm_ic;
3759#ifdef FEAT_MBYTE
3760 ireg_icombine = FALSE;
3761#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003762 ireg_maxcol = 0;
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003763
3764 return bt_regexec_both(line, col, NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003765}
3766
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003767static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3768
Bram Moolenaar071d4272004-06-13 20:20:40 +00003769/*
3770 * Match a regexp against multiple lines.
3771 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3772 * Uses curbuf for line count and 'iskeyword'.
3773 *
3774 * Return zero if there is no match. Return number of lines contained in the
3775 * match otherwise.
3776 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003777 static long
3778bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003779 regmmatch_T *rmp;
3780 win_T *win; /* window in which to search or NULL */
3781 buf_T *buf; /* buffer in which to search */
3782 linenr_T lnum; /* nr of line to start looking for match */
3783 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003784 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003785{
Bram Moolenaar071d4272004-06-13 20:20:40 +00003786 reg_match = NULL;
3787 reg_mmatch = rmp;
3788 reg_buf = buf;
3789 reg_win = win;
3790 reg_firstlnum = lnum;
3791 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3792 reg_line_lbr = FALSE;
3793 ireg_ic = rmp->rmm_ic;
3794#ifdef FEAT_MBYTE
3795 ireg_icombine = FALSE;
3796#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003797 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003798
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003799 return bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003800}
3801
3802/*
3803 * Match a regexp against a string ("line" points to the string) or multiple
3804 * lines ("line" is NULL, use reg_getline()).
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003805 * Returns 0 for failure, number of lines contained in the match otherwise.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003806 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003807 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003808bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003809 char_u *line;
3810 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003811 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003812{
Bram Moolenaar66a3e792014-11-20 23:07:05 +01003813 bt_regprog_T *prog;
3814 char_u *s;
3815 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003816
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003817 /* Create "regstack" and "backpos" if they are not allocated yet.
3818 * We allocate *_INITIAL amount of bytes first and then set the grow size
3819 * to much bigger value to avoid many malloc calls in case of deep regular
3820 * expressions. */
3821 if (regstack.ga_data == NULL)
3822 {
3823 /* Use an item size of 1 byte, since we push different things
3824 * onto the regstack. */
3825 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3826 ga_grow(&regstack, REGSTACK_INITIAL);
3827 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3828 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003829
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003830 if (backpos.ga_data == NULL)
3831 {
3832 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3833 ga_grow(&backpos, BACKPOS_INITIAL);
3834 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3835 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003836
Bram Moolenaar071d4272004-06-13 20:20:40 +00003837 if (REG_MULTI)
3838 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003839 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003840 line = reg_getline((linenr_T)0);
3841 reg_startpos = reg_mmatch->startpos;
3842 reg_endpos = reg_mmatch->endpos;
3843 }
3844 else
3845 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003846 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003847 reg_startp = reg_match->startp;
3848 reg_endp = reg_match->endp;
3849 }
3850
3851 /* Be paranoid... */
3852 if (prog == NULL || line == NULL)
3853 {
3854 EMSG(_(e_null));
3855 goto theend;
3856 }
3857
3858 /* Check validity of program. */
3859 if (prog_magic_wrong())
3860 goto theend;
3861
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003862 /* If the start column is past the maximum column: no need to try. */
3863 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3864 goto theend;
3865
Bram Moolenaar071d4272004-06-13 20:20:40 +00003866 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3867 if (prog->regflags & RF_ICASE)
3868 ireg_ic = TRUE;
3869 else if (prog->regflags & RF_NOICASE)
3870 ireg_ic = FALSE;
3871
3872#ifdef FEAT_MBYTE
3873 /* If pattern contains "\Z" overrule value of ireg_icombine */
3874 if (prog->regflags & RF_ICOMBINE)
3875 ireg_icombine = TRUE;
3876#endif
3877
3878 /* If there is a "must appear" string, look for it. */
3879 if (prog->regmust != NULL)
3880 {
3881 int c;
3882
3883#ifdef FEAT_MBYTE
3884 if (has_mbyte)
3885 c = (*mb_ptr2char)(prog->regmust);
3886 else
3887#endif
3888 c = *prog->regmust;
3889 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003890
3891 /*
3892 * This is used very often, esp. for ":global". Use three versions of
3893 * the loop to avoid overhead of conditions.
3894 */
3895 if (!ireg_ic
3896#ifdef FEAT_MBYTE
3897 && !has_mbyte
3898#endif
3899 )
3900 while ((s = vim_strbyte(s, c)) != NULL)
3901 {
3902 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3903 break; /* Found it. */
3904 ++s;
3905 }
3906#ifdef FEAT_MBYTE
3907 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3908 while ((s = vim_strchr(s, c)) != NULL)
3909 {
3910 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3911 break; /* Found it. */
3912 mb_ptr_adv(s);
3913 }
3914#endif
3915 else
3916 while ((s = cstrchr(s, c)) != NULL)
3917 {
3918 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3919 break; /* Found it. */
3920 mb_ptr_adv(s);
3921 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003922 if (s == NULL) /* Not present. */
3923 goto theend;
3924 }
3925
3926 regline = line;
3927 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003928 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003929
3930 /* Simplest case: Anchored match need be tried only once. */
3931 if (prog->reganch)
3932 {
3933 int c;
3934
3935#ifdef FEAT_MBYTE
3936 if (has_mbyte)
3937 c = (*mb_ptr2char)(regline + col);
3938 else
3939#endif
3940 c = regline[col];
3941 if (prog->regstart == NUL
3942 || prog->regstart == c
3943 || (ireg_ic && ((
3944#ifdef FEAT_MBYTE
3945 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3946 || (c < 255 && prog->regstart < 255 &&
3947#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003948 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003949 retval = regtry(prog, col);
3950 else
3951 retval = 0;
3952 }
3953 else
3954 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003955#ifdef FEAT_RELTIME
3956 int tm_count = 0;
3957#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003958 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003959 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003960 {
3961 if (prog->regstart != NUL)
3962 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003963 /* Skip until the char we know it must start with.
3964 * Used often, do some work to avoid call overhead. */
3965 if (!ireg_ic
3966#ifdef FEAT_MBYTE
3967 && !has_mbyte
3968#endif
3969 )
3970 s = vim_strbyte(regline + col, prog->regstart);
3971 else
3972 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003973 if (s == NULL)
3974 {
3975 retval = 0;
3976 break;
3977 }
3978 col = (int)(s - regline);
3979 }
3980
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003981 /* Check for maximum column to try. */
3982 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3983 {
3984 retval = 0;
3985 break;
3986 }
3987
Bram Moolenaar071d4272004-06-13 20:20:40 +00003988 retval = regtry(prog, col);
3989 if (retval > 0)
3990 break;
3991
3992 /* if not currently on the first line, get it again */
3993 if (reglnum != 0)
3994 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003995 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003996 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997 }
3998 if (regline[col] == NUL)
3999 break;
4000#ifdef FEAT_MBYTE
4001 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004002 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004003 else
4004#endif
4005 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00004006#ifdef FEAT_RELTIME
4007 /* Check for timeout once in a twenty times to avoid overhead. */
4008 if (tm != NULL && ++tm_count == 20)
4009 {
4010 tm_count = 0;
4011 if (profile_passed_limit(tm))
4012 break;
4013 }
4014#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004015 }
4016 }
4017
Bram Moolenaar071d4272004-06-13 20:20:40 +00004018theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004019 /* Free "reg_tofree" when it's a bit big.
4020 * Free regstack and backpos if they are bigger than their initial size. */
4021 if (reg_tofreelen > 400)
4022 {
4023 vim_free(reg_tofree);
4024 reg_tofree = NULL;
4025 }
4026 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4027 ga_clear(&regstack);
4028 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4029 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004030
Bram Moolenaar071d4272004-06-13 20:20:40 +00004031 return retval;
4032}
4033
4034#ifdef FEAT_SYN_HL
4035static reg_extmatch_T *make_extmatch __ARGS((void));
4036
4037/*
4038 * Create a new extmatch and mark it as referenced once.
4039 */
4040 static reg_extmatch_T *
4041make_extmatch()
4042{
4043 reg_extmatch_T *em;
4044
4045 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4046 if (em != NULL)
4047 em->refcnt = 1;
4048 return em;
4049}
4050
4051/*
4052 * Add a reference to an extmatch.
4053 */
4054 reg_extmatch_T *
4055ref_extmatch(em)
4056 reg_extmatch_T *em;
4057{
4058 if (em != NULL)
4059 em->refcnt++;
4060 return em;
4061}
4062
4063/*
4064 * Remove a reference to an extmatch. If there are no references left, free
4065 * the info.
4066 */
4067 void
4068unref_extmatch(em)
4069 reg_extmatch_T *em;
4070{
4071 int i;
4072
4073 if (em != NULL && --em->refcnt <= 0)
4074 {
4075 for (i = 0; i < NSUBEXP; ++i)
4076 vim_free(em->matches[i]);
4077 vim_free(em);
4078 }
4079}
4080#endif
4081
4082/*
4083 * regtry - try match of "prog" with at regline["col"].
4084 * Returns 0 for failure, number of lines contained in the match otherwise.
4085 */
4086 static long
4087regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004088 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004089 colnr_T col;
4090{
4091 reginput = regline + col;
4092 need_clear_subexpr = TRUE;
4093#ifdef FEAT_SYN_HL
4094 /* Clear the external match subpointers if necessary. */
4095 if (prog->reghasz == REX_SET)
4096 need_clear_zsubexpr = TRUE;
4097#endif
4098
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004099 if (regmatch(prog->program + 1) == 0)
4100 return 0;
4101
4102 cleanup_subexpr();
4103 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004104 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004105 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004106 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004107 reg_startpos[0].lnum = 0;
4108 reg_startpos[0].col = col;
4109 }
4110 if (reg_endpos[0].lnum < 0)
4111 {
4112 reg_endpos[0].lnum = reglnum;
4113 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004114 }
4115 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004116 /* Use line number of "\ze". */
4117 reglnum = reg_endpos[0].lnum;
4118 }
4119 else
4120 {
4121 if (reg_startp[0] == NULL)
4122 reg_startp[0] = regline + col;
4123 if (reg_endp[0] == NULL)
4124 reg_endp[0] = reginput;
4125 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004126#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004127 /* Package any found \z(...\) matches for export. Default is none. */
4128 unref_extmatch(re_extmatch_out);
4129 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004130
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004131 if (prog->reghasz == REX_SET)
4132 {
4133 int i;
4134
4135 cleanup_zsubexpr();
4136 re_extmatch_out = make_extmatch();
4137 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004138 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004139 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004140 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004141 /* Only accept single line matches. */
4142 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004143 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4144 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004145 re_extmatch_out->matches[i] =
4146 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004147 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004148 reg_endzpos[i].col - reg_startzpos[i].col);
4149 }
4150 else
4151 {
4152 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4153 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004154 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004155 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004156 }
4157 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004158 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004159#endif
4160 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004161}
4162
4163#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00004164static int reg_prev_class __ARGS((void));
4165
Bram Moolenaar071d4272004-06-13 20:20:40 +00004166/*
4167 * Get class of previous character.
4168 */
4169 static int
4170reg_prev_class()
4171{
4172 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004173 return mb_get_class_buf(reginput - 1
4174 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004175 return -1;
4176}
Bram Moolenaar071d4272004-06-13 20:20:40 +00004177#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004178
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004179static int reg_match_visual __ARGS((void));
4180
4181/*
4182 * Return TRUE if the current reginput position matches the Visual area.
4183 */
4184 static int
4185reg_match_visual()
4186{
4187 pos_T top, bot;
4188 linenr_T lnum;
4189 colnr_T col;
4190 win_T *wp = reg_win == NULL ? curwin : reg_win;
4191 int mode;
4192 colnr_T start, end;
4193 colnr_T start2, end2;
4194 colnr_T cols;
4195
4196 /* Check if the buffer is the current buffer. */
4197 if (reg_buf != curbuf || VIsual.lnum == 0)
4198 return FALSE;
4199
4200 if (VIsual_active)
4201 {
4202 if (lt(VIsual, wp->w_cursor))
4203 {
4204 top = VIsual;
4205 bot = wp->w_cursor;
4206 }
4207 else
4208 {
4209 top = wp->w_cursor;
4210 bot = VIsual;
4211 }
4212 mode = VIsual_mode;
4213 }
4214 else
4215 {
4216 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4217 {
4218 top = curbuf->b_visual.vi_start;
4219 bot = curbuf->b_visual.vi_end;
4220 }
4221 else
4222 {
4223 top = curbuf->b_visual.vi_end;
4224 bot = curbuf->b_visual.vi_start;
4225 }
4226 mode = curbuf->b_visual.vi_mode;
4227 }
4228 lnum = reglnum + reg_firstlnum;
4229 if (lnum < top.lnum || lnum > bot.lnum)
4230 return FALSE;
4231
4232 if (mode == 'v')
4233 {
4234 col = (colnr_T)(reginput - regline);
4235 if ((lnum == top.lnum && col < top.col)
4236 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4237 return FALSE;
4238 }
4239 else if (mode == Ctrl_V)
4240 {
4241 getvvcol(wp, &top, &start, NULL, &end);
4242 getvvcol(wp, &bot, &start2, NULL, &end2);
4243 if (start2 < start)
4244 start = start2;
4245 if (end2 > end)
4246 end = end2;
4247 if (top.col == MAXCOL || bot.col == MAXCOL)
4248 end = MAXCOL;
4249 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4250 if (cols < start || cols > end - (*p_sel == 'e'))
4251 return FALSE;
4252 }
4253 return TRUE;
4254}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004255
/* Advance "reginput" with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  These really belong to
 * regmatch(), but keeping them here saves stack space, since that function
 * can be called recursively many times.
 */
static long	bl_minval;
static long	bl_maxval;
4265
4266/*
4267 * regmatch - main matching routine
4268 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004269 * Conceptually the strategy is simple: Check to see whether the current node
4270 * matches, push an item onto the regstack and loop to see whether the rest
4271 * matches, and then act accordingly. In practice we make some effort to
4272 * avoid using the regstack, in particular by going through "ordinary" nodes
4273 * (that don't need to know whether the rest of the match failed) by a nested
4274 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004275 *
4276 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4277 * the last matched character.
4278 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4279 * undefined state!
4280 */
4281 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004282regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004283 char_u *scan; /* Current node. */
4284{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004285 char_u *next; /* Next node. */
4286 int op;
4287 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004288 regitem_T *rp;
4289 int no;
4290 int status; /* one of the RA_ values: */
4291#define RA_FAIL 1 /* something failed, abort */
4292#define RA_CONT 2 /* continue in inner loop */
4293#define RA_BREAK 3 /* break inner loop */
4294#define RA_MATCH 4 /* successful match */
4295#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004296
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004297 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004298 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004299 regstack.ga_len = 0;
4300 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004301
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004302 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004303 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004304 */
4305 for (;;)
4306 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004307 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4308 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004309 fast_breakcheck();
4310
4311#ifdef DEBUG
4312 if (scan != NULL && regnarrate)
4313 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004314 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004315 mch_errmsg("(\n");
4316 }
4317#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004318
4319 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004320 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004321 * regstack.
4322 */
4323 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004324 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004325 if (got_int || scan == NULL)
4326 {
4327 status = RA_FAIL;
4328 break;
4329 }
4330 status = RA_CONT;
4331
Bram Moolenaar071d4272004-06-13 20:20:40 +00004332#ifdef DEBUG
4333 if (regnarrate)
4334 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004335 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004336 mch_errmsg("...\n");
4337# ifdef FEAT_SYN_HL
4338 if (re_extmatch_in != NULL)
4339 {
4340 int i;
4341
4342 mch_errmsg(_("External submatches:\n"));
4343 for (i = 0; i < NSUBEXP; i++)
4344 {
4345 mch_errmsg(" \"");
4346 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004347 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004348 mch_errmsg("\"\n");
4349 }
4350 }
4351# endif
4352 }
4353#endif
4354 next = regnext(scan);
4355
4356 op = OP(scan);
4357 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004358 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4359 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004360 {
4361 reg_nextline();
4362 }
4363 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4364 {
4365 ADVANCE_REGINPUT();
4366 }
4367 else
4368 {
4369 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004370 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004371#ifdef FEAT_MBYTE
4372 if (has_mbyte)
4373 c = (*mb_ptr2char)(reginput);
4374 else
4375#endif
4376 c = *reginput;
4377 switch (op)
4378 {
4379 case BOL:
4380 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004381 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004382 break;
4383
4384 case EOL:
4385 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004386 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004387 break;
4388
4389 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004390 /* We're not at the beginning of the file when below the first
4391 * line where we started, not at the start of the line or we
4392 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004393 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004394 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004395 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 break;
4397
4398 case RE_EOF:
4399 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004400 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004401 break;
4402
4403 case CURSOR:
4404 /* Check if the buffer is in a window and compare the
4405 * reg_win->w_cursor position to the match position. */
4406 if (reg_win == NULL
4407 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4408 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004409 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004410 break;
4411
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004412 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004413 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004414 {
4415 int mark = OPERAND(scan)[0];
4416 int cmp = OPERAND(scan)[1];
4417 pos_T *pos;
4418
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004419 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004420 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004421 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004422 || (pos->lnum == reglnum + reg_firstlnum
4423 ? (pos->col == (colnr_T)(reginput - regline)
4424 ? (cmp == '<' || cmp == '>')
4425 : (pos->col < (colnr_T)(reginput - regline)
4426 ? cmp != '>'
4427 : cmp != '<'))
4428 : (pos->lnum < reglnum + reg_firstlnum
4429 ? cmp != '>'
4430 : cmp != '<')))
4431 status = RA_NOMATCH;
4432 }
4433 break;
4434
4435 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004436 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004437 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004438 break;
4439
Bram Moolenaar071d4272004-06-13 20:20:40 +00004440 case RE_LNUM:
4441 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4442 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004443 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004444 break;
4445
4446 case RE_COL:
4447 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004448 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004449 break;
4450
4451 case RE_VCOL:
4452 if (!re_num_cmp((long_u)win_linetabsize(
4453 reg_win == NULL ? curwin : reg_win,
4454 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004455 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004456 break;
4457
4458 case BOW: /* \<word; reginput points to w */
4459 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004460 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004461#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004462 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004463 {
4464 int this_class;
4465
4466 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004467 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004468 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004469 status = RA_NOMATCH; /* not on a word at all */
4470 else if (reg_prev_class() == this_class)
4471 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004472 }
4473#endif
4474 else
4475 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004476 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4477 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004478 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004479 }
4480 break;
4481
4482 case EOW: /* word\>; reginput points after d */
4483 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004484 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004485#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004486 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004487 {
4488 int this_class, prev_class;
4489
4490 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004491 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004492 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004493 if (this_class == prev_class
4494 || prev_class == 0 || prev_class == 1)
4495 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004496 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004497#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004498 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004499 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004500 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4501 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004502 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004503 }
4504 break; /* Matched with EOW */
4505
4506 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004507 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004508 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004509 status = RA_NOMATCH;
4510 else
4511 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004512 break;
4513
4514 case IDENT:
4515 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004516 status = RA_NOMATCH;
4517 else
4518 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004519 break;
4520
4521 case SIDENT:
4522 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004523 status = RA_NOMATCH;
4524 else
4525 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004526 break;
4527
4528 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004529 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004530 status = RA_NOMATCH;
4531 else
4532 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004533 break;
4534
4535 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004536 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004537 status = RA_NOMATCH;
4538 else
4539 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004540 break;
4541
4542 case FNAME:
4543 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004544 status = RA_NOMATCH;
4545 else
4546 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004547 break;
4548
4549 case SFNAME:
4550 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004551 status = RA_NOMATCH;
4552 else
4553 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004554 break;
4555
4556 case PRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004557 if (!vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004558 status = RA_NOMATCH;
4559 else
4560 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004561 break;
4562
4563 case SPRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004564 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004565 status = RA_NOMATCH;
4566 else
4567 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004568 break;
4569
4570 case WHITE:
4571 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004572 status = RA_NOMATCH;
4573 else
4574 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004575 break;
4576
4577 case NWHITE:
4578 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004579 status = RA_NOMATCH;
4580 else
4581 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004582 break;
4583
4584 case DIGIT:
4585 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004586 status = RA_NOMATCH;
4587 else
4588 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004589 break;
4590
4591 case NDIGIT:
4592 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004593 status = RA_NOMATCH;
4594 else
4595 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004596 break;
4597
4598 case HEX:
4599 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004600 status = RA_NOMATCH;
4601 else
4602 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004603 break;
4604
4605 case NHEX:
4606 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004607 status = RA_NOMATCH;
4608 else
4609 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004610 break;
4611
4612 case OCTAL:
4613 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004614 status = RA_NOMATCH;
4615 else
4616 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004617 break;
4618
4619 case NOCTAL:
4620 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004621 status = RA_NOMATCH;
4622 else
4623 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004624 break;
4625
4626 case WORD:
4627 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004628 status = RA_NOMATCH;
4629 else
4630 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004631 break;
4632
4633 case NWORD:
4634 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004635 status = RA_NOMATCH;
4636 else
4637 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004638 break;
4639
4640 case HEAD:
4641 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004642 status = RA_NOMATCH;
4643 else
4644 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004645 break;
4646
4647 case NHEAD:
4648 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004649 status = RA_NOMATCH;
4650 else
4651 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004652 break;
4653
4654 case ALPHA:
4655 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004656 status = RA_NOMATCH;
4657 else
4658 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004659 break;
4660
4661 case NALPHA:
4662 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004663 status = RA_NOMATCH;
4664 else
4665 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004666 break;
4667
4668 case LOWER:
4669 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004670 status = RA_NOMATCH;
4671 else
4672 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004673 break;
4674
4675 case NLOWER:
4676 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004677 status = RA_NOMATCH;
4678 else
4679 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004680 break;
4681
4682 case UPPER:
4683 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004684 status = RA_NOMATCH;
4685 else
4686 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004687 break;
4688
4689 case NUPPER:
4690 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004691 status = RA_NOMATCH;
4692 else
4693 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004694 break;
4695
4696 case EXACTLY:
4697 {
4698 int len;
4699 char_u *opnd;
4700
4701 opnd = OPERAND(scan);
4702 /* Inline the first byte, for speed. */
4703 if (*opnd != *reginput
4704 && (!ireg_ic || (
4705#ifdef FEAT_MBYTE
4706 !enc_utf8 &&
4707#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004708 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004709 status = RA_NOMATCH;
4710 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004711 {
4712 /* match empty string always works; happens when "~" is
4713 * empty. */
4714 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004715 else
4716 {
4717 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004718#ifdef FEAT_MBYTE
4719 && !(enc_utf8 && ireg_ic)
4720#endif
4721 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004722 {
4723 len = 1; /* matched a single byte above */
4724 }
4725 else
4726 {
4727 /* Need to match first byte again for multi-byte. */
4728 len = (int)STRLEN(opnd);
4729 if (cstrncmp(opnd, reginput, &len) != 0)
4730 status = RA_NOMATCH;
4731 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004732#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004733 /* Check for following composing character, unless %C
4734 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004735 if (status != RA_NOMATCH
4736 && enc_utf8
4737 && UTF_COMPOSINGLIKE(reginput, reginput + len)
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004738 && !ireg_icombine
4739 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004740 {
4741 /* raaron: This code makes a composing character get
4742 * ignored, which is the correct behavior (sometimes)
4743 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004744 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004745 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004746#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004747 if (status != RA_NOMATCH)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004748 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004749 }
4750 }
4751 break;
4752
4753 case ANYOF:
4754 case ANYBUT:
4755 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004756 status = RA_NOMATCH;
4757 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4758 status = RA_NOMATCH;
4759 else
4760 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004761 break;
4762
4763#ifdef FEAT_MBYTE
4764 case MULTIBYTECODE:
4765 if (has_mbyte)
4766 {
4767 int i, len;
4768 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004769 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004770
4771 opnd = OPERAND(scan);
4772 /* Safety check (just in case 'encoding' was changed since
4773 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004774 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004775 {
4776 status = RA_NOMATCH;
4777 break;
4778 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004779 if (enc_utf8)
4780 opndc = mb_ptr2char(opnd);
4781 if (enc_utf8 && utf_iscomposing(opndc))
4782 {
4783 /* When only a composing char is given match at any
4784 * position where that composing char appears. */
4785 status = RA_NOMATCH;
Bram Moolenaar0e462412015-03-31 14:17:31 +02004786 for (i = 0; reginput[i] != NUL;
4787 i += utf_ptr2len(reginput + i))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004788 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004789 inpc = mb_ptr2char(reginput + i);
4790 if (!utf_iscomposing(inpc))
4791 {
4792 if (i > 0)
4793 break;
4794 }
4795 else if (opndc == inpc)
4796 {
4797 /* Include all following composing chars. */
4798 len = i + mb_ptr2len(reginput + i);
4799 status = RA_MATCH;
4800 break;
4801 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004802 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004803 }
4804 else
4805 for (i = 0; i < len; ++i)
4806 if (opnd[i] != reginput[i])
4807 {
4808 status = RA_NOMATCH;
4809 break;
4810 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004811 reginput += len;
4812 }
4813 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004814 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004815 break;
4816#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004817 case RE_COMPOSING:
4818#ifdef FEAT_MBYTE
4819 if (enc_utf8)
4820 {
4821 /* Skip composing characters. */
4822 while (utf_iscomposing(utf_ptr2char(reginput)))
4823 mb_cptr_adv(reginput);
4824 }
4825#endif
4826 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004827
4828 case NOTHING:
4829 break;
4830
4831 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004832 {
4833 int i;
4834 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004835
Bram Moolenaar582fd852005-03-28 20:58:01 +00004836 /*
4837 * When we run into BACK we need to check if we don't keep
4838 * looping without matching any input. The second and later
4839 * times a BACK is encountered it fails if the input is still
4840 * at the same position as the previous time.
4841 * The positions are stored in "backpos" and found by the
4842 * current value of "scan", the position in the RE program.
4843 */
4844 bp = (backpos_T *)backpos.ga_data;
4845 for (i = 0; i < backpos.ga_len; ++i)
4846 if (bp[i].bp_scan == scan)
4847 break;
4848 if (i == backpos.ga_len)
4849 {
4850 /* First time at this BACK, make room to store the pos. */
4851 if (ga_grow(&backpos, 1) == FAIL)
4852 status = RA_FAIL;
4853 else
4854 {
4855 /* get "ga_data" again, it may have changed */
4856 bp = (backpos_T *)backpos.ga_data;
4857 bp[i].bp_scan = scan;
4858 ++backpos.ga_len;
4859 }
4860 }
4861 else if (reg_save_equal(&bp[i].bp_pos))
4862 /* Still at same position as last time, fail. */
4863 status = RA_NOMATCH;
4864
4865 if (status != RA_FAIL && status != RA_NOMATCH)
4866 reg_save(&bp[i].bp_pos, &backpos);
4867 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004868 break;
4869
Bram Moolenaar071d4272004-06-13 20:20:40 +00004870 case MOPEN + 0: /* Match start: \zs */
4871 case MOPEN + 1: /* \( */
4872 case MOPEN + 2:
4873 case MOPEN + 3:
4874 case MOPEN + 4:
4875 case MOPEN + 5:
4876 case MOPEN + 6:
4877 case MOPEN + 7:
4878 case MOPEN + 8:
4879 case MOPEN + 9:
4880 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004881 no = op - MOPEN;
4882 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004883 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004884 if (rp == NULL)
4885 status = RA_FAIL;
4886 else
4887 {
4888 rp->rs_no = no;
4889 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4890 &reg_startp[no]);
4891 /* We simply continue and handle the result when done. */
4892 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004893 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004894 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004895
4896 case NOPEN: /* \%( */
4897 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004898 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004899 status = RA_FAIL;
4900 /* We simply continue and handle the result when done. */
4901 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004902
4903#ifdef FEAT_SYN_HL
4904 case ZOPEN + 1:
4905 case ZOPEN + 2:
4906 case ZOPEN + 3:
4907 case ZOPEN + 4:
4908 case ZOPEN + 5:
4909 case ZOPEN + 6:
4910 case ZOPEN + 7:
4911 case ZOPEN + 8:
4912 case ZOPEN + 9:
4913 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004914 no = op - ZOPEN;
4915 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004916 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004917 if (rp == NULL)
4918 status = RA_FAIL;
4919 else
4920 {
4921 rp->rs_no = no;
4922 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4923 &reg_startzp[no]);
4924 /* We simply continue and handle the result when done. */
4925 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004926 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004927 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004928#endif
4929
4930 case MCLOSE + 0: /* Match end: \ze */
4931 case MCLOSE + 1: /* \) */
4932 case MCLOSE + 2:
4933 case MCLOSE + 3:
4934 case MCLOSE + 4:
4935 case MCLOSE + 5:
4936 case MCLOSE + 6:
4937 case MCLOSE + 7:
4938 case MCLOSE + 8:
4939 case MCLOSE + 9:
4940 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004941 no = op - MCLOSE;
4942 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004943 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004944 if (rp == NULL)
4945 status = RA_FAIL;
4946 else
4947 {
4948 rp->rs_no = no;
4949 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4950 /* We simply continue and handle the result when done. */
4951 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004952 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004953 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004954
4955#ifdef FEAT_SYN_HL
4956 case ZCLOSE + 1: /* \) after \z( */
4957 case ZCLOSE + 2:
4958 case ZCLOSE + 3:
4959 case ZCLOSE + 4:
4960 case ZCLOSE + 5:
4961 case ZCLOSE + 6:
4962 case ZCLOSE + 7:
4963 case ZCLOSE + 8:
4964 case ZCLOSE + 9:
4965 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004966 no = op - ZCLOSE;
4967 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004968 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004969 if (rp == NULL)
4970 status = RA_FAIL;
4971 else
4972 {
4973 rp->rs_no = no;
4974 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4975 &reg_endzp[no]);
4976 /* We simply continue and handle the result when done. */
4977 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004978 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004979 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004980#endif
4981
4982 case BACKREF + 1:
4983 case BACKREF + 2:
4984 case BACKREF + 3:
4985 case BACKREF + 4:
4986 case BACKREF + 5:
4987 case BACKREF + 6:
4988 case BACKREF + 7:
4989 case BACKREF + 8:
4990 case BACKREF + 9:
4991 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004992 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004993
4994 no = op - BACKREF;
4995 cleanup_subexpr();
4996 if (!REG_MULTI) /* Single-line regexp */
4997 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004998 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004999 {
5000 /* Backref was not set: Match an empty string. */
5001 len = 0;
5002 }
5003 else
5004 {
5005 /* Compare current input with back-ref in the same
5006 * line. */
5007 len = (int)(reg_endp[no] - reg_startp[no]);
5008 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005009 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005010 }
5011 }
5012 else /* Multi-line regexp */
5013 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00005014 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005015 {
5016 /* Backref was not set: Match an empty string. */
5017 len = 0;
5018 }
5019 else
5020 {
5021 if (reg_startpos[no].lnum == reglnum
5022 && reg_endpos[no].lnum == reglnum)
5023 {
5024 /* Compare back-ref within the current line. */
5025 len = reg_endpos[no].col - reg_startpos[no].col;
5026 if (cstrncmp(regline + reg_startpos[no].col,
5027 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005028 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005029 }
5030 else
5031 {
5032 /* Messy situation: Need to compare between two
5033 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005034 int r = match_with_backref(
Bram Moolenaar580abea2013-06-14 20:31:28 +02005035 reg_startpos[no].lnum,
5036 reg_startpos[no].col,
5037 reg_endpos[no].lnum,
5038 reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02005039 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005040
5041 if (r != RA_MATCH)
5042 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005043 }
5044 }
5045 }
5046
5047 /* Matched the backref, skip over it. */
5048 reginput += len;
5049 }
5050 break;
5051
5052#ifdef FEAT_SYN_HL
5053 case ZREF + 1:
5054 case ZREF + 2:
5055 case ZREF + 3:
5056 case ZREF + 4:
5057 case ZREF + 5:
5058 case ZREF + 6:
5059 case ZREF + 7:
5060 case ZREF + 8:
5061 case ZREF + 9:
5062 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005063 int len;
5064
5065 cleanup_zsubexpr();
5066 no = op - ZREF;
5067 if (re_extmatch_in != NULL
5068 && re_extmatch_in->matches[no] != NULL)
5069 {
5070 len = (int)STRLEN(re_extmatch_in->matches[no]);
5071 if (cstrncmp(re_extmatch_in->matches[no],
5072 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005073 status = RA_NOMATCH;
5074 else
5075 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005076 }
5077 else
5078 {
5079 /* Backref was not set: Match an empty string. */
5080 }
5081 }
5082 break;
5083#endif
5084
5085 case BRANCH:
5086 {
5087 if (OP(next) != BRANCH) /* No choice. */
5088 next = OPERAND(scan); /* Avoid recursion. */
5089 else
5090 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005091 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005092 if (rp == NULL)
5093 status = RA_FAIL;
5094 else
5095 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005096 }
5097 }
5098 break;
5099
5100 case BRACE_LIMITS:
5101 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005102 if (OP(next) == BRACE_SIMPLE)
5103 {
5104 bl_minval = OPERAND_MIN(scan);
5105 bl_maxval = OPERAND_MAX(scan);
5106 }
5107 else if (OP(next) >= BRACE_COMPLEX
5108 && OP(next) < BRACE_COMPLEX + 10)
5109 {
5110 no = OP(next) - BRACE_COMPLEX;
5111 brace_min[no] = OPERAND_MIN(scan);
5112 brace_max[no] = OPERAND_MAX(scan);
5113 brace_count[no] = 0;
5114 }
5115 else
5116 {
5117 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005118 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005119 }
5120 }
5121 break;
5122
5123 case BRACE_COMPLEX + 0:
5124 case BRACE_COMPLEX + 1:
5125 case BRACE_COMPLEX + 2:
5126 case BRACE_COMPLEX + 3:
5127 case BRACE_COMPLEX + 4:
5128 case BRACE_COMPLEX + 5:
5129 case BRACE_COMPLEX + 6:
5130 case BRACE_COMPLEX + 7:
5131 case BRACE_COMPLEX + 8:
5132 case BRACE_COMPLEX + 9:
5133 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005134 no = op - BRACE_COMPLEX;
5135 ++brace_count[no];
5136
5137 /* If not matched enough times yet, try one more */
5138 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005139 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005140 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005141 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005142 if (rp == NULL)
5143 status = RA_FAIL;
5144 else
5145 {
5146 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005147 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005148 next = OPERAND(scan);
5149 /* We continue and handle the result when done. */
5150 }
5151 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005152 }
5153
5154 /* If matched enough times, may try matching some more */
5155 if (brace_min[no] <= brace_max[no])
5156 {
5157 /* Range is the normal way around, use longest match */
5158 if (brace_count[no] <= brace_max[no])
5159 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005160 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005161 if (rp == NULL)
5162 status = RA_FAIL;
5163 else
5164 {
5165 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005166 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005167 next = OPERAND(scan);
5168 /* We continue and handle the result when done. */
5169 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005170 }
5171 }
5172 else
5173 {
5174 /* Range is backwards, use shortest match first */
5175 if (brace_count[no] <= brace_min[no])
5176 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005177 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005178 if (rp == NULL)
5179 status = RA_FAIL;
5180 else
5181 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005182 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005183 /* We continue and handle the result when done. */
5184 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005185 }
5186 }
5187 }
5188 break;
5189
5190 case BRACE_SIMPLE:
5191 case STAR:
5192 case PLUS:
5193 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005194 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195
5196 /*
5197 * Lookahead to avoid useless match attempts when we know
5198 * what character comes next.
5199 */
5200 if (OP(next) == EXACTLY)
5201 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005202 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203 if (ireg_ic)
5204 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005205 if (MB_ISUPPER(rst.nextb))
5206 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005207 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005208 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005209 }
5210 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005211 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005212 }
5213 else
5214 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005215 rst.nextb = NUL;
5216 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005217 }
5218 if (op != BRACE_SIMPLE)
5219 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005220 rst.minval = (op == STAR) ? 0 : 1;
5221 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005222 }
5223 else
5224 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005225 rst.minval = bl_minval;
5226 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005227 }
5228
5229 /*
5230 * When maxval > minval, try matching as much as possible, up
5231 * to maxval. When maxval < minval, try matching at least the
5232 * minimal number (since the range is backwards, that's also
5233 * maxval!).
5234 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005235 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005236 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005237 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005238 status = RA_FAIL;
5239 break;
5240 }
5241 if (rst.minval <= rst.maxval
5242 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5243 {
5244 /* It could match. Prepare for trying to match what
5245 * follows. The code is below. Parameters are stored in
5246 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005247 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005248 {
5249 EMSG(_(e_maxmempat));
5250 status = RA_FAIL;
5251 }
5252 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 status = RA_FAIL;
5254 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005255 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005256 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005257 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005258 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005259 if (rp == NULL)
5260 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005261 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005262 {
5263 *(((regstar_T *)rp) - 1) = rst;
5264 status = RA_BREAK; /* skip the restore bits */
5265 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005266 }
5267 }
5268 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005269 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005270
Bram Moolenaar071d4272004-06-13 20:20:40 +00005271 }
5272 break;
5273
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005274 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005275 case MATCH:
5276 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005277 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005278 if (rp == NULL)
5279 status = RA_FAIL;
5280 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005281 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005282 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005283 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005284 next = OPERAND(scan);
5285 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005286 }
5287 break;
5288
5289 case BEHIND:
5290 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005291 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005292 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005293 {
5294 EMSG(_(e_maxmempat));
5295 status = RA_FAIL;
5296 }
5297 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005298 status = RA_FAIL;
5299 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005300 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005301 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005302 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005303 if (rp == NULL)
5304 status = RA_FAIL;
5305 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005306 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005307 /* Need to save the subexpr to be able to restore them
5308 * when there is a match but we don't use it. */
5309 save_subexpr(((regbehind_T *)rp) - 1);
5310
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005311 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005312 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005313 /* First try if what follows matches. If it does then we
5314 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005315 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005316 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005317 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005318
5319 case BHPOS:
5320 if (REG_MULTI)
5321 {
5322 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5323 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005324 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005325 }
5326 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005327 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005328 break;
5329
5330 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005331 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5332 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005333 status = RA_NOMATCH;
5334 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005335 ADVANCE_REGINPUT();
5336 else
5337 reg_nextline();
5338 break;
5339
5340 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005341 status = RA_MATCH; /* Success! */
5342 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005343
5344 default:
5345 EMSG(_(e_re_corr));
5346#ifdef DEBUG
5347 printf("Illegal op code %d\n", op);
5348#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005349 status = RA_FAIL;
5350 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005351 }
5352 }
5353
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005354 /* If we can't continue sequentially, break the inner loop. */
5355 if (status != RA_CONT)
5356 break;
5357
5358 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005359 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005360
5361 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005362
5363 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005364 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005365 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005366 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005367 while (regstack.ga_len > 0 && status != RA_FAIL)
5368 {
5369 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5370 switch (rp->rs_state)
5371 {
5372 case RS_NOPEN:
5373 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005374 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005375 break;
5376
5377 case RS_MOPEN:
5378 /* Pop the state. Restore pointers when there is no match. */
5379 if (status == RA_NOMATCH)
5380 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5381 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005382 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005383 break;
5384
5385#ifdef FEAT_SYN_HL
5386 case RS_ZOPEN:
5387 /* Pop the state. Restore pointers when there is no match. */
5388 if (status == RA_NOMATCH)
5389 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5390 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005391 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005392 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005393#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005394
5395 case RS_MCLOSE:
5396 /* Pop the state. Restore pointers when there is no match. */
5397 if (status == RA_NOMATCH)
5398 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5399 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005400 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005401 break;
5402
5403#ifdef FEAT_SYN_HL
5404 case RS_ZCLOSE:
5405 /* Pop the state. Restore pointers when there is no match. */
5406 if (status == RA_NOMATCH)
5407 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5408 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005409 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005410 break;
5411#endif
5412
5413 case RS_BRANCH:
5414 if (status == RA_MATCH)
5415 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005416 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005417 else
5418 {
5419 if (status != RA_BREAK)
5420 {
5421 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005422 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005423 scan = rp->rs_scan;
5424 }
5425 if (scan == NULL || OP(scan) != BRANCH)
5426 {
5427 /* no more branches, didn't find a match */
5428 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005429 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005430 }
5431 else
5432 {
5433 /* Prepare to try a branch. */
5434 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005435 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005436 scan = OPERAND(scan);
5437 }
5438 }
5439 break;
5440
5441 case RS_BRCPLX_MORE:
5442 /* Pop the state. Restore pointers when there is no match. */
5443 if (status == RA_NOMATCH)
5444 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005445 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005446 --brace_count[rp->rs_no]; /* decrement match count */
5447 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005448 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005449 break;
5450
5451 case RS_BRCPLX_LONG:
5452 /* Pop the state. Restore pointers when there is no match. */
5453 if (status == RA_NOMATCH)
5454 {
5455 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005456 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005457 --brace_count[rp->rs_no];
5458 /* continue with the items after "\{}" */
5459 status = RA_CONT;
5460 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005461 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005462 if (status == RA_CONT)
5463 scan = regnext(scan);
5464 break;
5465
5466 case RS_BRCPLX_SHORT:
5467 /* Pop the state. Restore pointers when there is no match. */
5468 if (status == RA_NOMATCH)
5469 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005470 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005471 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005472 if (status == RA_NOMATCH)
5473 {
5474 scan = OPERAND(scan);
5475 status = RA_CONT;
5476 }
5477 break;
5478
5479 case RS_NOMATCH:
5480 /* Pop the state. If the operand matches for NOMATCH or
5481 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5482 * except for SUBPAT, and continue with the next item. */
5483 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5484 status = RA_NOMATCH;
5485 else
5486 {
5487 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005488 if (rp->rs_no != SUBPAT) /* zero-width */
5489 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005490 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005491 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005492 if (status == RA_CONT)
5493 scan = regnext(scan);
5494 break;
5495
5496 case RS_BEHIND1:
5497 if (status == RA_NOMATCH)
5498 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005499 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005500 regstack.ga_len -= sizeof(regbehind_T);
5501 }
5502 else
5503 {
5504 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5505 * the behind part does (not) match before the current
5506 * position in the input. This must be done at every
5507 * position in the input and checking if the match ends at
5508 * the current position. */
5509
5510 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005511 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005512
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005513 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005514 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005515 * result, hitting the start of the line or the previous
5516 * line (for multi-line matching).
5517 * Set behind_pos to where the match should end, BHPOS
5518 * will match it. Save the current value. */
5519 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5520 behind_pos = rp->rs_un.regsave;
5521
5522 rp->rs_state = RS_BEHIND2;
5523
Bram Moolenaar582fd852005-03-28 20:58:01 +00005524 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005525 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005526 }
5527 break;
5528
5529 case RS_BEHIND2:
5530 /*
5531 * Looping for BEHIND / NOBEHIND match.
5532 */
5533 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5534 {
5535 /* found a match that ends where "next" started */
5536 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5537 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005538 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5539 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005540 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005541 {
5542 /* But we didn't want a match. Need to restore the
5543 * subexpr, because what follows matched, so they have
5544 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005545 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005546 restore_subexpr(((regbehind_T *)rp) - 1);
5547 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005548 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005549 regstack.ga_len -= sizeof(regbehind_T);
5550 }
5551 else
5552 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005553 long limit;
5554
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005555 /* No match or a match that doesn't end where we want it: Go
5556 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005557 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005558 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005559 if (REG_MULTI)
5560 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005561 if (limit > 0
5562 && ((rp->rs_un.regsave.rs_u.pos.lnum
5563 < behind_pos.rs_u.pos.lnum
5564 ? (colnr_T)STRLEN(regline)
5565 : behind_pos.rs_u.pos.col)
5566 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5567 no = FAIL;
5568 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005569 {
5570 if (rp->rs_un.regsave.rs_u.pos.lnum
5571 < behind_pos.rs_u.pos.lnum
5572 || reg_getline(
5573 --rp->rs_un.regsave.rs_u.pos.lnum)
5574 == NULL)
5575 no = FAIL;
5576 else
5577 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005578 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005579 rp->rs_un.regsave.rs_u.pos.col =
5580 (colnr_T)STRLEN(regline);
5581 }
5582 }
5583 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005584 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005585#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005586 if (has_mbyte)
5587 rp->rs_un.regsave.rs_u.pos.col -=
5588 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005589 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005590 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005591#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005592 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005593 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005594 }
5595 else
5596 {
5597 if (rp->rs_un.regsave.rs_u.ptr == regline)
5598 no = FAIL;
5599 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005600 {
5601 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5602 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5603 - rp->rs_un.regsave.rs_u.ptr) > limit)
5604 no = FAIL;
5605 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005606 }
5607 if (no == OK)
5608 {
5609 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005610 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005611 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005612 if (status == RA_MATCH)
5613 {
5614 /* We did match, so subexpr may have been changed,
5615 * need to restore them for the next try. */
5616 status = RA_NOMATCH;
5617 restore_subexpr(((regbehind_T *)rp) - 1);
5618 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005619 }
5620 else
5621 {
5622 /* Can't advance. For NOBEHIND that's a match. */
5623 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5624 if (rp->rs_no == NOBEHIND)
5625 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005626 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5627 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005628 status = RA_MATCH;
5629 }
5630 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005631 {
5632 /* We do want a proper match. Need to restore the
5633 * subexpr if we had a match, because they may have
5634 * been set. */
5635 if (status == RA_MATCH)
5636 {
5637 status = RA_NOMATCH;
5638 restore_subexpr(((regbehind_T *)rp) - 1);
5639 }
5640 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005641 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005642 regstack.ga_len -= sizeof(regbehind_T);
5643 }
5644 }
5645 break;
5646
5647 case RS_STAR_LONG:
5648 case RS_STAR_SHORT:
5649 {
5650 regstar_T *rst = ((regstar_T *)rp) - 1;
5651
5652 if (status == RA_MATCH)
5653 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005654 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005655 regstack.ga_len -= sizeof(regstar_T);
5656 break;
5657 }
5658
5659 /* Tried once already, restore input pointers. */
5660 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005661 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005662
5663 /* Repeat until we found a position where it could match. */
5664 for (;;)
5665 {
5666 if (status != RA_BREAK)
5667 {
5668 /* Tried first position already, advance. */
5669 if (rp->rs_state == RS_STAR_LONG)
5670 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005671 /* Trying for longest match, but couldn't or
5672 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005673 if (--rst->count < rst->minval)
5674 break;
5675 if (reginput == regline)
5676 {
5677 /* backup to last char of previous line */
5678 --reglnum;
5679 regline = reg_getline(reglnum);
5680 /* Just in case regrepeat() didn't count
5681 * right. */
5682 if (regline == NULL)
5683 break;
5684 reginput = regline + STRLEN(regline);
5685 fast_breakcheck();
5686 }
5687 else
5688 mb_ptr_back(regline, reginput);
5689 }
5690 else
5691 {
5692 /* Range is backwards, use shortest match first.
5693 * Careful: maxval and minval are exchanged!
5694 * Couldn't or didn't match: try advancing one
5695 * char. */
5696 if (rst->count == rst->minval
5697 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5698 break;
5699 ++rst->count;
5700 }
5701 if (got_int)
5702 break;
5703 }
5704 else
5705 status = RA_NOMATCH;
5706
5707 /* If it could match, try it. */
5708 if (rst->nextb == NUL || *reginput == rst->nextb
5709 || *reginput == rst->nextb_ic)
5710 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005711 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005712 scan = regnext(rp->rs_scan);
5713 status = RA_CONT;
5714 break;
5715 }
5716 }
5717 if (status != RA_CONT)
5718 {
5719 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005720 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005721 regstack.ga_len -= sizeof(regstar_T);
5722 status = RA_NOMATCH;
5723 }
5724 }
5725 break;
5726 }
5727
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005728 /* If we want to continue the inner loop or didn't pop a state
5729 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005730 if (status == RA_CONT || rp == (regitem_T *)
5731 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5732 break;
5733 }
5734
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005735 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005736 if (status == RA_CONT)
5737 continue;
5738
5739 /*
5740 * If the regstack is empty or something failed we are done.
5741 */
5742 if (regstack.ga_len == 0 || status == RA_FAIL)
5743 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005744 if (scan == NULL)
5745 {
5746 /*
5747 * We get here only if there's trouble -- normally "case END" is
5748 * the terminating point.
5749 */
5750 EMSG(_(e_re_corr));
5751#ifdef DEBUG
5752 printf("Premature EOL\n");
5753#endif
5754 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005755 if (status == RA_FAIL)
5756 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005757 return (status == RA_MATCH);
5758 }
5759
5760 } /* End of loop until the regstack is empty. */
5761
5762 /* NOTREACHED */
5763}
5764
5765/*
5766 * Push an item onto the regstack.
5767 * Returns pointer to new item. Returns NULL when out of memory.
5768 */
5769 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005770regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005771 regstate_T state;
5772 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005773{
5774 regitem_T *rp;
5775
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005776 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005777 {
5778 EMSG(_(e_maxmempat));
5779 return NULL;
5780 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005781 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005782 return NULL;
5783
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005784 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005785 rp->rs_state = state;
5786 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005787
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005788 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005789 return rp;
5790}
5791
5792/*
5793 * Pop an item from the regstack.
5794 */
5795 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005796regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005797 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005798{
5799 regitem_T *rp;
5800
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005801 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005802 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005803
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005804 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005805}
5806
Bram Moolenaar071d4272004-06-13 20:20:40 +00005807/*
5808 * regrepeat - repeatedly match something simple, return how many.
5809 * Advances reginput (and reglnum) to just after the matched chars.
5810 */
5811 static int
5812regrepeat(p, maxcount)
5813 char_u *p;
5814 long maxcount; /* maximum number of matches allowed */
5815{
5816 long count = 0;
5817 char_u *scan;
5818 char_u *opnd;
5819 int mask;
5820 int testval = 0;
5821
5822 scan = reginput; /* Make local copy of reginput for speed. */
5823 opnd = OPERAND(p);
5824 switch (OP(p))
5825 {
5826 case ANY:
5827 case ANY + ADD_NL:
5828 while (count < maxcount)
5829 {
5830 /* Matching anything means we continue until end-of-line (or
5831 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5832 while (*scan != NUL && count < maxcount)
5833 {
5834 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005835 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005836 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005837 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5838 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005839 break;
5840 ++count; /* count the line-break */
5841 reg_nextline();
5842 scan = reginput;
5843 if (got_int)
5844 break;
5845 }
5846 break;
5847
5848 case IDENT:
5849 case IDENT + ADD_NL:
5850 testval = TRUE;
5851 /*FALLTHROUGH*/
5852 case SIDENT:
5853 case SIDENT + ADD_NL:
5854 while (count < maxcount)
5855 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005856 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005857 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005858 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005859 }
5860 else if (*scan == NUL)
5861 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005862 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5863 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005864 break;
5865 reg_nextline();
5866 scan = reginput;
5867 if (got_int)
5868 break;
5869 }
5870 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5871 ++scan;
5872 else
5873 break;
5874 ++count;
5875 }
5876 break;
5877
5878 case KWORD:
5879 case KWORD + ADD_NL:
5880 testval = TRUE;
5881 /*FALLTHROUGH*/
5882 case SKWORD:
5883 case SKWORD + ADD_NL:
5884 while (count < maxcount)
5885 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005886 if (vim_iswordp_buf(scan, reg_buf)
5887 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005888 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005889 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005890 }
5891 else if (*scan == NUL)
5892 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005893 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5894 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005895 break;
5896 reg_nextline();
5897 scan = reginput;
5898 if (got_int)
5899 break;
5900 }
5901 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5902 ++scan;
5903 else
5904 break;
5905 ++count;
5906 }
5907 break;
5908
5909 case FNAME:
5910 case FNAME + ADD_NL:
5911 testval = TRUE;
5912 /*FALLTHROUGH*/
5913 case SFNAME:
5914 case SFNAME + ADD_NL:
5915 while (count < maxcount)
5916 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005917 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005918 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005919 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005920 }
5921 else if (*scan == NUL)
5922 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005923 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5924 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005925 break;
5926 reg_nextline();
5927 scan = reginput;
5928 if (got_int)
5929 break;
5930 }
5931 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5932 ++scan;
5933 else
5934 break;
5935 ++count;
5936 }
5937 break;
5938
5939 case PRINT:
5940 case PRINT + ADD_NL:
5941 testval = TRUE;
5942 /*FALLTHROUGH*/
5943 case SPRINT:
5944 case SPRINT + ADD_NL:
5945 while (count < maxcount)
5946 {
5947 if (*scan == NUL)
5948 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005949 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5950 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005951 break;
5952 reg_nextline();
5953 scan = reginput;
5954 if (got_int)
5955 break;
5956 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005957 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5958 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005959 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005960 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005961 }
5962 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5963 ++scan;
5964 else
5965 break;
5966 ++count;
5967 }
5968 break;
5969
5970 case WHITE:
5971 case WHITE + ADD_NL:
5972 testval = mask = RI_WHITE;
5973do_class:
5974 while (count < maxcount)
5975 {
5976#ifdef FEAT_MBYTE
5977 int l;
5978#endif
5979 if (*scan == NUL)
5980 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005981 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5982 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005983 break;
5984 reg_nextline();
5985 scan = reginput;
5986 if (got_int)
5987 break;
5988 }
5989#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005990 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005991 {
5992 if (testval != 0)
5993 break;
5994 scan += l;
5995 }
5996#endif
5997 else if ((class_tab[*scan] & mask) == testval)
5998 ++scan;
5999 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6000 ++scan;
6001 else
6002 break;
6003 ++count;
6004 }
6005 break;
6006
6007 case NWHITE:
6008 case NWHITE + ADD_NL:
6009 mask = RI_WHITE;
6010 goto do_class;
6011 case DIGIT:
6012 case DIGIT + ADD_NL:
6013 testval = mask = RI_DIGIT;
6014 goto do_class;
6015 case NDIGIT:
6016 case NDIGIT + ADD_NL:
6017 mask = RI_DIGIT;
6018 goto do_class;
6019 case HEX:
6020 case HEX + ADD_NL:
6021 testval = mask = RI_HEX;
6022 goto do_class;
6023 case NHEX:
6024 case NHEX + ADD_NL:
6025 mask = RI_HEX;
6026 goto do_class;
6027 case OCTAL:
6028 case OCTAL + ADD_NL:
6029 testval = mask = RI_OCTAL;
6030 goto do_class;
6031 case NOCTAL:
6032 case NOCTAL + ADD_NL:
6033 mask = RI_OCTAL;
6034 goto do_class;
6035 case WORD:
6036 case WORD + ADD_NL:
6037 testval = mask = RI_WORD;
6038 goto do_class;
6039 case NWORD:
6040 case NWORD + ADD_NL:
6041 mask = RI_WORD;
6042 goto do_class;
6043 case HEAD:
6044 case HEAD + ADD_NL:
6045 testval = mask = RI_HEAD;
6046 goto do_class;
6047 case NHEAD:
6048 case NHEAD + ADD_NL:
6049 mask = RI_HEAD;
6050 goto do_class;
6051 case ALPHA:
6052 case ALPHA + ADD_NL:
6053 testval = mask = RI_ALPHA;
6054 goto do_class;
6055 case NALPHA:
6056 case NALPHA + ADD_NL:
6057 mask = RI_ALPHA;
6058 goto do_class;
6059 case LOWER:
6060 case LOWER + ADD_NL:
6061 testval = mask = RI_LOWER;
6062 goto do_class;
6063 case NLOWER:
6064 case NLOWER + ADD_NL:
6065 mask = RI_LOWER;
6066 goto do_class;
6067 case UPPER:
6068 case UPPER + ADD_NL:
6069 testval = mask = RI_UPPER;
6070 goto do_class;
6071 case NUPPER:
6072 case NUPPER + ADD_NL:
6073 mask = RI_UPPER;
6074 goto do_class;
6075
6076 case EXACTLY:
6077 {
6078 int cu, cl;
6079
6080 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006081 * would have been used for it. It does handle single-byte
6082 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006083 if (ireg_ic)
6084 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006085 cu = MB_TOUPPER(*opnd);
6086 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006087 while (count < maxcount && (*scan == cu || *scan == cl))
6088 {
6089 count++;
6090 scan++;
6091 }
6092 }
6093 else
6094 {
6095 cu = *opnd;
6096 while (count < maxcount && *scan == cu)
6097 {
6098 count++;
6099 scan++;
6100 }
6101 }
6102 break;
6103 }
6104
6105#ifdef FEAT_MBYTE
6106 case MULTIBYTECODE:
6107 {
6108 int i, len, cf = 0;
6109
6110 /* Safety check (just in case 'encoding' was changed since
6111 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006112 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006113 {
6114 if (ireg_ic && enc_utf8)
6115 cf = utf_fold(utf_ptr2char(opnd));
Bram Moolenaar069dd082015-05-04 09:56:49 +02006116 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006117 {
6118 for (i = 0; i < len; ++i)
6119 if (opnd[i] != scan[i])
6120 break;
6121 if (i < len && (!ireg_ic || !enc_utf8
6122 || utf_fold(utf_ptr2char(scan)) != cf))
6123 break;
6124 scan += len;
6125 ++count;
6126 }
6127 }
6128 }
6129 break;
6130#endif
6131
6132 case ANYOF:
6133 case ANYOF + ADD_NL:
6134 testval = TRUE;
6135 /*FALLTHROUGH*/
6136
6137 case ANYBUT:
6138 case ANYBUT + ADD_NL:
6139 while (count < maxcount)
6140 {
6141#ifdef FEAT_MBYTE
6142 int len;
6143#endif
6144 if (*scan == NUL)
6145 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006146 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6147 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006148 break;
6149 reg_nextline();
6150 scan = reginput;
6151 if (got_int)
6152 break;
6153 }
6154 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6155 ++scan;
6156#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006157 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006158 {
6159 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6160 break;
6161 scan += len;
6162 }
6163#endif
6164 else
6165 {
6166 if ((cstrchr(opnd, *scan) == NULL) == testval)
6167 break;
6168 ++scan;
6169 }
6170 ++count;
6171 }
6172 break;
6173
6174 case NEWL:
6175 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006176 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6177 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006178 {
6179 count++;
6180 if (reg_line_lbr)
6181 ADVANCE_REGINPUT();
6182 else
6183 reg_nextline();
6184 scan = reginput;
6185 if (got_int)
6186 break;
6187 }
6188 break;
6189
6190 default: /* Oh dear. Called inappropriately. */
6191 EMSG(_(e_re_corr));
6192#ifdef DEBUG
6193 printf("Called regrepeat with op code %d\n", OP(p));
6194#endif
6195 break;
6196 }
6197
6198 reginput = scan;
6199
6200 return (int)count;
6201}
6202
6203/*
6204 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006205 * Returns NULL when calculating size, when there is no next item and when
6206 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006207 */
6208 static char_u *
6209regnext(p)
6210 char_u *p;
6211{
6212 int offset;
6213
Bram Moolenaard3005802009-11-25 17:21:32 +00006214 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006215 return NULL;
6216
6217 offset = NEXT(p);
6218 if (offset == 0)
6219 return NULL;
6220
Bram Moolenaar582fd852005-03-28 20:58:01 +00006221 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006222 return p - offset;
6223 else
6224 return p + offset;
6225}
6226
6227/*
6228 * Check the regexp program for its magic number.
6229 * Return TRUE if it's wrong.
6230 */
6231 static int
6232prog_magic_wrong()
6233{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006234 regprog_T *prog;
6235
6236 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6237 if (prog->engine == &nfa_regengine)
6238 /* For NFA matcher we don't check the magic */
6239 return FALSE;
6240
6241 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006242 {
6243 EMSG(_(e_re_corr));
6244 return TRUE;
6245 }
6246 return FALSE;
6247}
6248
6249/*
6250 * Cleanup the subexpressions, if this wasn't done yet.
6251 * This construction is used to clear the subexpressions only when they are
6252 * used (to increase speed).
6253 */
6254 static void
6255cleanup_subexpr()
6256{
6257 if (need_clear_subexpr)
6258 {
6259 if (REG_MULTI)
6260 {
6261 /* Use 0xff to set lnum to -1 */
6262 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6263 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6264 }
6265 else
6266 {
6267 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6268 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6269 }
6270 need_clear_subexpr = FALSE;
6271 }
6272}
6273
#ifdef FEAT_SYN_HL
/*
 * Like cleanup_subexpr(), but for the "z" subexpression arrays: clear them
 * when "need_clear_zsubexpr" is set and reset that flag.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;                 /* already done */

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes makes every lnum -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6295
6296/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006297 * Save the current subexpr to "bp", so that they can be restored
6298 * later by restore_subexpr().
6299 */
6300 static void
6301save_subexpr(bp)
6302 regbehind_T *bp;
6303{
6304 int i;
6305
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006306 /* When "need_clear_subexpr" is set we don't need to save the values, only
6307 * remember that this flag needs to be set again when restoring. */
6308 bp->save_need_clear_subexpr = need_clear_subexpr;
6309 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006310 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006311 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006312 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006313 if (REG_MULTI)
6314 {
6315 bp->save_start[i].se_u.pos = reg_startpos[i];
6316 bp->save_end[i].se_u.pos = reg_endpos[i];
6317 }
6318 else
6319 {
6320 bp->save_start[i].se_u.ptr = reg_startp[i];
6321 bp->save_end[i].se_u.ptr = reg_endp[i];
6322 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006323 }
6324 }
6325}
6326
6327/*
6328 * Restore the subexpr from "bp".
6329 */
6330 static void
6331restore_subexpr(bp)
6332 regbehind_T *bp;
6333{
6334 int i;
6335
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006336 /* Only need to restore saved values when they are not to be cleared. */
6337 need_clear_subexpr = bp->save_need_clear_subexpr;
6338 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006339 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006340 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006341 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006342 if (REG_MULTI)
6343 {
6344 reg_startpos[i] = bp->save_start[i].se_u.pos;
6345 reg_endpos[i] = bp->save_end[i].se_u.pos;
6346 }
6347 else
6348 {
6349 reg_startp[i] = bp->save_start[i].se_u.ptr;
6350 reg_endp[i] = bp->save_end[i].se_u.ptr;
6351 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006352 }
6353 }
6354}
6355
6356/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006357 * Advance reglnum, regline and reginput to the next line.
6358 */
6359 static void
6360reg_nextline()
6361{
6362 regline = reg_getline(++reglnum);
6363 reginput = regline;
6364 fast_breakcheck();
6365}
6366
6367/*
6368 * Save the input line and position in a regsave_T.
6369 */
6370 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006371reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006372 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006373 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006374{
6375 if (REG_MULTI)
6376 {
6377 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6378 save->rs_u.pos.lnum = reglnum;
6379 }
6380 else
6381 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006382 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006383}
6384
6385/*
6386 * Restore the input line and position from a regsave_T.
6387 */
6388 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006389reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006390 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006391 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006392{
6393 if (REG_MULTI)
6394 {
6395 if (reglnum != save->rs_u.pos.lnum)
6396 {
6397 /* only call reg_getline() when the line number changed to save
6398 * a bit of time */
6399 reglnum = save->rs_u.pos.lnum;
6400 regline = reg_getline(reglnum);
6401 }
6402 reginput = regline + save->rs_u.pos.col;
6403 }
6404 else
6405 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006406 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006407}
6408
6409/*
6410 * Return TRUE if current position is equal to saved position.
6411 */
6412 static int
6413reg_save_equal(save)
6414 regsave_T *save;
6415{
6416 if (REG_MULTI)
6417 return reglnum == save->rs_u.pos.lnum
6418 && reginput == regline + save->rs_u.pos.col;
6419 return reginput == save->rs_u.ptr;
6420}
6421
6422/*
6423 * Tentatively set the sub-expression start to the current position (after
6424 * calling regmatch() they will have changed). Need to save the existing
6425 * values for when there is no match.
6426 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6427 * depending on REG_MULTI.
6428 */
6429 static void
6430save_se_multi(savep, posp)
6431 save_se_T *savep;
6432 lpos_T *posp;
6433{
6434 savep->se_u.pos = *posp;
6435 posp->lnum = reglnum;
6436 posp->col = (colnr_T)(reginput - regline);
6437}
6438
6439 static void
6440save_se_one(savep, pp)
6441 save_se_T *savep;
6442 char_u **pp;
6443{
6444 savep->se_u.ptr = *pp;
6445 *pp = reginput;
6446}
6447
6448/*
6449 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6450 */
6451 static int
6452re_num_cmp(val, scan)
6453 long_u val;
6454 char_u *scan;
6455{
6456 long_u n = OPERAND_MIN(scan);
6457
6458 if (OPERAND_CMP(scan) == '>')
6459 return val > n;
6460 if (OPERAND_CMP(scan) == '<')
6461 return val < n;
6462 return val == n;
6463}
6464
Bram Moolenaar580abea2013-06-14 20:31:28 +02006465/*
6466 * Check whether a backreference matches.
6467 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006468 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6469 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006470 */
6471 static int
6472match_with_backref(start_lnum, start_col, end_lnum, end_col, bytelen)
6473 linenr_T start_lnum;
6474 colnr_T start_col;
6475 linenr_T end_lnum;
6476 colnr_T end_col;
6477 int *bytelen;
6478{
6479 linenr_T clnum = start_lnum;
6480 colnr_T ccol = start_col;
6481 int len;
6482 char_u *p;
6483
6484 if (bytelen != NULL)
6485 *bytelen = 0;
6486 for (;;)
6487 {
6488 /* Since getting one line may invalidate the other, need to make copy.
6489 * Slow! */
6490 if (regline != reg_tofree)
6491 {
6492 len = (int)STRLEN(regline);
6493 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6494 {
6495 len += 50; /* get some extra */
6496 vim_free(reg_tofree);
6497 reg_tofree = alloc(len);
6498 if (reg_tofree == NULL)
6499 return RA_FAIL; /* out of memory!*/
6500 reg_tofreelen = len;
6501 }
6502 STRCPY(reg_tofree, regline);
6503 reginput = reg_tofree + (reginput - regline);
6504 regline = reg_tofree;
6505 }
6506
6507 /* Get the line to compare with. */
6508 p = reg_getline(clnum);
6509 if (clnum == end_lnum)
6510 len = end_col - ccol;
6511 else
6512 len = (int)STRLEN(p + ccol);
6513
6514 if (cstrncmp(p + ccol, reginput, &len) != 0)
6515 return RA_NOMATCH; /* doesn't match */
6516 if (bytelen != NULL)
6517 *bytelen += len;
6518 if (clnum == end_lnum)
6519 break; /* match and at end! */
6520 if (reglnum >= reg_maxline)
6521 return RA_NOMATCH; /* text too short */
6522
6523 /* Advance to next line. */
6524 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006525 if (bytelen != NULL)
6526 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006527 ++clnum;
6528 ccol = 0;
6529 if (got_int)
6530 return RA_FAIL;
6531 }
6532
6533 /* found a match! Note that regline may now point to a copy of the line,
6534 * that should not matter. */
6535 return RA_MATCH;
6536}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006537
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006538#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006539
6540/*
6541 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6542 */
6543 static void
6544regdump(pattern, r)
6545 char_u *pattern;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006546 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006547{
6548 char_u *s;
6549 int op = EXACTLY; /* Arbitrary non-END op. */
6550 char_u *next;
6551 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006552 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006553
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006554#ifdef BT_REGEXP_LOG
6555 f = fopen("bt_regexp_log.log", "a");
6556#else
6557 f = stdout;
6558#endif
6559 if (f == NULL)
6560 return;
6561 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006562
6563 s = r->program + 1;
6564 /*
6565 * Loop until we find the END that isn't before a referred next (an END
6566 * can also appear in a NOMATCH operand).
6567 */
6568 while (op != END || s <= end)
6569 {
6570 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006571 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006572 next = regnext(s);
6573 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006574 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006575 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006576 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006577 if (end < next)
6578 end = next;
6579 if (op == BRACE_LIMITS)
6580 {
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006581 /* Two ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006582 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006583 s += 8;
6584 }
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006585 else if (op == BEHIND || op == NOBEHIND)
6586 {
6587 /* one int */
6588 fprintf(f, " count %ld", OPERAND_MIN(s));
6589 s += 4;
6590 }
Bram Moolenaar6d3a5d72013-06-06 18:04:51 +02006591 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6592 {
6593 /* one int plus comperator */
6594 fprintf(f, " count %ld", OPERAND_MIN(s));
6595 s += 5;
6596 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006597 s += 3;
6598 if (op == ANYOF || op == ANYOF + ADD_NL
6599 || op == ANYBUT || op == ANYBUT + ADD_NL
6600 || op == EXACTLY)
6601 {
6602 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006603 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006604 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006605 fprintf(f, "%c", *s++);
6606 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006607 s++;
6608 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006609 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006610 }
6611
6612 /* Header fields of interest. */
6613 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006614 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006615 ? (char *)transchar(r->regstart)
6616 : "multibyte", r->regstart);
6617 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006618 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006619 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006620 fprintf(f, "must have \"%s\"", r->regmust);
6621 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006622
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006623#ifdef BT_REGEXP_LOG
6624 fclose(f);
6625#endif
6626}
6627#endif /* BT_REGEXP_DUMP */
6628
6629#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006630/*
6631 * regprop - printable representation of opcode
6632 */
6633 static char_u *
6634regprop(op)
6635 char_u *op;
6636{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006637 char *p;
6638 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006639
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006640 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006641
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006642 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006643 {
6644 case BOL:
6645 p = "BOL";
6646 break;
6647 case EOL:
6648 p = "EOL";
6649 break;
6650 case RE_BOF:
6651 p = "BOF";
6652 break;
6653 case RE_EOF:
6654 p = "EOF";
6655 break;
6656 case CURSOR:
6657 p = "CURSOR";
6658 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006659 case RE_VISUAL:
6660 p = "RE_VISUAL";
6661 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006662 case RE_LNUM:
6663 p = "RE_LNUM";
6664 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006665 case RE_MARK:
6666 p = "RE_MARK";
6667 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006668 case RE_COL:
6669 p = "RE_COL";
6670 break;
6671 case RE_VCOL:
6672 p = "RE_VCOL";
6673 break;
6674 case BOW:
6675 p = "BOW";
6676 break;
6677 case EOW:
6678 p = "EOW";
6679 break;
6680 case ANY:
6681 p = "ANY";
6682 break;
6683 case ANY + ADD_NL:
6684 p = "ANY+NL";
6685 break;
6686 case ANYOF:
6687 p = "ANYOF";
6688 break;
6689 case ANYOF + ADD_NL:
6690 p = "ANYOF+NL";
6691 break;
6692 case ANYBUT:
6693 p = "ANYBUT";
6694 break;
6695 case ANYBUT + ADD_NL:
6696 p = "ANYBUT+NL";
6697 break;
6698 case IDENT:
6699 p = "IDENT";
6700 break;
6701 case IDENT + ADD_NL:
6702 p = "IDENT+NL";
6703 break;
6704 case SIDENT:
6705 p = "SIDENT";
6706 break;
6707 case SIDENT + ADD_NL:
6708 p = "SIDENT+NL";
6709 break;
6710 case KWORD:
6711 p = "KWORD";
6712 break;
6713 case KWORD + ADD_NL:
6714 p = "KWORD+NL";
6715 break;
6716 case SKWORD:
6717 p = "SKWORD";
6718 break;
6719 case SKWORD + ADD_NL:
6720 p = "SKWORD+NL";
6721 break;
6722 case FNAME:
6723 p = "FNAME";
6724 break;
6725 case FNAME + ADD_NL:
6726 p = "FNAME+NL";
6727 break;
6728 case SFNAME:
6729 p = "SFNAME";
6730 break;
6731 case SFNAME + ADD_NL:
6732 p = "SFNAME+NL";
6733 break;
6734 case PRINT:
6735 p = "PRINT";
6736 break;
6737 case PRINT + ADD_NL:
6738 p = "PRINT+NL";
6739 break;
6740 case SPRINT:
6741 p = "SPRINT";
6742 break;
6743 case SPRINT + ADD_NL:
6744 p = "SPRINT+NL";
6745 break;
6746 case WHITE:
6747 p = "WHITE";
6748 break;
6749 case WHITE + ADD_NL:
6750 p = "WHITE+NL";
6751 break;
6752 case NWHITE:
6753 p = "NWHITE";
6754 break;
6755 case NWHITE + ADD_NL:
6756 p = "NWHITE+NL";
6757 break;
6758 case DIGIT:
6759 p = "DIGIT";
6760 break;
6761 case DIGIT + ADD_NL:
6762 p = "DIGIT+NL";
6763 break;
6764 case NDIGIT:
6765 p = "NDIGIT";
6766 break;
6767 case NDIGIT + ADD_NL:
6768 p = "NDIGIT+NL";
6769 break;
6770 case HEX:
6771 p = "HEX";
6772 break;
6773 case HEX + ADD_NL:
6774 p = "HEX+NL";
6775 break;
6776 case NHEX:
6777 p = "NHEX";
6778 break;
6779 case NHEX + ADD_NL:
6780 p = "NHEX+NL";
6781 break;
6782 case OCTAL:
6783 p = "OCTAL";
6784 break;
6785 case OCTAL + ADD_NL:
6786 p = "OCTAL+NL";
6787 break;
6788 case NOCTAL:
6789 p = "NOCTAL";
6790 break;
6791 case NOCTAL + ADD_NL:
6792 p = "NOCTAL+NL";
6793 break;
6794 case WORD:
6795 p = "WORD";
6796 break;
6797 case WORD + ADD_NL:
6798 p = "WORD+NL";
6799 break;
6800 case NWORD:
6801 p = "NWORD";
6802 break;
6803 case NWORD + ADD_NL:
6804 p = "NWORD+NL";
6805 break;
6806 case HEAD:
6807 p = "HEAD";
6808 break;
6809 case HEAD + ADD_NL:
6810 p = "HEAD+NL";
6811 break;
6812 case NHEAD:
6813 p = "NHEAD";
6814 break;
6815 case NHEAD + ADD_NL:
6816 p = "NHEAD+NL";
6817 break;
6818 case ALPHA:
6819 p = "ALPHA";
6820 break;
6821 case ALPHA + ADD_NL:
6822 p = "ALPHA+NL";
6823 break;
6824 case NALPHA:
6825 p = "NALPHA";
6826 break;
6827 case NALPHA + ADD_NL:
6828 p = "NALPHA+NL";
6829 break;
6830 case LOWER:
6831 p = "LOWER";
6832 break;
6833 case LOWER + ADD_NL:
6834 p = "LOWER+NL";
6835 break;
6836 case NLOWER:
6837 p = "NLOWER";
6838 break;
6839 case NLOWER + ADD_NL:
6840 p = "NLOWER+NL";
6841 break;
6842 case UPPER:
6843 p = "UPPER";
6844 break;
6845 case UPPER + ADD_NL:
6846 p = "UPPER+NL";
6847 break;
6848 case NUPPER:
6849 p = "NUPPER";
6850 break;
6851 case NUPPER + ADD_NL:
6852 p = "NUPPER+NL";
6853 break;
6854 case BRANCH:
6855 p = "BRANCH";
6856 break;
6857 case EXACTLY:
6858 p = "EXACTLY";
6859 break;
6860 case NOTHING:
6861 p = "NOTHING";
6862 break;
6863 case BACK:
6864 p = "BACK";
6865 break;
6866 case END:
6867 p = "END";
6868 break;
6869 case MOPEN + 0:
6870 p = "MATCH START";
6871 break;
6872 case MOPEN + 1:
6873 case MOPEN + 2:
6874 case MOPEN + 3:
6875 case MOPEN + 4:
6876 case MOPEN + 5:
6877 case MOPEN + 6:
6878 case MOPEN + 7:
6879 case MOPEN + 8:
6880 case MOPEN + 9:
6881 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6882 p = NULL;
6883 break;
6884 case MCLOSE + 0:
6885 p = "MATCH END";
6886 break;
6887 case MCLOSE + 1:
6888 case MCLOSE + 2:
6889 case MCLOSE + 3:
6890 case MCLOSE + 4:
6891 case MCLOSE + 5:
6892 case MCLOSE + 6:
6893 case MCLOSE + 7:
6894 case MCLOSE + 8:
6895 case MCLOSE + 9:
6896 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6897 p = NULL;
6898 break;
6899 case BACKREF + 1:
6900 case BACKREF + 2:
6901 case BACKREF + 3:
6902 case BACKREF + 4:
6903 case BACKREF + 5:
6904 case BACKREF + 6:
6905 case BACKREF + 7:
6906 case BACKREF + 8:
6907 case BACKREF + 9:
6908 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6909 p = NULL;
6910 break;
6911 case NOPEN:
6912 p = "NOPEN";
6913 break;
6914 case NCLOSE:
6915 p = "NCLOSE";
6916 break;
6917#ifdef FEAT_SYN_HL
6918 case ZOPEN + 1:
6919 case ZOPEN + 2:
6920 case ZOPEN + 3:
6921 case ZOPEN + 4:
6922 case ZOPEN + 5:
6923 case ZOPEN + 6:
6924 case ZOPEN + 7:
6925 case ZOPEN + 8:
6926 case ZOPEN + 9:
6927 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6928 p = NULL;
6929 break;
6930 case ZCLOSE + 1:
6931 case ZCLOSE + 2:
6932 case ZCLOSE + 3:
6933 case ZCLOSE + 4:
6934 case ZCLOSE + 5:
6935 case ZCLOSE + 6:
6936 case ZCLOSE + 7:
6937 case ZCLOSE + 8:
6938 case ZCLOSE + 9:
6939 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6940 p = NULL;
6941 break;
6942 case ZREF + 1:
6943 case ZREF + 2:
6944 case ZREF + 3:
6945 case ZREF + 4:
6946 case ZREF + 5:
6947 case ZREF + 6:
6948 case ZREF + 7:
6949 case ZREF + 8:
6950 case ZREF + 9:
6951 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6952 p = NULL;
6953 break;
6954#endif
6955 case STAR:
6956 p = "STAR";
6957 break;
6958 case PLUS:
6959 p = "PLUS";
6960 break;
6961 case NOMATCH:
6962 p = "NOMATCH";
6963 break;
6964 case MATCH:
6965 p = "MATCH";
6966 break;
6967 case BEHIND:
6968 p = "BEHIND";
6969 break;
6970 case NOBEHIND:
6971 p = "NOBEHIND";
6972 break;
6973 case SUBPAT:
6974 p = "SUBPAT";
6975 break;
6976 case BRACE_LIMITS:
6977 p = "BRACE_LIMITS";
6978 break;
6979 case BRACE_SIMPLE:
6980 p = "BRACE_SIMPLE";
6981 break;
6982 case BRACE_COMPLEX + 0:
6983 case BRACE_COMPLEX + 1:
6984 case BRACE_COMPLEX + 2:
6985 case BRACE_COMPLEX + 3:
6986 case BRACE_COMPLEX + 4:
6987 case BRACE_COMPLEX + 5:
6988 case BRACE_COMPLEX + 6:
6989 case BRACE_COMPLEX + 7:
6990 case BRACE_COMPLEX + 8:
6991 case BRACE_COMPLEX + 9:
6992 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6993 p = NULL;
6994 break;
6995#ifdef FEAT_MBYTE
6996 case MULTIBYTECODE:
6997 p = "MULTIBYTECODE";
6998 break;
6999#endif
7000 case NEWL:
7001 p = "NEWL";
7002 break;
7003 default:
7004 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
7005 p = NULL;
7006 break;
7007 }
7008 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007009 STRCAT(buf, p);
7010 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007011}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007012#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007013
Bram Moolenaarfb031402014-09-09 17:18:49 +02007014/*
7015 * Used in a place where no * or \+ can follow.
7016 */
7017 static int
7018re_mult_next(what)
7019 char *what;
7020{
7021 if (re_multi_type(peekchr()) == MULTI_MULT)
7022 EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
7023 return OK;
7024}
7025
Bram Moolenaar071d4272004-06-13 20:20:40 +00007026#ifdef FEAT_MBYTE
7027static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
7028
/* Decomposition of one character into at most three characters; unused
 * trailing members are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions of the Hebrew presentation forms 0xfb20 - 0xfb4f, indexed
 * by (character - 0xfb20).  Unassigned code points map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5bc, 0},          /* 0xfb30       alef+mapiq (U+05BC, the
                                 *              dagesh/mapiq point; this was
                                 *              wrongly hiriq, 0x5b4) */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, stored in "*c1",
 * "*c2" and "*c3".  Only 0xfb20 - 0xfb4f are decomposed (see decomp_table);
 * any other character is returned in "*c1" with "*c2" and "*c3" set to zero.
 * The three pointers may refer to the same variable; they are written in
 * the order c1, c2, c3.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T    d;

    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
7107#endif
7108
7109/*
7110 * Compare two strings, ignore case if ireg_ic set.
7111 * Return 0 if strings match, non-zero otherwise.
7112 * Correct the length "*n" when composing characters are ignored.
7113 */
7114 static int
7115cstrncmp(s1, s2, n)
7116 char_u *s1, *s2;
7117 int *n;
7118{
7119 int result;
7120
7121 if (!ireg_ic)
7122 result = STRNCMP(s1, s2, *n);
7123 else
7124 result = MB_STRNICMP(s1, s2, *n);
7125
7126#ifdef FEAT_MBYTE
7127 /* if it failed and it's utf8 and we want to combineignore: */
7128 if (result != 0 && enc_utf8 && ireg_icombine)
7129 {
7130 char_u *str1, *str2;
7131 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007132 int junk;
7133
7134 /* we have to handle the strcmp ourselves, since it is necessary to
7135 * deal with the composing characters by ignoring them: */
7136 str1 = s1;
7137 str2 = s2;
7138 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007139 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007140 {
7141 c1 = mb_ptr2char_adv(&str1);
7142 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007143
7144 /* decompose the character if necessary, into 'base' characters
7145 * because I don't care about Arabic, I will hard-code the Hebrew
7146 * which I *do* care about! So sue me... */
7147 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7148 {
7149 /* decomposition necessary? */
7150 mb_decompose(c1, &c11, &junk, &junk);
7151 mb_decompose(c2, &c12, &junk, &junk);
7152 c1 = c11;
7153 c2 = c12;
7154 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7155 break;
7156 }
7157 }
7158 result = c2 - c1;
7159 if (result == 0)
7160 *n = (int)(str2 - s2);
7161 }
7162#endif
7163
7164 return result;
7165}
7166
7167/*
7168 * cstrchr: This function is used a lot for simple searches, keep it fast!
7169 */
7170 static char_u *
7171cstrchr(s, c)
7172 char_u *s;
7173 int c;
7174{
7175 char_u *p;
7176 int cc;
7177
7178 if (!ireg_ic
7179#ifdef FEAT_MBYTE
7180 || (!enc_utf8 && mb_char2len(c) > 1)
7181#endif
7182 )
7183 return vim_strchr(s, c);
7184
7185 /* tolower() and toupper() can be slow, comparing twice should be a lot
7186 * faster (esp. when using MS Visual C++!).
7187 * For UTF-8 need to use folded case. */
7188#ifdef FEAT_MBYTE
7189 if (enc_utf8 && c > 0x80)
7190 cc = utf_fold(c);
7191 else
7192#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007193 if (MB_ISUPPER(c))
7194 cc = MB_TOLOWER(c);
7195 else if (MB_ISLOWER(c))
7196 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007197 else
7198 return vim_strchr(s, c);
7199
7200#ifdef FEAT_MBYTE
7201 if (has_mbyte)
7202 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007203 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007204 {
7205 if (enc_utf8 && c > 0x80)
7206 {
7207 if (utf_fold(utf_ptr2char(p)) == cc)
7208 return p;
7209 }
7210 else if (*p == c || *p == cc)
7211 return p;
7212 }
7213 }
7214 else
7215#endif
7216 /* Faster version for when there are no multi-byte characters. */
7217 for (p = s; *p != NUL; ++p)
7218 if (*p == c || *p == cc)
7219 return p;
7220
7221 return NULL;
7222}
7223
7224/***************************************************************
7225 * regsub stuff *
7226 ***************************************************************/
7227
7228/* This stuff below really confuses cc on an SGI -- webb */
7229#ifdef __sgi
7230# undef __ARGS
7231# define __ARGS(x) ()
7232#endif
7233
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 * The functions take a pointer where the converted character is stored and
 * the character to convert; what they return is cast back to fptr_T.
 */
typedef void (*(*fptr_T) __ARGS((int *, int)))();

/* Case conversion functions; they all have the fptr_T signature. */
static fptr_T do_upper __ARGS((int *, int));
static fptr_T do_Upper __ARGS((int *, int));
static fptr_T do_lower __ARGS((int *, int));
static fptr_T do_Lower __ARGS((int *, int));

static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7248
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007249 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007250do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007251 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007252 int c;
7253{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007254 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007255
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007256 return (fptr_T)NULL;
7257}
7258
7259 static fptr_T
7260do_Upper(d, c)
7261 int *d;
7262 int c;
7263{
7264 *d = MB_TOUPPER(c);
7265
7266 return (fptr_T)do_Upper;
7267}
7268
7269 static fptr_T
7270do_lower(d, c)
7271 int *d;
7272 int c;
7273{
7274 *d = MB_TOLOWER(c);
7275
7276 return (fptr_T)NULL;
7277}
7278
7279 static fptr_T
7280do_Lower(d, c)
7281 int *d;
7282 int c;
7283{
7284 *d = MB_TOLOWER(c);
7285
7286 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007287}
7288
7289/*
7290 * regtilde(): Replace tildes in the pattern by the old pattern.
7291 *
7292 * Short explanation of the tilde: It stands for the previous replacement
7293 * pattern. If that previous pattern also contains a ~ we should go back a
7294 * step further... But we insert the previous pattern into the current one
7295 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007296 * This still does not handle the case where "magic" changes. So require the
7297 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007298 *
7299 * The tildes are parsed once before the first call to vim_regsub().
7300 */
7301 char_u *
7302regtilde(source, magic)
7303 char_u *source;
7304 int magic;
7305{
7306 char_u *newsub = source;
7307 char_u *tmpsub;
7308 char_u *p;
7309 int len;
7310 int prevlen;
7311
7312 for (p = newsub; *p; ++p)
7313 {
7314 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7315 {
7316 if (reg_prev_sub != NULL)
7317 {
7318 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7319 prevlen = (int)STRLEN(reg_prev_sub);
7320 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7321 if (tmpsub != NULL)
7322 {
7323 /* copy prefix */
7324 len = (int)(p - newsub); /* not including ~ */
7325 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007326 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007327 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7328 /* copy postfix */
7329 if (!magic)
7330 ++p; /* back off \ */
7331 STRCPY(tmpsub + len + prevlen, p + 1);
7332
7333 if (newsub != source) /* already allocated newsub */
7334 vim_free(newsub);
7335 newsub = tmpsub;
7336 p = newsub + len + prevlen;
7337 }
7338 }
7339 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007340 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007341 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007342 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007343 --p;
7344 }
7345 else
7346 {
7347 if (*p == '\\' && p[1]) /* skip escaped characters */
7348 ++p;
7349#ifdef FEAT_MBYTE
7350 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007351 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007352#endif
7353 }
7354 }
7355
7356 vim_free(reg_prev_sub);
7357 if (newsub != source) /* newsub was allocated, just keep it */
7358 reg_prev_sub = newsub;
7359 else /* no ~ found, need to save newsub */
7360 reg_prev_sub = vim_strsave(newsub);
7361 return newsub;
7362}
7363
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used instead of reg_match and reg_mmatch for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch(). */
static regmatch_T       *submatch_match;
static regmmatch_T      *submatch_mmatch;
/* Likewise: copies of reg_firstlnum, reg_maxline and reg_line_lbr, taken by
 * vim_regsub_both() before it evaluates a "\=" expression and put back
 * afterwards. */
static linenr_T         submatch_firstlnum;
static linenr_T         submatch_maxline;
static int              submatch_line_lbr;
#endif
7376
7377#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7378/*
7379 * vim_regsub() - perform substitutions after a vim_regexec() or
7380 * vim_regexec_multi() match.
7381 *
7382 * If "copy" is TRUE really copy into "dest".
7383 * If "copy" is FALSE nothing is copied, this is just to find out the length
7384 * of the result.
7385 *
7386 * If "backslash" is TRUE, a backslash will be removed later, need to double
7387 * them to keep them, and insert a backslash before a CR to avoid it being
7388 * replaced with a line break later.
7389 *
7390 * Note: The matched text must not change between the call of
7391 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7392 * references invalid!
7393 *
7394 * Returns the size of the replacement, including terminating NUL.
7395 */
7396 int
7397vim_regsub(rmp, source, dest, copy, magic, backslash)
7398 regmatch_T *rmp;
7399 char_u *source;
7400 char_u *dest;
7401 int copy;
7402 int magic;
7403 int backslash;
7404{
7405 reg_match = rmp;
7406 reg_mmatch = NULL;
7407 reg_maxline = 0;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01007408 reg_buf = curbuf;
Bram Moolenaar93fc4812014-04-23 18:48:47 +02007409 reg_line_lbr = TRUE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007410 return vim_regsub_both(source, dest, copy, magic, backslash);
7411}
7412#endif
7413
7414 int
7415vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7416 regmmatch_T *rmp;
7417 linenr_T lnum;
7418 char_u *source;
7419 char_u *dest;
7420 int copy;
7421 int magic;
7422 int backslash;
7423{
7424 reg_match = NULL;
7425 reg_mmatch = rmp;
7426 reg_buf = curbuf; /* always works on the current buffer! */
7427 reg_firstlnum = lnum;
7428 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
Bram Moolenaar93fc4812014-04-23 18:48:47 +02007429 reg_line_lbr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007430 return vim_regsub_both(source, dest, copy, magic, backslash);
7431}
7432
7433 static int
7434vim_regsub_both(source, dest, copy, magic, backslash)
7435 char_u *source;
7436 char_u *dest;
7437 int copy;
7438 int magic;
7439 int backslash;
7440{
7441 char_u *src;
7442 char_u *dst;
7443 char_u *s;
7444 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007445 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007446 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007447 fptr_T func_all = (fptr_T)NULL;
7448 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007449 linenr_T clnum = 0; /* init for GCC */
7450 int len = 0; /* init for GCC */
7451#ifdef FEAT_EVAL
7452 static char_u *eval_result = NULL;
7453#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007454
7455 /* Be paranoid... */
7456 if (source == NULL || dest == NULL)
7457 {
7458 EMSG(_(e_null));
7459 return 0;
7460 }
7461 if (prog_magic_wrong())
7462 return 0;
7463 src = source;
7464 dst = dest;
7465
7466 /*
7467 * When the substitute part starts with "\=" evaluate it as an expression.
7468 */
7469 if (source[0] == '\\' && source[1] == '='
7470#ifdef FEAT_EVAL
7471 && !can_f_submatch /* can't do this recursively */
7472#endif
7473 )
7474 {
7475#ifdef FEAT_EVAL
7476 /* To make sure that the length doesn't change between checking the
7477 * length and copying the string, and to speed up things, the
7478 * resulting string is saved from the call with "copy" == FALSE to the
7479 * call with "copy" == TRUE. */
7480 if (copy)
7481 {
7482 if (eval_result != NULL)
7483 {
7484 STRCPY(dest, eval_result);
7485 dst += STRLEN(eval_result);
7486 vim_free(eval_result);
7487 eval_result = NULL;
7488 }
7489 }
7490 else
7491 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007492 win_T *save_reg_win;
7493 int save_ireg_ic;
7494
7495 vim_free(eval_result);
7496
7497 /* The expression may contain substitute(), which calls us
7498 * recursively. Make sure submatch() gets the text from the first
7499 * level. Don't need to save "reg_buf", because
7500 * vim_regexec_multi() can't be called recursively. */
7501 submatch_match = reg_match;
7502 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007503 submatch_firstlnum = reg_firstlnum;
7504 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007505 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007506 save_reg_win = reg_win;
7507 save_ireg_ic = ireg_ic;
7508 can_f_submatch = TRUE;
7509
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007510 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007511 if (eval_result != NULL)
7512 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007513 int had_backslash = FALSE;
7514
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007515 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007516 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007517 /* Change NL to CR, so that it becomes a line break,
7518 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007519 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007520 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007521 *s = CAR;
7522 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007523 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007524 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007525 /* Change NL to CR here too, so that this works:
7526 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7527 * abc\
7528 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007529 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007530 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007531 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007532 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007533 had_backslash = TRUE;
7534 }
7535 }
7536 if (had_backslash && backslash)
7537 {
7538 /* Backslashes will be consumed, need to double them. */
7539 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7540 if (s != NULL)
7541 {
7542 vim_free(eval_result);
7543 eval_result = s;
7544 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007545 }
7546
7547 dst += STRLEN(eval_result);
7548 }
7549
7550 reg_match = submatch_match;
7551 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007552 reg_firstlnum = submatch_firstlnum;
7553 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007554 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007555 reg_win = save_reg_win;
7556 ireg_ic = save_ireg_ic;
7557 can_f_submatch = FALSE;
7558 }
7559#endif
7560 }
7561 else
7562 while ((c = *src++) != NUL)
7563 {
7564 if (c == '&' && magic)
7565 no = 0;
7566 else if (c == '\\' && *src != NUL)
7567 {
7568 if (*src == '&' && !magic)
7569 {
7570 ++src;
7571 no = 0;
7572 }
7573 else if ('0' <= *src && *src <= '9')
7574 {
7575 no = *src++ - '0';
7576 }
7577 else if (vim_strchr((char_u *)"uUlLeE", *src))
7578 {
7579 switch (*src++)
7580 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007581 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007582 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007583 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007584 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007585 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007586 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007587 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007588 continue;
7589 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007590 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007591 continue;
7592 }
7593 }
7594 }
7595 if (no < 0) /* Ordinary character. */
7596 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007597 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7598 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007599 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007600 if (copy)
7601 {
7602 *dst++ = c;
7603 *dst++ = *src++;
7604 *dst++ = *src++;
7605 }
7606 else
7607 {
7608 dst += 3;
7609 src += 2;
7610 }
7611 continue;
7612 }
7613
Bram Moolenaar071d4272004-06-13 20:20:40 +00007614 if (c == '\\' && *src != NUL)
7615 {
7616 /* Check for abbreviations -- webb */
7617 switch (*src)
7618 {
7619 case 'r': c = CAR; ++src; break;
7620 case 'n': c = NL; ++src; break;
7621 case 't': c = TAB; ++src; break;
7622 /* Oh no! \e already has meaning in subst pat :-( */
7623 /* case 'e': c = ESC; ++src; break; */
7624 case 'b': c = Ctrl_H; ++src; break;
7625
7626 /* If "backslash" is TRUE the backslash will be removed
7627 * later. Used to insert a literal CR. */
7628 default: if (backslash)
7629 {
7630 if (copy)
7631 *dst = '\\';
7632 ++dst;
7633 }
7634 c = *src++;
7635 }
7636 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007637#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007638 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007639 c = mb_ptr2char(src - 1);
7640#endif
7641
Bram Moolenaardb552d602006-03-23 22:59:57 +00007642 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007643 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007644 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007645 func_one = (fptr_T)(func_one(&cc, c));
7646 else if (func_all != (fptr_T)NULL)
7647 /* Turbo C complains without the typecast */
7648 func_all = (fptr_T)(func_all(&cc, c));
7649 else /* just copy */
7650 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007651
7652#ifdef FEAT_MBYTE
7653 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007654 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007655 int totlen = mb_ptr2len(src - 1);
7656
Bram Moolenaar071d4272004-06-13 20:20:40 +00007657 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007658 mb_char2bytes(cc, dst);
7659 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007660 if (enc_utf8)
7661 {
7662 int clen = utf_ptr2len(src - 1);
7663
7664 /* If the character length is shorter than "totlen", there
7665 * are composing characters; copy them as-is. */
7666 if (clen < totlen)
7667 {
7668 if (copy)
7669 mch_memmove(dst + 1, src - 1 + clen,
7670 (size_t)(totlen - clen));
7671 dst += totlen - clen;
7672 }
7673 }
7674 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007675 }
7676 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007677#endif
7678 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007679 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007680 dst++;
7681 }
7682 else
7683 {
7684 if (REG_MULTI)
7685 {
7686 clnum = reg_mmatch->startpos[no].lnum;
7687 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7688 s = NULL;
7689 else
7690 {
7691 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7692 if (reg_mmatch->endpos[no].lnum == clnum)
7693 len = reg_mmatch->endpos[no].col
7694 - reg_mmatch->startpos[no].col;
7695 else
7696 len = (int)STRLEN(s);
7697 }
7698 }
7699 else
7700 {
7701 s = reg_match->startp[no];
7702 if (reg_match->endp[no] == NULL)
7703 s = NULL;
7704 else
7705 len = (int)(reg_match->endp[no] - s);
7706 }
7707 if (s != NULL)
7708 {
7709 for (;;)
7710 {
7711 if (len == 0)
7712 {
7713 if (REG_MULTI)
7714 {
7715 if (reg_mmatch->endpos[no].lnum == clnum)
7716 break;
7717 if (copy)
7718 *dst = CAR;
7719 ++dst;
7720 s = reg_getline(++clnum);
7721 if (reg_mmatch->endpos[no].lnum == clnum)
7722 len = reg_mmatch->endpos[no].col;
7723 else
7724 len = (int)STRLEN(s);
7725 }
7726 else
7727 break;
7728 }
7729 else if (*s == NUL) /* we hit NUL. */
7730 {
7731 if (copy)
7732 EMSG(_(e_re_damg));
7733 goto exit;
7734 }
7735 else
7736 {
7737 if (backslash && (*s == CAR || *s == '\\'))
7738 {
7739 /*
7740 * Insert a backslash in front of a CR, otherwise
7741 * it will be replaced by a line break.
7742 * Number of backslashes will be halved later,
7743 * double them here.
7744 */
7745 if (copy)
7746 {
7747 dst[0] = '\\';
7748 dst[1] = *s;
7749 }
7750 dst += 2;
7751 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007752 else
7753 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007754#ifdef FEAT_MBYTE
7755 if (has_mbyte)
7756 c = mb_ptr2char(s);
7757 else
7758#endif
7759 c = *s;
7760
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007761 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007762 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007763 func_one = (fptr_T)(func_one(&cc, c));
7764 else if (func_all != (fptr_T)NULL)
7765 /* Turbo C complains without the typecast */
7766 func_all = (fptr_T)(func_all(&cc, c));
7767 else /* just copy */
7768 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007769
7770#ifdef FEAT_MBYTE
7771 if (has_mbyte)
7772 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007773 int l;
7774
7775 /* Copy composing characters separately, one
7776 * at a time. */
7777 if (enc_utf8)
7778 l = utf_ptr2len(s) - 1;
7779 else
7780 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007781
7782 s += l;
7783 len -= l;
7784 if (copy)
7785 mb_char2bytes(cc, dst);
7786 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007787 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007788 else
7789#endif
7790 if (copy)
7791 *dst = cc;
7792 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007793 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007794
Bram Moolenaar071d4272004-06-13 20:20:40 +00007795 ++s;
7796 --len;
7797 }
7798 }
7799 }
7800 no = -1;
7801 }
7802 }
7803 if (copy)
7804 *dst = NUL;
7805
7806exit:
7807 return (int)((dst - dest) + 1);
7808}
7809
7810#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007811static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7812
Bram Moolenaar071d4272004-06-13 20:20:40 +00007813/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007814 * Call reg_getline() with the line numbers from the submatch. If a
7815 * substitute() was used the reg_maxline and other values have been
7816 * overwritten.
7817 */
7818 static char_u *
7819reg_getline_submatch(lnum)
7820 linenr_T lnum;
7821{
7822 char_u *s;
7823 linenr_T save_first = reg_firstlnum;
7824 linenr_T save_max = reg_maxline;
7825
7826 reg_firstlnum = submatch_firstlnum;
7827 reg_maxline = submatch_maxline;
7828
7829 s = reg_getline(lnum);
7830
7831 reg_firstlnum = save_first;
7832 reg_maxline = save_max;
7833 return s;
7834}
7835
7836/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007837 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007838 * allocated memory.
7839 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7840 */
7841 char_u *
7842reg_submatch(no)
7843 int no;
7844{
7845 char_u *retval = NULL;
7846 char_u *s;
7847 int len;
7848 int round;
7849 linenr_T lnum;
7850
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007851 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007852 return NULL;
7853
7854 if (submatch_match == NULL)
7855 {
7856 /*
7857 * First round: compute the length and allocate memory.
7858 * Second round: copy the text.
7859 */
7860 for (round = 1; round <= 2; ++round)
7861 {
7862 lnum = submatch_mmatch->startpos[no].lnum;
7863 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7864 return NULL;
7865
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007866 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007867 if (s == NULL) /* anti-crash check, cannot happen? */
7868 break;
7869 if (submatch_mmatch->endpos[no].lnum == lnum)
7870 {
7871 /* Within one line: take form start to end col. */
7872 len = submatch_mmatch->endpos[no].col
7873 - submatch_mmatch->startpos[no].col;
7874 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007875 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007876 ++len;
7877 }
7878 else
7879 {
7880 /* Multiple lines: take start line from start col, middle
7881 * lines completely and end line up to end col. */
7882 len = (int)STRLEN(s);
7883 if (round == 2)
7884 {
7885 STRCPY(retval, s);
7886 retval[len] = '\n';
7887 }
7888 ++len;
7889 ++lnum;
7890 while (lnum < submatch_mmatch->endpos[no].lnum)
7891 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007892 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007893 if (round == 2)
7894 STRCPY(retval + len, s);
7895 len += (int)STRLEN(s);
7896 if (round == 2)
7897 retval[len] = '\n';
7898 ++len;
7899 }
7900 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007901 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007902 submatch_mmatch->endpos[no].col);
7903 len += submatch_mmatch->endpos[no].col;
7904 if (round == 2)
7905 retval[len] = NUL;
7906 ++len;
7907 }
7908
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007909 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007910 {
7911 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007912 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007913 return NULL;
7914 }
7915 }
7916 }
7917 else
7918 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007919 s = submatch_match->startp[no];
7920 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007921 retval = NULL;
7922 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007923 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007924 }
7925
7926 return retval;
7927}
Bram Moolenaar41571762014-04-02 19:00:58 +02007928
7929/*
7930 * Used for the submatch() function with the optional non-zero argument: get
7931 * the list of strings from the n'th submatch in allocated memory with NULs
7932 * represented in NLs.
7933 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7934 * command, for a non-existing submatch and for any error.
7935 */
7936 list_T *
7937reg_submatch_list(no)
7938 int no;
7939{
7940 char_u *s;
7941 linenr_T slnum;
7942 linenr_T elnum;
7943 colnr_T scol;
7944 colnr_T ecol;
7945 int i;
7946 list_T *list;
7947 int error = FALSE;
7948
7949 if (!can_f_submatch || no < 0)
7950 return NULL;
7951
7952 if (submatch_match == NULL)
7953 {
7954 slnum = submatch_mmatch->startpos[no].lnum;
7955 elnum = submatch_mmatch->endpos[no].lnum;
7956 if (slnum < 0 || elnum < 0)
7957 return NULL;
7958
7959 scol = submatch_mmatch->startpos[no].col;
7960 ecol = submatch_mmatch->endpos[no].col;
7961
7962 list = list_alloc();
7963 if (list == NULL)
7964 return NULL;
7965
7966 s = reg_getline_submatch(slnum) + scol;
7967 if (slnum == elnum)
7968 {
7969 if (list_append_string(list, s, ecol - scol) == FAIL)
7970 error = TRUE;
7971 }
7972 else
7973 {
7974 if (list_append_string(list, s, -1) == FAIL)
7975 error = TRUE;
7976 for (i = 1; i < elnum - slnum; i++)
7977 {
7978 s = reg_getline_submatch(slnum + i);
7979 if (list_append_string(list, s, -1) == FAIL)
7980 error = TRUE;
7981 }
7982 s = reg_getline_submatch(elnum);
7983 if (list_append_string(list, s, ecol) == FAIL)
7984 error = TRUE;
7985 }
7986 }
7987 else
7988 {
7989 s = submatch_match->startp[no];
7990 if (s == NULL || submatch_match->endp[no] == NULL)
7991 return NULL;
7992 list = list_alloc();
7993 if (list == NULL)
7994 return NULL;
7995 if (list_append_string(list, s,
7996 (int)(submatch_match->endp[no] - s)) == FAIL)
7997 error = TRUE;
7998 }
7999
8000 if (error)
8001 {
8002 list_free(list, TRUE);
8003 return NULL;
8004 }
8005 return list;
8006}
Bram Moolenaar071d4272004-06-13 20:20:40 +00008007#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008008
8009static regengine_T bt_regengine =
8010{
8011 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008012 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008013 bt_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008014 bt_regexec_multi,
8015 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008016};
8017
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008018#include "regexp_nfa.c"
8019
8020static regengine_T nfa_regengine =
8021{
8022 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008023 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008024 nfa_regexec_nl,
Bram Moolenaarfda37292014-11-05 14:27:36 +01008025 nfa_regexec_multi,
8026 (char_u *)""
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008027};
8028
/*
 * Which regexp engine to use?  Needed for vim_regcomp().
 * Must match with 'regexpengine'.
 */
static int regexp_engine = 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008032
#ifdef DEBUG
/* Engine names for the debug message in vim_regcomp(), indexed by the engine
 * number. */
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8040
8041/*
8042 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008043 * Returns the program in allocated memory.
8044 * Use vim_regfree() to free the memory.
8045 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008046 */
8047 regprog_T *
8048vim_regcomp(expr_arg, re_flags)
8049 char_u *expr_arg;
8050 int re_flags;
8051{
8052 regprog_T *prog = NULL;
8053 char_u *expr = expr_arg;
8054
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008055 regexp_engine = p_re;
8056
8057 /* Check for prefix "\%#=", that sets the regexp engine */
8058 if (STRNCMP(expr, "\\%#=", 4) == 0)
8059 {
8060 int newengine = expr[4] - '0';
8061
8062 if (newengine == AUTOMATIC_ENGINE
8063 || newengine == BACKTRACKING_ENGINE
8064 || newengine == NFA_ENGINE)
8065 {
8066 regexp_engine = expr[4] - '0';
8067 expr += 5;
8068#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02008069 smsg((char_u *)"New regexp mode selected (%d): %s",
8070 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008071#endif
8072 }
8073 else
8074 {
8075 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8076 regexp_engine = AUTOMATIC_ENGINE;
8077 }
8078 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008079 bt_regengine.expr = expr;
8080 nfa_regengine.expr = expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008081
8082 /*
8083 * First try the NFA engine, unless backtracking was requested.
8084 */
8085 if (regexp_engine != BACKTRACKING_ENGINE)
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008086 prog = nfa_regengine.regcomp(expr,
8087 re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008088 else
8089 prog = bt_regengine.regcomp(expr, re_flags);
8090
Bram Moolenaarfda37292014-11-05 14:27:36 +01008091 /* Check for error compiling regexp with initial engine. */
8092 if (prog == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008093 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008094#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008095 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8096 {
8097 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008098 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008099 if (f)
8100 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008101 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008102 fclose(f);
8103 }
8104 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008105 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8106 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008107 }
8108#endif
8109 /*
Bram Moolenaarfda37292014-11-05 14:27:36 +01008110 * If the NFA engine failed, try the backtracking engine.
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008111 * The NFA engine also fails for patterns that it can't handle well
8112 * but are still valid patterns, thus a retry should work.
8113 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008114 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008115 {
Bram Moolenaare0ad3652015-01-27 12:59:55 +01008116 regexp_engine = BACKTRACKING_ENGINE;
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008117 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaarfda37292014-11-05 14:27:36 +01008118 }
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008119 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008120
Bram Moolenaarfda37292014-11-05 14:27:36 +01008121 if (prog != NULL)
8122 {
8123 /* Store the info needed to call regcomp() again when the engine turns
8124 * out to be very slow when executing it. */
8125 prog->re_engine = regexp_engine;
8126 prog->re_flags = re_flags;
8127 }
8128
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008129 return prog;
8130}
8131
8132/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008133 * Free a compiled regexp program, returned by vim_regcomp().
8134 */
8135 void
8136vim_regfree(prog)
8137 regprog_T *prog;
8138{
8139 if (prog != NULL)
8140 prog->engine->regfree(prog);
8141}
8142
#ifdef FEAT_EVAL
static void report_re_switch __ARGS((char_u *pat));

/*
 * Give a message that the backtracking engine is now used for pattern "pat".
 * Only done when "p_verbose" is above zero.
 */
    static void
report_re_switch(pat)
    char_u *pat;
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
    MSG_PUTS(pat);
    verbose_leave();
}
#endif
8159
8160static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl));
8161
Bram Moolenaar473de612013-06-08 18:19:48 +02008162/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008163 * Match a regexp against a string.
8164 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008165 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008166 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaarfda37292014-11-05 14:27:36 +01008167 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008168 *
8169 * Return TRUE if there is a match, FALSE if not.
8170 */
Bram Moolenaarfda37292014-11-05 14:27:36 +01008171 static int
8172vim_regexec_both(rmp, line, col, nl)
8173 regmatch_T *rmp;
8174 char_u *line; /* string to match against */
8175 colnr_T col; /* column to start looking for match */
8176 int nl;
8177{
8178 int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8179
8180 /* NFA engine aborted because it's very slow. */
8181 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8182 && result == NFA_TOO_EXPENSIVE)
8183 {
8184 int save_p_re = p_re;
8185 int re_flags = rmp->regprog->re_flags;
8186 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8187
8188 p_re = BACKTRACKING_ENGINE;
8189 vim_regfree(rmp->regprog);
8190 if (pat != NULL)
8191 {
8192#ifdef FEAT_EVAL
8193 report_re_switch(pat);
8194#endif
8195 rmp->regprog = vim_regcomp(pat, re_flags);
8196 if (rmp->regprog != NULL)
8197 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8198 vim_free(pat);
8199 }
8200
8201 p_re = save_p_re;
8202 }
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008203 return result > 0;
Bram Moolenaarfda37292014-11-05 14:27:36 +01008204}
8205
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008206/*
8207 * Note: "*prog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008208 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008209 */
8210 int
8211vim_regexec_prog(prog, ignore_case, line, col)
8212 regprog_T **prog;
8213 int ignore_case;
8214 char_u *line;
8215 colnr_T col;
8216{
8217 int r;
8218 regmatch_T regmatch;
8219
8220 regmatch.regprog = *prog;
8221 regmatch.rm_ic = ignore_case;
8222 r = vim_regexec_both(&regmatch, line, col, FALSE);
8223 *prog = regmatch.regprog;
8224 return r;
8225}
8226
8227/*
8228 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008229 * Return TRUE if there is a match, FALSE if not.
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008230 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008231 int
8232vim_regexec(rmp, line, col)
Bram Moolenaarfda37292014-11-05 14:27:36 +01008233 regmatch_T *rmp;
8234 char_u *line;
8235 colnr_T col;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008236{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008237 return vim_regexec_both(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008238}
8239
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Same as vim_regexec(), except that a "\n" in "line" is considered to be a
 * line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;
    colnr_T	col;
{
    int		matched;

    matched = vim_regexec_both(rmp, line, col, TRUE);
    return matched;
}
#endif
8256
8257/*
8258 * Match a regexp against multiple lines.
8259 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
Bram Moolenaardffa5b82014-11-19 16:38:07 +01008260 * Note: "rmp->regprog" may be freed and changed.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008261 * Uses curbuf for line count and 'iskeyword'.
8262 *
8263 * Return zero if there is no match. Return number of lines contained in the
8264 * match otherwise.
8265 */
8266 long
8267vim_regexec_multi(rmp, win, buf, lnum, col, tm)
8268 regmmatch_T *rmp;
8269 win_T *win; /* window in which to search or NULL */
8270 buf_T *buf; /* buffer in which to search */
8271 linenr_T lnum; /* nr of line to start looking for match */
8272 colnr_T col; /* column to start looking for match */
8273 proftime_T *tm; /* timeout limit or NULL */
8274{
Bram Moolenaarfda37292014-11-05 14:27:36 +01008275 int result = rmp->regprog->engine->regexec_multi(
8276 rmp, win, buf, lnum, col, tm);
8277
8278 /* NFA engine aborted because it's very slow. */
8279 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8280 && result == NFA_TOO_EXPENSIVE)
8281 {
8282 int save_p_re = p_re;
8283 int re_flags = rmp->regprog->re_flags;
8284 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8285
8286 p_re = BACKTRACKING_ENGINE;
8287 vim_regfree(rmp->regprog);
8288 if (pat != NULL)
8289 {
8290#ifdef FEAT_EVAL
8291 report_re_switch(pat);
8292#endif
8293 rmp->regprog = vim_regcomp(pat, re_flags);
8294 if (rmp->regprog != NULL)
8295 result = rmp->regprog->engine->regexec_multi(
8296 rmp, win, buf, lnum, col, tm);
8297 vim_free(pat);
8298 }
8299 p_re = save_p_re;
8300 }
8301
Bram Moolenaar66a3e792014-11-20 23:07:05 +01008302 return result <= 0 ? 0 : result;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008303}