blob: dcb9a3b4e8e07ec7f42f6df18e2e39d90983c06d [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
97 * pattern is coded like:
98 *
99 * +-----------------+
100 * | V
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
102 * | ^ | ^
103 * +------+ +----------+
104 *
105 *
106 * +------------------+
107 * V |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109 * | | ^ ^
110 * | +---------------+ |
111 * +---------------------------------------------+
112 *
113 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000114 * +----------------------+
115 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000117 * | | ^ ^
118 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000119 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000120 *
121 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122 * +-------------------------+
123 * V |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
125 * | | ^
126 * | +----------------+
127 * +-----------------------------------------------+
128 *
129 *
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
131 * | | ^ ^
132 * | +----------------+ |
133 * +--------------------------------+
134 *
135 * +---------+
136 * | V
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
138 * | | | | ^ ^
139 * | | | +-----+ |
140 * | | +----------------+ |
141 * | +---------------------------+ |
142 * +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: 20-48 normal, 50-78 include a line-break.
 * The line-break variant of a class is its opcode plus ADD_NL.
 * FIRST_NL and LAST_NL are fully parenthesized so that they keep their value
 * when used inside a larger expression.
 */
#define ADD_NL          30
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20      /*      Match any one character. */
#define ANYOF           21      /* str  Match any character in this string. */
#define ANYBUT          22      /* str  Match any character not in this
                                 *      string. */
#define IDENT           23      /*      Match identifier char */
#define SIDENT          24      /*      Match identifier char but no digit */
#define KWORD           25      /*      Match keyword char */
#define SKWORD          26      /*      Match word char but no digit */
#define FNAME           27      /*      Match file name char */
#define SFNAME          28      /*      Match file name char but no digit */
#define PRINT           29      /*      Match printable char */
#define SPRINT          30      /*      Match printable char but no digit */
#define WHITE           31      /*      Match whitespace char */
#define NWHITE          32      /*      Match non-whitespace char */
#define DIGIT           33      /*      Match digit char */
#define NDIGIT          34      /*      Match non-digit char */
#define HEX             35      /*      Match hex char */
#define NHEX            36      /*      Match non-hex char */
#define OCTAL           37      /*      Match octal char */
#define NOCTAL          38      /*      Match non-octal char */
#define WORD            39      /*      Match word char */
#define NWORD           40      /*      Match non-word char */
#define HEAD            41      /*      Match head char */
#define NHEAD           42      /*      Match non-head char */
#define ALPHA           43      /*      Match alpha char */
#define NALPHA          44      /*      Match non-alpha char */
#define LOWER           45      /*      Match lowercase char */
#define NLOWER          46      /*      Match non-lowercase char */
#define UPPER           47      /*      Match uppercase char */
#define NUPPER          48      /*      Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* TRUE when "op" is one of the class opcodes that also matches a line-break. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
Bram Moolenaar8df5acf2014-05-13 19:37:29 +0200247#define RE_COMPOSING 209 /* any composing characters */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000248
Bram Moolenaar071d4272004-06-13 20:20:40 +0000249/*
250 * Magic characters have a special meaning, they don't match literally.
251 * Magic characters are negative. This separates them from literal characters
252 * (possibly multi-byte). Only ASCII characters can be Magic.
253 */
254#define Magic(x) ((int)(x) - 256)
255#define un_Magic(x) ((x) + 256)
256#define is_Magic(x) ((x) < 0)
257
258static int no_Magic __ARGS((int x));
259static int toggle_Magic __ARGS((int x));
260
/*
 * Return "x" without its magic marking: a Magic character is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
269
/*
 * Flip the magic marking of "x": a Magic character is converted back with
 * un_Magic(), any other value is made Magic.
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
278
279/*
280 * The first byte of the regexp internal "program" is actually this magic
281 * number; the start node begins in the second byte. It's used to catch the
282 * most severe mutilation of the program by the caller.
283 */
284
285#define REGMAGIC 0234
286
287/*
288 * Opcode notes:
289 *
290 * BRANCH The set of branches constituting a single choice are hooked
291 * together with their "next" pointers, since precedence prevents
292 * anything being concatenated to any individual branch. The
293 * "next" pointer of the last BRANCH in a choice points to the
294 * thing following the whole choice. This is also where the
295 * final "next" pointer of each individual branch points; each
296 * branch starts with the operand node of a BRANCH node.
297 *
298 * BACK Normal "next" pointers all implicitly point forward; BACK
299 * exists to make loop structures possible.
300 *
301 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
302 * BRANCH structures using BACK. Simple cases (one character
303 * per match) are implemented with STAR and PLUS for speed
304 * and to minimize recursive plunges.
305 *
306 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
307 * node, and defines the min and max limits to be used for that
308 * node.
309 *
310 * MOPEN,MCLOSE ...are numbered at compile time.
311 * ZOPEN,ZCLOSE ...ditto
312 */
313
314/*
315 * A node is one char of opcode followed by two chars of "next" pointer.
316 * "Next" pointers are stored as two 8-bit bytes, high order first. The
317 * value is a positive offset from the opcode of the node containing it.
318 * An operand, if any, simply follows the node. (Note that much of the
319 * code generation knows about this implicit relationship.)
320 *
321 * Using two bytes for the "next" pointer is vast overkill for most things,
322 * but allows patterns to get big without disasters.
323 */
324#define OP(p) ((int)*(p))
325#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
326#define OPERAND(p) ((p) + 3)
327/* Obtain an operand that was stored as four bytes, MSB first. */
328#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
329 + ((long)(p)[5] << 8) + (long)(p)[6])
330/* Obtain a second operand stored as four bytes. */
331#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
332/* Obtain a second single-byte operand stored after a four bytes operand. */
333#define OPERAND_CMP(p) (p)[7]
334
335/*
336 * Utility definitions.
337 */
338#define UCHARAT(p) ((int)*(char_u *)(p))
339
340/* Used for an error (down from) vim_regcomp(): give the error message, set
341 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000342#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000343#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200344#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
345#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
346#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000347
348#define MAX_LIMIT (32767L << 16L)
349
350static int re_multi_type __ARGS((int));
351static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
352static char_u *cstrchr __ARGS((char_u *, int));
353
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200354#ifdef BT_REGEXP_DUMP
355static void regdump __ARGS((char_u *, bt_regprog_T *));
356#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000358static char_u *regprop __ARGS((char_u *));
359#endif
360
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200361static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
362static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
363static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
364static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200365#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200366static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
367static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200368#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200369static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200370static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000371#define NOT_MULTI 0
372#define MULTI_ONE 1
373#define MULTI_MULT 2
374/*
375 * Return NOT_MULTI if c is not a "multi" operator.
376 * Return MULTI_ONE if c is a single "multi" operator.
377 * Return MULTI_MULT if c is a multi "multi" operator.
378 */
379 static int
380re_multi_type(c)
381 int c;
382{
383 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
384 return MULTI_ONE;
385 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
386 return MULTI_MULT;
387 return NOT_MULTI;
388}
389
390/*
391 * Flags to be passed up and down.
392 */
393#define HASWIDTH 0x1 /* Known never to match null string. */
394#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
395#define SPSTART 0x4 /* Starts with * or +. */
396#define HASNL 0x8 /* Contains some \n. */
397#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
398#define WORST 0 /* Worst case. */
399
400/*
401 * When regcode is set to this value, code is not emitted and size is computed
402 * instead.
403 */
404#define JUST_CALC_SIZE ((char_u *) -1)
405
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000406static char_u *reg_prev_sub = NULL;
407
Bram Moolenaar071d4272004-06-13 20:20:40 +0000408/*
409 * REGEXP_INRANGE contains all characters which are always special in a []
410 * range after '\'.
411 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
412 * These are:
413 * \n - New line (NL).
414 * \r - Carriage Return (CR).
415 * \t - Tab (TAB).
416 * \e - Escape (ESC).
417 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000418 * \d - Character code in decimal, eg \d123
 * \o - Character code in octal, eg \o40
420 * \x - Character code in hex, eg \x4a
421 * \u - Multibyte character code, eg \u20ac
422 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000423 */
424static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000425static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426
427static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000428static int get_char_class __ARGS((char_u **pp));
429static int get_equi_class __ARGS((char_u **pp));
430static void reg_equi_class __ARGS((int c));
431static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000432static char_u *skip_anyof __ARGS((char_u *p));
433static void init_class_tab __ARGS((void));
434
435/*
436 * Translate '\x' to its control character, except "\n", which is Magic.
437 */
438 static int
439backslash_trans(c)
440 int c;
441{
442 switch (c)
443 {
444 case 'r': return CAR;
445 case 't': return TAB;
446 case 'e': return ESC;
447 case 'b': return BS;
448 }
449 return c;
450}
451
452/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000453 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000454 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
455 * recognized. Otherwise "pp" is advanced to after the item.
456 */
457 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000458get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000459 char_u **pp;
460{
461 static const char *(class_names[]) =
462 {
463 "alnum:]",
464#define CLASS_ALNUM 0
465 "alpha:]",
466#define CLASS_ALPHA 1
467 "blank:]",
468#define CLASS_BLANK 2
469 "cntrl:]",
470#define CLASS_CNTRL 3
471 "digit:]",
472#define CLASS_DIGIT 4
473 "graph:]",
474#define CLASS_GRAPH 5
475 "lower:]",
476#define CLASS_LOWER 6
477 "print:]",
478#define CLASS_PRINT 7
479 "punct:]",
480#define CLASS_PUNCT 8
481 "space:]",
482#define CLASS_SPACE 9
483 "upper:]",
484#define CLASS_UPPER 10
485 "xdigit:]",
486#define CLASS_XDIGIT 11
487 "tab:]",
488#define CLASS_TAB 12
489 "return:]",
490#define CLASS_RETURN 13
491 "backspace:]",
492#define CLASS_BACKSPACE 14
493 "escape:]",
494#define CLASS_ESCAPE 15
495 };
496#define CLASS_NONE 99
497 int i;
498
499 if ((*pp)[1] == ':')
500 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000501 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000502 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
503 {
504 *pp += STRLEN(class_names[i]) + 2;
505 return i;
506 }
507 }
508 return CLASS_NONE;
509}
510
511/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000512 * Specific version of character class functions.
513 * Using a table to keep this fast.
514 */
515static short class_tab[256];
516
517#define RI_DIGIT 0x01
518#define RI_HEX 0x02
519#define RI_OCTAL 0x04
520#define RI_WORD 0x08
521#define RI_HEAD 0x10
522#define RI_ALPHA 0x20
523#define RI_LOWER 0x40
524#define RI_UPPER 0x80
525#define RI_WHITE 0x100
526
527 static void
528init_class_tab()
529{
530 int i;
531 static int done = FALSE;
532
533 if (done)
534 return;
535
536 for (i = 0; i < 256; ++i)
537 {
538 if (i >= '0' && i <= '7')
539 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
540 else if (i >= '8' && i <= '9')
541 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
542 else if (i >= 'a' && i <= 'f')
543 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
544#ifdef EBCDIC
545 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
546 || (i >= 's' && i <= 'z'))
547#else
548 else if (i >= 'g' && i <= 'z')
549#endif
550 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
551 else if (i >= 'A' && i <= 'F')
552 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
553#ifdef EBCDIC
554 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
555 || (i >= 'S' && i <= 'Z'))
556#else
557 else if (i >= 'G' && i <= 'Z')
558#endif
559 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
560 else if (i == '_')
561 class_tab[i] = RI_WORD + RI_HEAD;
562 else
563 class_tab[i] = 0;
564 }
565 class_tab[' '] |= RI_WHITE;
566 class_tab['\t'] |= RI_WHITE;
567 done = TRUE;
568}
569
/*
 * Character class tests that look up the RI_ flags in class_tab[].
 * class_tab[] has 256 entries, so with FEAT_MBYTE a character of 0x100 or
 * above never matches.
 * The macro argument is parenthesized in the range check so that an
 * expression argument expands correctly.  Note that the FEAT_MBYTE variants
 * evaluate "c" twice.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
591
592/* flags for regflags */
593#define RF_ICASE 1 /* ignore case */
594#define RF_NOICASE 2 /* don't ignore case */
595#define RF_HASNL 4 /* can match a NL */
596#define RF_ICOMBINE 8 /* ignore combining characters */
597#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
598
599/*
600 * Global work variables for vim_regcomp().
601 */
602
603static char_u *regparse; /* Input-scan pointer. */
604static int prevchr_len; /* byte length of previous char */
605static int num_complex_braces; /* Complex \{...} count */
606static int regnpar; /* () count. */
607#ifdef FEAT_SYN_HL
608static int regnzpar; /* \z() count. */
609static int re_has_z; /* \z item detected */
610#endif
611static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
612static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000613static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000614static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
615static unsigned regflags; /* RF_ flags for prog */
616static long brace_min[10]; /* Minimums for complex brace repeats */
617static long brace_max[10]; /* Maximums for complex brace repeats */
618static int brace_count[10]; /* Current counts for complex brace repeats */
619#if defined(FEAT_SYN_HL) || defined(PROTO)
620static int had_eol; /* TRUE when EOL found by vim_regcomp() */
621#endif
622static int one_exactly = FALSE; /* only do one char for EXACTLY */
623
624static int reg_magic; /* magicness of the pattern: */
625#define MAGIC_NONE 1 /* "\V" very unmagic */
626#define MAGIC_OFF 2 /* "\M" or 'magic' off */
627#define MAGIC_ON 3 /* "\m" or 'magic' */
628#define MAGIC_ALL 4 /* "\v" very magic */
629
630static int reg_string; /* matching with a string instead of a buffer
631 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000632static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000633
634/*
635 * META contains all characters that may be magic, except '^' and '$'.
636 */
637
638#ifdef EBCDIC
639static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
640#else
641/* META[] is used often enough to justify turning it into a table. */
642static char_u META_flags[] = {
643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645/* % & ( ) * + . */
646 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
647/* 1 2 3 4 5 6 7 8 9 < = > ? */
648 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
649/* @ A C D F H I K L M O */
650 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
651/* P S U V W X Z [ _ */
652 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
653/* a c d f h i k l m n o */
654 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
655/* p s u v w x z { | ~ */
656 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
657};
658#endif
659
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200660static int curchr; /* currently parsed character */
661/* Previous character. Note: prevchr is sometimes -1 when we are not at the
662 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
663 * because ^ was taken to be magic -- webb */
664static int prevchr;
665static int prevprevchr; /* previous-previous character */
666static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000667
668/* arguments for reg() */
669#define REG_NOPAREN 0 /* toplevel reg() */
670#define REG_PAREN 1 /* \(\) */
671#define REG_ZPAREN 2 /* \z(\) */
672#define REG_NPAREN 3 /* \%(\) */
673
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200674typedef struct
675{
676 char_u *regparse;
677 int prevchr_len;
678 int curchr;
679 int prevchr;
680 int prevprevchr;
681 int nextchr;
682 int at_start;
683 int prev_at_start;
684 int regnpar;
685} parse_state_T;
686
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687/*
688 * Forward declarations for vim_regcomp()'s friends.
689 */
690static void initchr __ARGS((char_u *));
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200691static void save_parse_state __ARGS((parse_state_T *ps));
692static void restore_parse_state __ARGS((parse_state_T *ps));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000693static int getchr __ARGS((void));
694static void skipchr_keepstart __ARGS((void));
695static int peekchr __ARGS((void));
696static void skipchr __ARGS((void));
697static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000698static int gethexchrs __ARGS((int maxinputlen));
699static int getoctchrs __ARGS((void));
700static int getdecchrs __ARGS((void));
701static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000702static void regcomp_start __ARGS((char_u *expr, int flags));
703static char_u *reg __ARGS((int, int *));
704static char_u *regbranch __ARGS((int *flagp));
705static char_u *regconcat __ARGS((int *flagp));
706static char_u *regpiece __ARGS((int *));
707static char_u *regatom __ARGS((int *));
708static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000709#ifdef FEAT_MBYTE
710static int use_multibytecode __ARGS((int c));
711#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000712static int prog_magic_wrong __ARGS((void));
713static char_u *regnext __ARGS((char_u *));
714static void regc __ARGS((int b));
715#ifdef FEAT_MBYTE
716static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200717# define REGMBC(x) regmbc(x);
718# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000719#else
720# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200721# define REGMBC(x)
722# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000723#endif
724static void reginsert __ARGS((int, char_u *));
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200725static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000726static void reginsert_limits __ARGS((int, long, long, char_u *));
727static char_u *re_put_long __ARGS((char_u *pr, long_u val));
728static int read_limits __ARGS((long *, long *));
729static void regtail __ARGS((char_u *, char_u *));
730static void regoptail __ARGS((char_u *, char_u *));
731
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200732static regengine_T bt_regengine;
733static regengine_T nfa_regengine;
734
Bram Moolenaar071d4272004-06-13 20:20:40 +0000735/*
736 * Return TRUE if compiled regular expression "prog" can match a line break.
737 */
738 int
739re_multiline(prog)
740 regprog_T *prog;
741{
742 return (prog->regflags & RF_HASNL);
743}
744
745/*
746 * Return TRUE if compiled regular expression "prog" looks before the start
747 * position (pattern contains "\@<=" or "\@<!").
748 */
749 int
750re_lookbehind(prog)
751 regprog_T *prog;
752{
753 return (prog->regflags & RF_LOOKBH);
754}
755
756/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000757 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
758 * Returns a character representing the class. Zero means that no item was
759 * recognized. Otherwise "pp" is advanced to after the item.
760 */
761 static int
762get_equi_class(pp)
763 char_u **pp;
764{
765 int c;
766 int l = 1;
767 char_u *p = *pp;
768
769 if (p[1] == '=')
770 {
771#ifdef FEAT_MBYTE
772 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000773 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000774#endif
775 if (p[l + 2] == '=' && p[l + 3] == ']')
776 {
777#ifdef FEAT_MBYTE
778 if (has_mbyte)
779 c = mb_ptr2char(p + 2);
780 else
781#endif
782 c = p[2];
783 *pp += l + 4;
784 return c;
785 }
786 }
787 return 0;
788}
789
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200790#ifdef EBCDIC
791/*
792 * Table for equivalence class "c". (IBM-1047)
793 */
794char *EQUIVAL_CLASS_C[16] = {
795 "A\x62\x63\x64\x65\x66\x67",
796 "C\x68",
797 "E\x71\x72\x73\x74",
798 "I\x75\x76\x77\x78",
799 "N\x69",
800 "O\xEB\xEC\xED\xEE\xEF",
801 "U\xFB\xFC\xFD\xFE",
802 "Y\xBA",
803 "a\x42\x43\x44\x45\x46\x47",
804 "c\x48",
805 "e\x51\x52\x53\x54",
806 "i\x55\x56\x57\x58",
807 "n\x49",
808 "o\xCB\xCC\xCD\xCE\xCF",
809 "u\xDB\xDC\xDD\xDE",
810 "y\x8D\xDF",
811};
812#endif
813
Bram Moolenaardf177f62005-02-22 08:39:57 +0000814/*
815 * Produce the bytes for equivalence class "c".
816 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200817 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000818 */
819 static void
820reg_equi_class(c)
821 int c;
822{
823#ifdef FEAT_MBYTE
824 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000825 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000826#endif
827 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200828#ifdef EBCDIC
829 int i;
830
831 /* This might be slower than switch/case below. */
832 for (i = 0; i < 16; i++)
833 {
834 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
835 {
836 char *p = EQUIVAL_CLASS_C[i];
837
838 while (*p != 0)
839 regmbc(*p++);
840 return;
841 }
842 }
843#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000844 switch (c)
845 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000846 case 'A': case '\300': case '\301': case '\302':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200847 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
848 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000849 case '\303': case '\304': case '\305':
850 regmbc('A'); regmbc('\300'); regmbc('\301');
851 regmbc('\302'); regmbc('\303'); regmbc('\304');
852 regmbc('\305');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200853 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
854 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
855 REGMBC(0x1ea2)
856 return;
857 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
858 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000859 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000860 case 'C': case '\307':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200861 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000862 regmbc('C'); regmbc('\307');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200863 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
864 REGMBC(0x10c)
865 return;
866 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
867 CASEMBC(0x1e0e) CASEMBC(0x1e10)
868 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
869 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000870 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000871 case 'E': case '\310': case '\311': case '\312': case '\313':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200872 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
873 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000874 regmbc('E'); regmbc('\310'); regmbc('\311');
875 regmbc('\312'); regmbc('\313');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200876 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
877 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
878 REGMBC(0x1ebc)
879 return;
880 case 'F': CASEMBC(0x1e1e)
881 regmbc('F'); REGMBC(0x1e1e)
882 return;
883 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
884 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
885 CASEMBC(0x1e20)
886 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
887 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
888 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
889 return;
890 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
891 CASEMBC(0x1e26) CASEMBC(0x1e28)
892 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
893 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000894 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000895 case 'I': case '\314': case '\315': case '\316': case '\317':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200896 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
897 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000898 regmbc('I'); regmbc('\314'); regmbc('\315');
899 regmbc('\316'); regmbc('\317');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200900 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
901 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
902 REGMBC(0x1ec8)
903 return;
904 case 'J': CASEMBC(0x134)
905 regmbc('J'); REGMBC(0x134)
906 return;
907 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
908 CASEMBC(0x1e34)
909 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
910 REGMBC(0x1e30) REGMBC(0x1e34)
911 return;
912 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
913 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
914 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
915 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
916 REGMBC(0x1e3a)
917 return;
918 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
919 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000920 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000921 case 'N': case '\321':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200922 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
923 CASEMBC(0x1e48)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000924 regmbc('N'); regmbc('\321');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200925 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
926 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000927 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000928 case 'O': case '\322': case '\323': case '\324': case '\325':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200929 case '\326': case '\330':
930 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
931 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000932 regmbc('O'); regmbc('\322'); regmbc('\323');
933 regmbc('\324'); regmbc('\325'); regmbc('\326');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200934 regmbc('\330');
935 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
936 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
937 REGMBC(0x1ec) REGMBC(0x1ece)
938 return;
939 case 'P': case 0x1e54: case 0x1e56:
940 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
941 return;
942 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
943 CASEMBC(0x1e58) CASEMBC(0x1e5e)
944 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
945 REGMBC(0x1e58) REGMBC(0x1e5e)
946 return;
947 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
948 CASEMBC(0x160) CASEMBC(0x1e60)
949 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
950 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
951 return;
952 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
953 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
954 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
955 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000956 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000957 case 'U': case '\331': case '\332': case '\333': case '\334':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200958 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
959 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
960 CASEMBC(0x1ee6)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000961 regmbc('U'); regmbc('\331'); regmbc('\332');
962 regmbc('\333'); regmbc('\334');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200963 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
964 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
965 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
966 return;
967 case 'V': CASEMBC(0x1e7c)
968 regmbc('V'); REGMBC(0x1e7c)
969 return;
970 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
971 CASEMBC(0x1e84) CASEMBC(0x1e86)
972 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
973 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
974 return;
975 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
976 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000977 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000978 case 'Y': case '\335':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200979 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
980 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000981 regmbc('Y'); regmbc('\335');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200982 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
983 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
984 return;
985 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
986 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
987 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
988 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
989 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000990 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000991 case 'a': case '\340': case '\341': case '\342':
992 case '\343': case '\344': case '\345':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200993 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
994 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000995 regmbc('a'); regmbc('\340'); regmbc('\341');
996 regmbc('\342'); regmbc('\343'); regmbc('\344');
997 regmbc('\345');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200998 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
999 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
1000 REGMBC(0x1ea3)
1001 return;
1002 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
1003 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001004 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001005 case 'c': case '\347':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001006 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001007 regmbc('c'); regmbc('\347');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001008 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1009 REGMBC(0x10d)
1010 return;
1011 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
1012 CASEMBC(0x1e11)
1013 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
1014 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001015 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001016 case 'e': case '\350': case '\351': case '\352': case '\353':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001017 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1018 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001019 regmbc('e'); regmbc('\350'); regmbc('\351');
1020 regmbc('\352'); regmbc('\353');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001021 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1022 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1023 REGMBC(0x1ebd)
1024 return;
1025 case 'f': CASEMBC(0x1e1f)
1026 regmbc('f'); REGMBC(0x1e1f)
1027 return;
1028 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1029 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1030 CASEMBC(0x1e21)
1031 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1032 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1033 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1034 return;
1035 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1036 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1037 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1038 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1039 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001040 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001041 case 'i': case '\354': case '\355': case '\356': case '\357':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001042 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1043 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001044 regmbc('i'); regmbc('\354'); regmbc('\355');
1045 regmbc('\356'); regmbc('\357');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001046 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1047 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1048 return;
1049 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1050 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1051 return;
1052 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1053 CASEMBC(0x1e35)
1054 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1055 REGMBC(0x1e31) REGMBC(0x1e35)
1056 return;
1057 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1058 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1059 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1060 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1061 REGMBC(0x1e3b)
1062 return;
1063 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1064 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001065 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001066 case 'n': case '\361':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001067 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1068 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001069 regmbc('n'); regmbc('\361');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001070 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1071 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001072 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001073 case 'o': case '\362': case '\363': case '\364': case '\365':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001074 case '\366': case '\370':
1075 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1076 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001077 regmbc('o'); regmbc('\362'); regmbc('\363');
1078 regmbc('\364'); regmbc('\365'); regmbc('\366');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001079 regmbc('\370');
1080 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1081 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1082 REGMBC(0x1ed) REGMBC(0x1ecf)
1083 return;
1084 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1085 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1086 return;
1087 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1088 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1089 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1090 REGMBC(0x1e59) REGMBC(0x1e5f)
1091 return;
1092 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1093 CASEMBC(0x161) CASEMBC(0x1e61)
1094 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1095 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1096 return;
1097 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1098 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1099 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1100 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001101 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001102 case 'u': case '\371': case '\372': case '\373': case '\374':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001103 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1104 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1105 CASEMBC(0x1ee7)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001106 regmbc('u'); regmbc('\371'); regmbc('\372');
1107 regmbc('\373'); regmbc('\374');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001108 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1109 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1110 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1111 return;
1112 case 'v': CASEMBC(0x1e7d)
1113 regmbc('v'); REGMBC(0x1e7d)
1114 return;
1115 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1116 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1117 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1118 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1119 REGMBC(0x1e98)
1120 return;
1121 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1122 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001123 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001124 case 'y': case '\375': case '\377':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001125 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1126 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001127 regmbc('y'); regmbc('\375'); regmbc('\377');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001128 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1129 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1130 return;
1131 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1132 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1133 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1134 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1135 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001136 return;
1137 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001138#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001139 }
1140 regmbc(c);
1141}
1142
1143/*
1144 * Check for a collating element "[.a.]". "pp" points to the '['.
1145 * Returns a character. Zero means that no item was recognized. Otherwise
1146 * "pp" is advanced to after the item.
1147 * Currently only single characters are recognized!
1148 */
1149 static int
1150get_coll_element(pp)
1151 char_u **pp;
1152{
1153 int c;
1154 int l = 1;
1155 char_u *p = *pp;
1156
1157 if (p[1] == '.')
1158 {
1159#ifdef FEAT_MBYTE
1160 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001161 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001162#endif
1163 if (p[l + 2] == '.' && p[l + 3] == ']')
1164 {
1165#ifdef FEAT_MBYTE
1166 if (has_mbyte)
1167 c = mb_ptr2char(p + 2);
1168 else
1169#endif
1170 c = p[2];
1171 *pp += l + 4;
1172 return c;
1173 }
1174 }
1175 return 0;
1176}
1177
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001178static void get_cpo_flags __ARGS((void));
1179static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1180static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1181
1182 static void
1183get_cpo_flags()
1184{
1185 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1186 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1187}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001188
1189/*
1190 * Skip over a "[]" range.
1191 * "p" must point to the character after the '['.
1192 * The returned pointer is on the matching ']', or the terminating NUL.
1193 */
1194 static char_u *
1195skip_anyof(p)
1196 char_u *p;
1197{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001198#ifdef FEAT_MBYTE
1199 int l;
1200#endif
1201
Bram Moolenaardf177f62005-02-22 08:39:57 +00001202 if (*p == '^') /* Complement of range. */
1203 ++p;
1204 if (*p == ']' || *p == '-')
1205 ++p;
1206 while (*p != NUL && *p != ']')
1207 {
1208#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001209 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001210 p += l;
1211 else
1212#endif
1213 if (*p == '-')
1214 {
1215 ++p;
1216 if (*p != ']' && *p != NUL)
1217 mb_ptr_adv(p);
1218 }
1219 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001220 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001221 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001222 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001223 p += 2;
1224 else if (*p == '[')
1225 {
1226 if (get_char_class(&p) == CLASS_NONE
1227 && get_equi_class(&p) == 0
1228 && get_coll_element(&p) == 0)
1229 ++p; /* It was not a class name */
1230 }
1231 else
1232 ++p;
1233 }
1234
1235 return p;
1236}
1237
1238/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001239 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001240 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001241 * Take care of characters with a backslash in front of it.
1242 * Skip strings inside [ and ].
1243 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1244 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1245 * is changed in-place.
1246 */
1247 char_u *
1248skip_regexp(startp, dirc, magic, newp)
1249 char_u *startp;
1250 int dirc;
1251 int magic;
1252 char_u **newp;
1253{
1254 int mymagic;
1255 char_u *p = startp;
1256
1257 if (magic)
1258 mymagic = MAGIC_ON;
1259 else
1260 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001261 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001263 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 {
1265 if (p[0] == dirc) /* found end of regexp */
1266 break;
1267 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1268 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1269 {
1270 p = skip_anyof(p + 1);
1271 if (p[0] == NUL)
1272 break;
1273 }
1274 else if (p[0] == '\\' && p[1] != NUL)
1275 {
1276 if (dirc == '?' && newp != NULL && p[1] == '?')
1277 {
1278 /* change "\?" to "?", make a copy first. */
1279 if (*newp == NULL)
1280 {
1281 *newp = vim_strsave(startp);
1282 if (*newp != NULL)
1283 p = *newp + (p - startp);
1284 }
1285 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001286 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001287 else
1288 ++p;
1289 }
1290 else
1291 ++p; /* skip next character */
1292 if (*p == 'v')
1293 mymagic = MAGIC_ALL;
1294 else if (*p == 'V')
1295 mymagic = MAGIC_NONE;
1296 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001297 }
1298 return p;
1299}
1300
Bram Moolenaar473de612013-06-08 18:19:48 +02001301static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
1302static void bt_regfree __ARGS((regprog_T *prog));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001303
Bram Moolenaar071d4272004-06-13 20:20:40 +00001304/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001305 * bt_regcomp() - compile a regular expression into internal code for the
1306 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001307 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001308 *
1309 * We can't allocate space until we know how big the compiled form will be,
1310 * but we can't compile it (and thus know how big it is) until we've got a
1311 * place to put the code. So we cheat: we compile it twice, once with code
1312 * generation turned off and size counting turned on, and once "for real".
1313 * This also means that we don't allocate space until we are sure that the
1314 * thing really will compile successfully, and we never have to move the
1315 * code and thus invalidate pointers into it. (Note that it has to be in
1316 * one piece because vim_free() must be able to free it all.)
1317 *
1318 * Whether upper/lower case is to be ignored is decided when executing the
1319 * program, it does not matter here.
1320 *
1321 * Beware that the optimization-preparation code in here knows about some
1322 * of the structure of the compiled regexp.
1323 * "re_flags": RE_MAGIC and/or RE_STRING.
1324 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001325 static regprog_T *
1326bt_regcomp(expr, re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 char_u *expr;
1328 int re_flags;
1329{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001330 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001331 char_u *scan;
1332 char_u *longest;
1333 int len;
1334 int flags;
1335
1336 if (expr == NULL)
1337 EMSG_RET_NULL(_(e_null));
1338
1339 init_class_tab();
1340
1341 /*
1342 * First pass: determine size, legality.
1343 */
1344 regcomp_start(expr, re_flags);
1345 regcode = JUST_CALC_SIZE;
1346 regc(REGMAGIC);
1347 if (reg(REG_NOPAREN, &flags) == NULL)
1348 return NULL;
1349
1350 /* Small enough for pointer-storage convention? */
1351#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1352 if (regsize >= 65536L - 256L)
1353 EMSG_RET_NULL(_("E339: Pattern too long"));
1354#endif
1355
1356 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001357 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001358 if (r == NULL)
1359 return NULL;
1360
1361 /*
1362 * Second pass: emit code.
1363 */
1364 regcomp_start(expr, re_flags);
1365 regcode = r->program;
1366 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001367 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001368 {
1369 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001370 if (reg_toolong)
1371 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001372 return NULL;
1373 }
1374
1375 /* Dig out information for optimizations. */
1376 r->regstart = NUL; /* Worst-case defaults. */
1377 r->reganch = 0;
1378 r->regmust = NULL;
1379 r->regmlen = 0;
1380 r->regflags = regflags;
1381 if (flags & HASNL)
1382 r->regflags |= RF_HASNL;
1383 if (flags & HASLOOKBH)
1384 r->regflags |= RF_LOOKBH;
1385#ifdef FEAT_SYN_HL
1386 /* Remember whether this pattern has any \z specials in it. */
1387 r->reghasz = re_has_z;
1388#endif
1389 scan = r->program + 1; /* First BRANCH. */
1390 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1391 {
1392 scan = OPERAND(scan);
1393
1394 /* Starting-point info. */
1395 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1396 {
1397 r->reganch++;
1398 scan = regnext(scan);
1399 }
1400
1401 if (OP(scan) == EXACTLY)
1402 {
1403#ifdef FEAT_MBYTE
1404 if (has_mbyte)
1405 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1406 else
1407#endif
1408 r->regstart = *OPERAND(scan);
1409 }
1410 else if ((OP(scan) == BOW
1411 || OP(scan) == EOW
1412 || OP(scan) == NOTHING
1413 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1414 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1415 && OP(regnext(scan)) == EXACTLY)
1416 {
1417#ifdef FEAT_MBYTE
1418 if (has_mbyte)
1419 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1420 else
1421#endif
1422 r->regstart = *OPERAND(regnext(scan));
1423 }
1424
1425 /*
1426 * If there's something expensive in the r.e., find the longest
1427 * literal string that must appear and make it the regmust. Resolve
1428 * ties in favor of later strings, since the regstart check works
1429 * with the beginning of the r.e. and avoiding duplication
1430 * strengthens checking. Not a strong reason, but sufficient in the
1431 * absence of others.
1432 */
1433 /*
1434 * When the r.e. starts with BOW, it is faster to look for a regmust
1435 * first. Used a lot for "#" and "*" commands. (Added by mool).
1436 */
1437 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1438 && !(flags & HASNL))
1439 {
1440 longest = NULL;
1441 len = 0;
1442 for (; scan != NULL; scan = regnext(scan))
1443 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1444 {
1445 longest = OPERAND(scan);
1446 len = (int)STRLEN(OPERAND(scan));
1447 }
1448 r->regmust = longest;
1449 r->regmlen = len;
1450 }
1451 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001452#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001453 regdump(expr, r);
1454#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001455 r->engine = &bt_regengine;
1456 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001457}
1458
1459/*
Bram Moolenaar473de612013-06-08 18:19:48 +02001460 * Free a compiled regexp program, returned by bt_regcomp().
1461 */
1462 static void
1463bt_regfree(prog)
1464 regprog_T *prog;
1465{
1466 vim_free(prog);
1467}
1468
1469/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001470 * Setup to parse the regexp. Used once to get the length and once to do it.
1471 */
1472 static void
1473regcomp_start(expr, re_flags)
1474 char_u *expr;
1475 int re_flags; /* see vim_regcomp() */
1476{
1477 initchr(expr);
1478 if (re_flags & RE_MAGIC)
1479 reg_magic = MAGIC_ON;
1480 else
1481 reg_magic = MAGIC_OFF;
1482 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001483 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001484 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001485
1486 num_complex_braces = 0;
1487 regnpar = 1;
1488 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1489#ifdef FEAT_SYN_HL
1490 regnzpar = 1;
1491 re_has_z = 0;
1492#endif
1493 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001494 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001495 regflags = 0;
1496#if defined(FEAT_SYN_HL) || defined(PROTO)
1497 had_eol = FALSE;
1498#endif
1499}
1500
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag, which tells whether the EOL item "$" was found
 * during the previous call to vim_regcomp().  The flag is reset by
 * regcomp_start().  This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1512
1513/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001514 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001515 *
1516 * Caller must absorb opening parenthesis.
1517 *
1518 * Combining parenthesis handling with the base level of regular expression
1519 * is a trifle forced, but the need to tie the tails of the branches to what
1520 * follows makes it hard to avoid.
1521 */
1522 static char_u *
1523reg(paren, flagp)
1524 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1525 int *flagp;
1526{
1527 char_u *ret;
1528 char_u *br;
1529 char_u *ender;
1530 int parno = 0;
1531 int flags;
1532
1533 *flagp = HASWIDTH; /* Tentatively. */
1534
1535#ifdef FEAT_SYN_HL
1536 if (paren == REG_ZPAREN)
1537 {
1538 /* Make a ZOPEN node. */
1539 if (regnzpar >= NSUBEXP)
1540 EMSG_RET_NULL(_("E50: Too many \\z("));
1541 parno = regnzpar;
1542 regnzpar++;
1543 ret = regnode(ZOPEN + parno);
1544 }
1545 else
1546#endif
1547 if (paren == REG_PAREN)
1548 {
1549 /* Make a MOPEN node. */
1550 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001551 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001552 parno = regnpar;
1553 ++regnpar;
1554 ret = regnode(MOPEN + parno);
1555 }
1556 else if (paren == REG_NPAREN)
1557 {
1558 /* Make a NOPEN node. */
1559 ret = regnode(NOPEN);
1560 }
1561 else
1562 ret = NULL;
1563
1564 /* Pick up the branches, linking them together. */
1565 br = regbranch(&flags);
1566 if (br == NULL)
1567 return NULL;
1568 if (ret != NULL)
1569 regtail(ret, br); /* [MZ]OPEN -> first. */
1570 else
1571 ret = br;
1572 /* If one of the branches can be zero-width, the whole thing can.
1573 * If one of the branches has * at start or matches a line-break, the
1574 * whole thing can. */
1575 if (!(flags & HASWIDTH))
1576 *flagp &= ~HASWIDTH;
1577 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1578 while (peekchr() == Magic('|'))
1579 {
1580 skipchr();
1581 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001582 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001583 return NULL;
1584 regtail(ret, br); /* BRANCH -> BRANCH. */
1585 if (!(flags & HASWIDTH))
1586 *flagp &= ~HASWIDTH;
1587 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1588 }
1589
1590 /* Make a closing node, and hook it on the end. */
1591 ender = regnode(
1592#ifdef FEAT_SYN_HL
1593 paren == REG_ZPAREN ? ZCLOSE + parno :
1594#endif
1595 paren == REG_PAREN ? MCLOSE + parno :
1596 paren == REG_NPAREN ? NCLOSE : END);
1597 regtail(ret, ender);
1598
1599 /* Hook the tails of the branches to the closing node. */
1600 for (br = ret; br != NULL; br = regnext(br))
1601 regoptail(br, ender);
1602
1603 /* Check for proper termination. */
1604 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1605 {
1606#ifdef FEAT_SYN_HL
1607 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001608 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001609 else
1610#endif
1611 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001612 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001613 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001614 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001615 }
1616 else if (paren == REG_NOPAREN && peekchr() != NUL)
1617 {
1618 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001619 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001620 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001621 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001622 /* NOTREACHED */
1623 }
1624 /*
1625 * Here we set the flag allowing back references to this set of
1626 * parentheses.
1627 */
1628 if (paren == REG_PAREN)
1629 had_endbrace[parno] = TRUE; /* have seen the close paren */
1630 return ret;
1631}
1632
1633/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001634 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001635 * Implements the & operator.
1636 */
1637 static char_u *
1638regbranch(flagp)
1639 int *flagp;
1640{
1641 char_u *ret;
1642 char_u *chain = NULL;
1643 char_u *latest;
1644 int flags;
1645
1646 *flagp = WORST | HASNL; /* Tentatively. */
1647
1648 ret = regnode(BRANCH);
1649 for (;;)
1650 {
1651 latest = regconcat(&flags);
1652 if (latest == NULL)
1653 return NULL;
1654 /* If one of the branches has width, the whole thing has. If one of
1655 * the branches anchors at start-of-line, the whole thing does.
1656 * If one of the branches uses look-behind, the whole thing does. */
1657 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1658 /* If one of the branches doesn't match a line-break, the whole thing
1659 * doesn't. */
1660 *flagp &= ~HASNL | (flags & HASNL);
1661 if (chain != NULL)
1662 regtail(chain, latest);
1663 if (peekchr() != Magic('&'))
1664 break;
1665 skipchr();
1666 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001667 if (reg_toolong)
1668 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001669 reginsert(MATCH, latest);
1670 chain = latest;
1671 }
1672
1673 return ret;
1674}
1675
1676/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001677 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001678 * Implements the concatenation operator.
1679 */
1680 static char_u *
1681regconcat(flagp)
1682 int *flagp;
1683{
1684 char_u *first = NULL;
1685 char_u *chain = NULL;
1686 char_u *latest;
1687 int flags;
1688 int cont = TRUE;
1689
1690 *flagp = WORST; /* Tentatively. */
1691
1692 while (cont)
1693 {
1694 switch (peekchr())
1695 {
1696 case NUL:
1697 case Magic('|'):
1698 case Magic('&'):
1699 case Magic(')'):
1700 cont = FALSE;
1701 break;
1702 case Magic('Z'):
1703#ifdef FEAT_MBYTE
1704 regflags |= RF_ICOMBINE;
1705#endif
1706 skipchr_keepstart();
1707 break;
1708 case Magic('c'):
1709 regflags |= RF_ICASE;
1710 skipchr_keepstart();
1711 break;
1712 case Magic('C'):
1713 regflags |= RF_NOICASE;
1714 skipchr_keepstart();
1715 break;
1716 case Magic('v'):
1717 reg_magic = MAGIC_ALL;
1718 skipchr_keepstart();
1719 curchr = -1;
1720 break;
1721 case Magic('m'):
1722 reg_magic = MAGIC_ON;
1723 skipchr_keepstart();
1724 curchr = -1;
1725 break;
1726 case Magic('M'):
1727 reg_magic = MAGIC_OFF;
1728 skipchr_keepstart();
1729 curchr = -1;
1730 break;
1731 case Magic('V'):
1732 reg_magic = MAGIC_NONE;
1733 skipchr_keepstart();
1734 curchr = -1;
1735 break;
1736 default:
1737 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001738 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001739 return NULL;
1740 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1741 if (chain == NULL) /* First piece. */
1742 *flagp |= flags & SPSTART;
1743 else
1744 regtail(chain, latest);
1745 chain = latest;
1746 if (first == NULL)
1747 first = latest;
1748 break;
1749 }
1750 }
1751 if (first == NULL) /* Loop ran zero times. */
1752 first = regnode(NOTHING);
1753 return first;
1754}
1755
1756/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001757 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001758 *
1759 * Note that the branching code sequences used for = and the general cases
1760 * of * and + are somewhat optimized: they use the same NOTHING node as
1761 * both the endmarker for their branch list and the body of the last branch.
1762 * It might seem that this node could be dispensed with entirely, but the
1763 * endmarker role is not redundant.
1764 */
1765 static char_u *
1766regpiece(flagp)
1767 int *flagp;
1768{
1769 char_u *ret;
1770 int op;
1771 char_u *next;
1772 int flags;
1773 long minval;
1774 long maxval;
1775
1776 ret = regatom(&flags);
1777 if (ret == NULL)
1778 return NULL;
1779
1780 op = peekchr();
1781 if (re_multi_type(op) == NOT_MULTI)
1782 {
1783 *flagp = flags;
1784 return ret;
1785 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001786 /* default flags */
1787 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1788
1789 skipchr();
1790 switch (op)
1791 {
1792 case Magic('*'):
1793 if (flags & SIMPLE)
1794 reginsert(STAR, ret);
1795 else
1796 {
1797 /* Emit x* as (x&|), where & means "self". */
1798 reginsert(BRANCH, ret); /* Either x */
1799 regoptail(ret, regnode(BACK)); /* and loop */
1800 regoptail(ret, ret); /* back */
1801 regtail(ret, regnode(BRANCH)); /* or */
1802 regtail(ret, regnode(NOTHING)); /* null. */
1803 }
1804 break;
1805
1806 case Magic('+'):
1807 if (flags & SIMPLE)
1808 reginsert(PLUS, ret);
1809 else
1810 {
1811 /* Emit x+ as x(&|), where & means "self". */
1812 next = regnode(BRANCH); /* Either */
1813 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001814 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 regtail(next, regnode(BRANCH)); /* or */
1816 regtail(ret, regnode(NOTHING)); /* null. */
1817 }
1818 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1819 break;
1820
1821 case Magic('@'):
1822 {
1823 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001824 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001825
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001826 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001827 switch (no_Magic(getchr()))
1828 {
1829 case '=': lop = MATCH; break; /* \@= */
1830 case '!': lop = NOMATCH; break; /* \@! */
1831 case '>': lop = SUBPAT; break; /* \@> */
1832 case '<': switch (no_Magic(getchr()))
1833 {
1834 case '=': lop = BEHIND; break; /* \@<= */
1835 case '!': lop = NOBEHIND; break; /* \@<! */
1836 }
1837 }
1838 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001839 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001840 reg_magic == MAGIC_ALL);
1841 /* Look behind must match with behind_pos. */
1842 if (lop == BEHIND || lop == NOBEHIND)
1843 {
1844 regtail(ret, regnode(BHPOS));
1845 *flagp |= HASLOOKBH;
1846 }
1847 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001848 if (lop == BEHIND || lop == NOBEHIND)
1849 {
1850 if (nr < 0)
1851 nr = 0; /* no limit is same as zero limit */
1852 reginsert_nr(lop, nr, ret);
1853 }
1854 else
1855 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001856 break;
1857 }
1858
1859 case Magic('?'):
1860 case Magic('='):
1861 /* Emit x= as (x|) */
1862 reginsert(BRANCH, ret); /* Either x */
1863 regtail(ret, regnode(BRANCH)); /* or */
1864 next = regnode(NOTHING); /* null. */
1865 regtail(ret, next);
1866 regoptail(ret, next);
1867 break;
1868
1869 case Magic('{'):
1870 if (!read_limits(&minval, &maxval))
1871 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001872 if (flags & SIMPLE)
1873 {
1874 reginsert(BRACE_SIMPLE, ret);
1875 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1876 }
1877 else
1878 {
1879 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001880 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001881 reg_magic == MAGIC_ALL);
1882 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1883 regoptail(ret, regnode(BACK));
1884 regoptail(ret, ret);
1885 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1886 ++num_complex_braces;
1887 }
1888 if (minval > 0 && maxval > 0)
1889 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1890 break;
1891 }
1892 if (re_multi_type(peekchr()) != NOT_MULTI)
1893 {
1894 /* Can't have a multi follow a multi. */
1895 if (peekchr() == Magic('*'))
1896 sprintf((char *)IObuff, _("E61: Nested %s*"),
1897 reg_magic >= MAGIC_ON ? "" : "\\");
1898 else
1899 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1900 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1901 EMSG_RET_NULL(IObuff);
1902 }
1903
1904 return ret;
1905}
1906
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001907/* When making changes to classchars also change nfa_classcodes. */
1908static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1909static int classcodes[] = {
1910 ANY, IDENT, SIDENT, KWORD, SKWORD,
1911 FNAME, SFNAME, PRINT, SPRINT,
1912 WHITE, NWHITE, DIGIT, NDIGIT,
1913 HEX, NHEX, OCTAL, NOCTAL,
1914 WORD, NWORD, HEAD, NHEAD,
1915 ALPHA, NALPHA, LOWER, NLOWER,
1916 UPPER, NUPPER
1917};
1918
Bram Moolenaar071d4272004-06-13 20:20:40 +00001919/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001920 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921 *
1922 * Optimization: gobbles an entire sequence of ordinary characters so that
1923 * it can turn them into a single node, which is smaller to store and
1924 * faster to run. Don't do this when one_exactly is set.
1925 */
1926 static char_u *
1927regatom(flagp)
1928 int *flagp;
1929{
1930 char_u *ret;
1931 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001932 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001933 char_u *p;
1934 int extra = 0;
1935
1936 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001937
1938 c = getchr();
1939 switch (c)
1940 {
1941 case Magic('^'):
1942 ret = regnode(BOL);
1943 break;
1944
1945 case Magic('$'):
1946 ret = regnode(EOL);
1947#if defined(FEAT_SYN_HL) || defined(PROTO)
1948 had_eol = TRUE;
1949#endif
1950 break;
1951
1952 case Magic('<'):
1953 ret = regnode(BOW);
1954 break;
1955
1956 case Magic('>'):
1957 ret = regnode(EOW);
1958 break;
1959
1960 case Magic('_'):
1961 c = no_Magic(getchr());
1962 if (c == '^') /* "\_^" is start-of-line */
1963 {
1964 ret = regnode(BOL);
1965 break;
1966 }
1967 if (c == '$') /* "\_$" is end-of-line */
1968 {
1969 ret = regnode(EOL);
1970#if defined(FEAT_SYN_HL) || defined(PROTO)
1971 had_eol = TRUE;
1972#endif
1973 break;
1974 }
1975
1976 extra = ADD_NL;
1977 *flagp |= HASNL;
1978
1979 /* "\_[" is character range plus newline */
1980 if (c == '[')
1981 goto collection;
1982
1983 /* "\_x" is character class plus newline */
1984 /*FALLTHROUGH*/
1985
1986 /*
1987 * Character classes.
1988 */
1989 case Magic('.'):
1990 case Magic('i'):
1991 case Magic('I'):
1992 case Magic('k'):
1993 case Magic('K'):
1994 case Magic('f'):
1995 case Magic('F'):
1996 case Magic('p'):
1997 case Magic('P'):
1998 case Magic('s'):
1999 case Magic('S'):
2000 case Magic('d'):
2001 case Magic('D'):
2002 case Magic('x'):
2003 case Magic('X'):
2004 case Magic('o'):
2005 case Magic('O'):
2006 case Magic('w'):
2007 case Magic('W'):
2008 case Magic('h'):
2009 case Magic('H'):
2010 case Magic('a'):
2011 case Magic('A'):
2012 case Magic('l'):
2013 case Magic('L'):
2014 case Magic('u'):
2015 case Magic('U'):
2016 p = vim_strchr(classchars, no_Magic(c));
2017 if (p == NULL)
2018 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002019#ifdef FEAT_MBYTE
2020 /* When '.' is followed by a composing char ignore the dot, so that
2021 * the composing char is matched here. */
2022 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2023 {
2024 c = getchr();
2025 goto do_multibyte;
2026 }
2027#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028 ret = regnode(classcodes[p - classchars] + extra);
2029 *flagp |= HASWIDTH | SIMPLE;
2030 break;
2031
2032 case Magic('n'):
2033 if (reg_string)
2034 {
2035 /* In a string "\n" matches a newline character. */
2036 ret = regnode(EXACTLY);
2037 regc(NL);
2038 regc(NUL);
2039 *flagp |= HASWIDTH | SIMPLE;
2040 }
2041 else
2042 {
2043 /* In buffer text "\n" matches the end of a line. */
2044 ret = regnode(NEWL);
2045 *flagp |= HASWIDTH | HASNL;
2046 }
2047 break;
2048
2049 case Magic('('):
2050 if (one_exactly)
2051 EMSG_ONE_RET_NULL;
2052 ret = reg(REG_PAREN, &flags);
2053 if (ret == NULL)
2054 return NULL;
2055 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2056 break;
2057
2058 case NUL:
2059 case Magic('|'):
2060 case Magic('&'):
2061 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002062 if (one_exactly)
2063 EMSG_ONE_RET_NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002064 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
2065 /* NOTREACHED */
2066
2067 case Magic('='):
2068 case Magic('?'):
2069 case Magic('+'):
2070 case Magic('@'):
2071 case Magic('{'):
2072 case Magic('*'):
2073 c = no_Magic(c);
2074 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2075 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2076 ? "" : "\\", c);
2077 EMSG_RET_NULL(IObuff);
2078 /* NOTREACHED */
2079
2080 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002081 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002082 {
2083 char_u *lp;
2084
2085 ret = regnode(EXACTLY);
2086 lp = reg_prev_sub;
2087 while (*lp != NUL)
2088 regc(*lp++);
2089 regc(NUL);
2090 if (*reg_prev_sub != NUL)
2091 {
2092 *flagp |= HASWIDTH;
2093 if ((lp - reg_prev_sub) == 1)
2094 *flagp |= SIMPLE;
2095 }
2096 }
2097 else
2098 EMSG_RET_NULL(_(e_nopresub));
2099 break;
2100
2101 case Magic('1'):
2102 case Magic('2'):
2103 case Magic('3'):
2104 case Magic('4'):
2105 case Magic('5'):
2106 case Magic('6'):
2107 case Magic('7'):
2108 case Magic('8'):
2109 case Magic('9'):
2110 {
2111 int refnum;
2112
2113 refnum = c - Magic('0');
2114 /*
2115 * Check if the back reference is legal. We must have seen the
2116 * close brace.
2117 * TODO: Should also check that we don't refer to something
2118 * that is repeated (+*=): what instance of the repetition
2119 * should we match?
2120 */
2121 if (!had_endbrace[refnum])
2122 {
2123 /* Trick: check if "@<=" or "@<!" follows, in which case
2124 * the \1 can appear before the referenced match. */
2125 for (p = regparse; *p != NUL; ++p)
2126 if (p[0] == '@' && p[1] == '<'
2127 && (p[2] == '!' || p[2] == '='))
2128 break;
2129 if (*p == NUL)
2130 EMSG_RET_NULL(_("E65: Illegal back reference"));
2131 }
2132 ret = regnode(BACKREF + refnum);
2133 }
2134 break;
2135
Bram Moolenaar071d4272004-06-13 20:20:40 +00002136 case Magic('z'):
2137 {
2138 c = no_Magic(getchr());
2139 switch (c)
2140 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002141#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002142 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002143 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002144 if (one_exactly)
2145 EMSG_ONE_RET_NULL;
2146 ret = reg(REG_ZPAREN, &flags);
2147 if (ret == NULL)
2148 return NULL;
2149 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2150 re_has_z = REX_SET;
2151 break;
2152
2153 case '1':
2154 case '2':
2155 case '3':
2156 case '4':
2157 case '5':
2158 case '6':
2159 case '7':
2160 case '8':
2161 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002162 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002163 ret = regnode(ZREF + c - '0');
2164 re_has_z = REX_USE;
2165 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002166#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002167
2168 case 's': ret = regnode(MOPEN + 0);
2169 break;
2170
2171 case 'e': ret = regnode(MCLOSE + 0);
2172 break;
2173
2174 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2175 }
2176 }
2177 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002178
2179 case Magic('%'):
2180 {
2181 c = no_Magic(getchr());
2182 switch (c)
2183 {
2184 /* () without a back reference */
2185 case '(':
2186 if (one_exactly)
2187 EMSG_ONE_RET_NULL;
2188 ret = reg(REG_NPAREN, &flags);
2189 if (ret == NULL)
2190 return NULL;
2191 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2192 break;
2193
2194 /* Catch \%^ and \%$ regardless of where they appear in the
2195 * pattern -- regardless of whether or not it makes sense. */
2196 case '^':
2197 ret = regnode(RE_BOF);
2198 break;
2199
2200 case '$':
2201 ret = regnode(RE_EOF);
2202 break;
2203
2204 case '#':
2205 ret = regnode(CURSOR);
2206 break;
2207
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002208 case 'V':
2209 ret = regnode(RE_VISUAL);
2210 break;
2211
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02002212 case 'C':
2213 ret = regnode(RE_COMPOSING);
2214 break;
2215
Bram Moolenaar071d4272004-06-13 20:20:40 +00002216 /* \%[abc]: Emit as a list of branches, all ending at the last
2217 * branch which matches nothing. */
2218 case '[':
2219 if (one_exactly) /* doesn't nest */
2220 EMSG_ONE_RET_NULL;
2221 {
2222 char_u *lastbranch;
2223 char_u *lastnode = NULL;
2224 char_u *br;
2225
2226 ret = NULL;
2227 while ((c = getchr()) != ']')
2228 {
2229 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002230 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002231 reg_magic == MAGIC_ALL);
2232 br = regnode(BRANCH);
2233 if (ret == NULL)
2234 ret = br;
2235 else
2236 regtail(lastnode, br);
2237
2238 ungetchr();
2239 one_exactly = TRUE;
2240 lastnode = regatom(flagp);
2241 one_exactly = FALSE;
2242 if (lastnode == NULL)
2243 return NULL;
2244 }
2245 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002246 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002247 reg_magic == MAGIC_ALL);
2248 lastbranch = regnode(BRANCH);
2249 br = regnode(NOTHING);
2250 if (ret != JUST_CALC_SIZE)
2251 {
2252 regtail(lastnode, br);
2253 regtail(lastbranch, br);
2254 /* connect all branches to the NOTHING
2255 * branch at the end */
2256 for (br = ret; br != lastnode; )
2257 {
2258 if (OP(br) == BRANCH)
2259 {
2260 regtail(br, lastbranch);
2261 br = OPERAND(br);
2262 }
2263 else
2264 br = regnext(br);
2265 }
2266 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002267 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002268 break;
2269 }
2270
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002271 case 'd': /* %d123 decimal */
2272 case 'o': /* %o123 octal */
2273 case 'x': /* %xab hex 2 */
2274 case 'u': /* %uabcd hex 4 */
2275 case 'U': /* %U1234abcd hex 8 */
2276 {
2277 int i;
2278
2279 switch (c)
2280 {
2281 case 'd': i = getdecchrs(); break;
2282 case 'o': i = getoctchrs(); break;
2283 case 'x': i = gethexchrs(2); break;
2284 case 'u': i = gethexchrs(4); break;
2285 case 'U': i = gethexchrs(8); break;
2286 default: i = -1; break;
2287 }
2288
2289 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002290 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002291 _("E678: Invalid character after %s%%[dxouU]"),
2292 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002293#ifdef FEAT_MBYTE
2294 if (use_multibytecode(i))
2295 ret = regnode(MULTIBYTECODE);
2296 else
2297#endif
2298 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002299 if (i == 0)
2300 regc(0x0a);
2301 else
2302#ifdef FEAT_MBYTE
2303 regmbc(i);
2304#else
2305 regc(i);
2306#endif
2307 regc(NUL);
2308 *flagp |= HASWIDTH;
2309 break;
2310 }
2311
Bram Moolenaar071d4272004-06-13 20:20:40 +00002312 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002313 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2314 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002315 {
2316 long_u n = 0;
2317 int cmp;
2318
2319 cmp = c;
2320 if (cmp == '<' || cmp == '>')
2321 c = getchr();
2322 while (VIM_ISDIGIT(c))
2323 {
2324 n = n * 10 + (c - '0');
2325 c = getchr();
2326 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002327 if (c == '\'' && n == 0)
2328 {
2329 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2330 c = getchr();
2331 ret = regnode(RE_MARK);
2332 if (ret == JUST_CALC_SIZE)
2333 regsize += 2;
2334 else
2335 {
2336 *regcode++ = c;
2337 *regcode++ = cmp;
2338 }
2339 break;
2340 }
2341 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002342 {
2343 if (c == 'l')
2344 ret = regnode(RE_LNUM);
2345 else if (c == 'c')
2346 ret = regnode(RE_COL);
2347 else
2348 ret = regnode(RE_VCOL);
2349 if (ret == JUST_CALC_SIZE)
2350 regsize += 5;
2351 else
2352 {
2353 /* put the number and the optional
2354 * comparator after the opcode */
2355 regcode = re_put_long(regcode, n);
2356 *regcode++ = cmp;
2357 }
2358 break;
2359 }
2360 }
2361
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002362 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002363 reg_magic == MAGIC_ALL);
2364 }
2365 }
2366 break;
2367
2368 case Magic('['):
2369collection:
2370 {
2371 char_u *lp;
2372
2373 /*
2374 * If there is no matching ']', we assume the '[' is a normal
2375 * character. This makes 'incsearch' and ":help [" work.
2376 */
2377 lp = skip_anyof(regparse);
2378 if (*lp == ']') /* there is a matching ']' */
2379 {
2380 int startc = -1; /* > 0 when next '-' is a range */
2381 int endc;
2382
2383 /*
2384 * In a character class, different parsing rules apply.
2385 * Not even \ is special anymore, nothing is.
2386 */
2387 if (*regparse == '^') /* Complement of range. */
2388 {
2389 ret = regnode(ANYBUT + extra);
2390 regparse++;
2391 }
2392 else
2393 ret = regnode(ANYOF + extra);
2394
2395 /* At the start ']' and '-' mean the literal character. */
2396 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002397 {
2398 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002399 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002400 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002401
2402 while (*regparse != NUL && *regparse != ']')
2403 {
2404 if (*regparse == '-')
2405 {
2406 ++regparse;
2407 /* The '-' is not used for a range at the end and
2408 * after or before a '\n'. */
2409 if (*regparse == ']' || *regparse == NUL
2410 || startc == -1
2411 || (regparse[0] == '\\' && regparse[1] == 'n'))
2412 {
2413 regc('-');
2414 startc = '-'; /* [--x] is a range */
2415 }
2416 else
2417 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002418 /* Also accept "a-[.z.]" */
2419 endc = 0;
2420 if (*regparse == '[')
2421 endc = get_coll_element(&regparse);
2422 if (endc == 0)
2423 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002424#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002425 if (has_mbyte)
2426 endc = mb_ptr2char_adv(&regparse);
2427 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002428#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002429 endc = *regparse++;
2430 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002431
2432 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002433 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002434 endc = coll_get_char();
2435
Bram Moolenaar071d4272004-06-13 20:20:40 +00002436 if (startc > endc)
2437 EMSG_RET_NULL(_(e_invrange));
2438#ifdef FEAT_MBYTE
2439 if (has_mbyte && ((*mb_char2len)(startc) > 1
2440 || (*mb_char2len)(endc) > 1))
2441 {
2442 /* Limit to a range of 256 chars */
2443 if (endc > startc + 256)
2444 EMSG_RET_NULL(_(e_invrange));
2445 while (++startc <= endc)
2446 regmbc(startc);
2447 }
2448 else
2449#endif
2450 {
2451#ifdef EBCDIC
2452 int alpha_only = FALSE;
2453
2454 /* for alphabetical range skip the gaps
2455 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2456 if (isalpha(startc) && isalpha(endc))
2457 alpha_only = TRUE;
2458#endif
2459 while (++startc <= endc)
2460#ifdef EBCDIC
2461 if (!alpha_only || isalpha(startc))
2462#endif
2463 regc(startc);
2464 }
2465 startc = -1;
2466 }
2467 }
2468 /*
2469 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2470 * accepts "\t", "\e", etc., but only when the 'l' flag in
2471 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002472 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002473 */
2474 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002475 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002476 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002477 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 && vim_strchr(REGEXP_ABBR,
2479 regparse[1]) != NULL)))
2480 {
2481 regparse++;
2482 if (*regparse == 'n')
2483 {
2484 /* '\n' in range: also match NL */
2485 if (ret != JUST_CALC_SIZE)
2486 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002487 /* Using \n inside [^] does not change what
2488 * matches. "[^\n]" is the same as ".". */
2489 if (*ret == ANYOF)
2490 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002491 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002492 *flagp |= HASNL;
2493 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002494 /* else: must have had a \n already */
2495 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002496 regparse++;
2497 startc = -1;
2498 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002499 else if (*regparse == 'd'
2500 || *regparse == 'o'
2501 || *regparse == 'x'
2502 || *regparse == 'u'
2503 || *regparse == 'U')
2504 {
2505 startc = coll_get_char();
2506 if (startc == 0)
2507 regc(0x0a);
2508 else
2509#ifdef FEAT_MBYTE
2510 regmbc(startc);
2511#else
2512 regc(startc);
2513#endif
2514 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002515 else
2516 {
2517 startc = backslash_trans(*regparse++);
2518 regc(startc);
2519 }
2520 }
2521 else if (*regparse == '[')
2522 {
2523 int c_class;
2524 int cu;
2525
Bram Moolenaardf177f62005-02-22 08:39:57 +00002526 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002527 startc = -1;
2528 /* Characters assumed to be 8 bits! */
2529 switch (c_class)
2530 {
2531 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002532 c_class = get_equi_class(&regparse);
2533 if (c_class != 0)
2534 {
2535 /* produce equivalence class */
2536 reg_equi_class(c_class);
2537 }
2538 else if ((c_class =
2539 get_coll_element(&regparse)) != 0)
2540 {
2541 /* produce a collating element */
2542 regmbc(c_class);
2543 }
2544 else
2545 {
2546 /* literal '[', allow [[-x] as a range */
2547 startc = *regparse++;
2548 regc(startc);
2549 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002550 break;
2551 case CLASS_ALNUM:
2552 for (cu = 1; cu <= 255; cu++)
2553 if (isalnum(cu))
2554 regc(cu);
2555 break;
2556 case CLASS_ALPHA:
2557 for (cu = 1; cu <= 255; cu++)
2558 if (isalpha(cu))
2559 regc(cu);
2560 break;
2561 case CLASS_BLANK:
2562 regc(' ');
2563 regc('\t');
2564 break;
2565 case CLASS_CNTRL:
2566 for (cu = 1; cu <= 255; cu++)
2567 if (iscntrl(cu))
2568 regc(cu);
2569 break;
2570 case CLASS_DIGIT:
2571 for (cu = 1; cu <= 255; cu++)
2572 if (VIM_ISDIGIT(cu))
2573 regc(cu);
2574 break;
2575 case CLASS_GRAPH:
2576 for (cu = 1; cu <= 255; cu++)
2577 if (isgraph(cu))
2578 regc(cu);
2579 break;
2580 case CLASS_LOWER:
2581 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002582 if (MB_ISLOWER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002583 regc(cu);
2584 break;
2585 case CLASS_PRINT:
2586 for (cu = 1; cu <= 255; cu++)
2587 if (vim_isprintc(cu))
2588 regc(cu);
2589 break;
2590 case CLASS_PUNCT:
2591 for (cu = 1; cu <= 255; cu++)
2592 if (ispunct(cu))
2593 regc(cu);
2594 break;
2595 case CLASS_SPACE:
2596 for (cu = 9; cu <= 13; cu++)
2597 regc(cu);
2598 regc(' ');
2599 break;
2600 case CLASS_UPPER:
2601 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002602 if (MB_ISUPPER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002603 regc(cu);
2604 break;
2605 case CLASS_XDIGIT:
2606 for (cu = 1; cu <= 255; cu++)
2607 if (vim_isxdigit(cu))
2608 regc(cu);
2609 break;
2610 case CLASS_TAB:
2611 regc('\t');
2612 break;
2613 case CLASS_RETURN:
2614 regc('\r');
2615 break;
2616 case CLASS_BACKSPACE:
2617 regc('\b');
2618 break;
2619 case CLASS_ESCAPE:
2620 regc('\033');
2621 break;
2622 }
2623 }
2624 else
2625 {
2626#ifdef FEAT_MBYTE
2627 if (has_mbyte)
2628 {
2629 int len;
2630
2631 /* produce a multibyte character, including any
2632 * following composing characters */
2633 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002634 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002635 if (enc_utf8 && utf_char2len(startc) != len)
2636 startc = -1; /* composing chars */
2637 while (--len >= 0)
2638 regc(*regparse++);
2639 }
2640 else
2641#endif
2642 {
2643 startc = *regparse++;
2644 regc(startc);
2645 }
2646 }
2647 }
2648 regc(NUL);
2649 prevchr_len = 1; /* last char was the ']' */
2650 if (*regparse != ']')
2651 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2652 skipchr(); /* let's be friends with the lexer again */
2653 *flagp |= HASWIDTH | SIMPLE;
2654 break;
2655 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002656 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002657 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002658 }
2659 /* FALLTHROUGH */
2660
2661 default:
2662 {
2663 int len;
2664
2665#ifdef FEAT_MBYTE
2666 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002667 * before a multi and when it's a composing char. */
2668 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002669 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002670do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002671 ret = regnode(MULTIBYTECODE);
2672 regmbc(c);
2673 *flagp |= HASWIDTH | SIMPLE;
2674 break;
2675 }
2676#endif
2677
2678 ret = regnode(EXACTLY);
2679
2680 /*
2681 * Append characters as long as:
2682 * - there is no following multi, we then need the character in
2683 * front of it as a single character operand
2684 * - not running into a Magic character
2685 * - "one_exactly" is not set
2686 * But always emit at least one character. Might be a Multi,
2687 * e.g., a "[" without matching "]".
2688 */
2689 for (len = 0; c != NUL && (len == 0
2690 || (re_multi_type(peekchr()) == NOT_MULTI
2691 && !one_exactly
2692 && !is_Magic(c))); ++len)
2693 {
2694 c = no_Magic(c);
2695#ifdef FEAT_MBYTE
2696 if (has_mbyte)
2697 {
2698 regmbc(c);
2699 if (enc_utf8)
2700 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002701 int l;
2702
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002703 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002704 for (;;)
2705 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002706 l = utf_ptr2len(regparse);
2707 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002708 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002709 regmbc(utf_ptr2char(regparse));
2710 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002711 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002712 }
2713 }
2714 else
2715#endif
2716 regc(c);
2717 c = getchr();
2718 }
2719 ungetchr();
2720
2721 regc(NUL);
2722 *flagp |= HASWIDTH;
2723 if (len == 1)
2724 *flagp |= SIMPLE;
2725 }
2726 break;
2727 }
2728
2729 return ret;
2730}
2731
#ifdef FEAT_MBYTE
/*
 * Decide between a MULTIBYTECODE and an EXACTLY node for character "c".
 * Returns non-zero for MULTIBYTECODE: "c" takes more than one byte and it
 * is either followed by a multi or, for utf-8, is a composing character.
 */
    static int
use_multibytecode(c)
    int		c;
{
    /* A character of a single byte never needs MULTIBYTECODE. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
	return 0;

    if (re_multi_type(peekchr()) != NOT_MULTI)
	return 1;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2746
Bram Moolenaar071d4272004-06-13 20:20:40 +00002747/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002748 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002749 * Return pointer to generated code.
2750 */
2751 static char_u *
2752regnode(op)
2753 int op;
2754{
2755 char_u *ret;
2756
2757 ret = regcode;
2758 if (ret == JUST_CALC_SIZE)
2759 regsize += 3;
2760 else
2761 {
2762 *regcode++ = op;
2763 *regcode++ = NUL; /* Null "next" pointer. */
2764 *regcode++ = NUL;
2765 }
2766 return ret;
2767}
2768
2769/*
2770 * Emit (if appropriate) a byte of code
2771 */
2772 static void
2773regc(b)
2774 int b;
2775{
2776 if (regcode == JUST_CALC_SIZE)
2777 regsize++;
2778 else
2779 *regcode++ = b;
2780}
2781
#ifdef FEAT_MBYTE
/*
 * Emit character "c" as code, using as many bytes as the current encoding
 * needs for it.  When only the size is being computed "regsize" is
 * increased by that number of bytes instead.
 * A character above 0xff is silently dropped when multi-byte is not active.
 */
    static void
regmbc(c)
    int         c;
{
    if (has_mbyte || c <= 0xff)
    {
        if (regcode != JUST_CALC_SIZE)
            regcode += (*mb_char2bytes)(c, regcode);
        else
            regsize += (*mb_char2len)(c);
    }
}
#endif
2798
2799/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002800 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002801 *
2802 * Means relocating the operand.
2803 */
2804 static void
2805reginsert(op, opnd)
2806 int op;
2807 char_u *opnd;
2808{
2809 char_u *src;
2810 char_u *dst;
2811 char_u *place;
2812
2813 if (regcode == JUST_CALC_SIZE)
2814 {
2815 regsize += 3;
2816 return;
2817 }
2818 src = regcode;
2819 regcode += 3;
2820 dst = regcode;
2821 while (src > opnd)
2822 *--dst = *--src;
2823
2824 place = opnd; /* Op node, where operand used to be. */
2825 *place++ = op;
2826 *place++ = NUL;
2827 *place = NUL;
2828}
2829
2830/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002831 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002832 * Add a number to the operator.
2833 */
2834 static void
2835reginsert_nr(op, val, opnd)
2836 int op;
2837 long val;
2838 char_u *opnd;
2839{
2840 char_u *src;
2841 char_u *dst;
2842 char_u *place;
2843
2844 if (regcode == JUST_CALC_SIZE)
2845 {
2846 regsize += 7;
2847 return;
2848 }
2849 src = regcode;
2850 regcode += 7;
2851 dst = regcode;
2852 while (src > opnd)
2853 *--dst = *--src;
2854
2855 place = opnd; /* Op node, where operand used to be. */
2856 *place++ = op;
2857 *place++ = NUL;
2858 *place++ = NUL;
2859 place = re_put_long(place, (long_u)val);
2860}
2861
2862/*
2863 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002864 * The operator has the given limit values as operands. Also set next pointer.
2865 *
2866 * Means relocating the operand.
2867 */
2868 static void
2869reginsert_limits(op, minval, maxval, opnd)
2870 int op;
2871 long minval;
2872 long maxval;
2873 char_u *opnd;
2874{
2875 char_u *src;
2876 char_u *dst;
2877 char_u *place;
2878
2879 if (regcode == JUST_CALC_SIZE)
2880 {
2881 regsize += 11;
2882 return;
2883 }
2884 src = regcode;
2885 regcode += 11;
2886 dst = regcode;
2887 while (src > opnd)
2888 *--dst = *--src;
2889
2890 place = opnd; /* Op node, where operand used to be. */
2891 *place++ = op;
2892 *place++ = NUL;
2893 *place++ = NUL;
2894 place = re_put_long(place, (long_u)minval);
2895 place = re_put_long(place, (long_u)maxval);
2896 regtail(opnd, place);
2897}
2898
2899/*
2900 * Write a long as four bytes at "p" and return pointer to the next char.
2901 */
2902 static char_u *
2903re_put_long(p, val)
2904 char_u *p;
2905 long_u val;
2906{
2907 *p++ = (char_u) ((val >> 24) & 0377);
2908 *p++ = (char_u) ((val >> 16) & 0377);
2909 *p++ = (char_u) ((val >> 8) & 0377);
2910 *p++ = (char_u) (val & 0377);
2911 return p;
2912}
2913
2914/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002915 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002916 */
2917 static void
2918regtail(p, val)
2919 char_u *p;
2920 char_u *val;
2921{
2922 char_u *scan;
2923 char_u *temp;
2924 int offset;
2925
2926 if (p == JUST_CALC_SIZE)
2927 return;
2928
2929 /* Find last node. */
2930 scan = p;
2931 for (;;)
2932 {
2933 temp = regnext(scan);
2934 if (temp == NULL)
2935 break;
2936 scan = temp;
2937 }
2938
Bram Moolenaar582fd852005-03-28 20:58:01 +00002939 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002940 offset = (int)(scan - val);
2941 else
2942 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002943 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002944 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002945 * values in too many places. */
2946 if (offset > 0xffff)
2947 reg_toolong = TRUE;
2948 else
2949 {
2950 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2951 *(scan + 2) = (char_u) (offset & 0377);
2952 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002953}
2954
2955/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002956 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957 */
2958 static void
2959regoptail(p, val)
2960 char_u *p;
2961 char_u *val;
2962{
2963 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2964 if (p == NULL || p == JUST_CALC_SIZE
2965 || (OP(p) != BRANCH
2966 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2967 return;
2968 regtail(OPERAND(p), val);
2969}
2970
/*
 * Functions for getting characters from the regexp input.
 */

/* "at_start" is set by initchr() and by peekchr() when it finds a magic '^';
 * skipchr() copies it to "prev_at_start" and then resets it. */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2977
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002978/*
2979 * Start parsing at "str".
2980 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002981 static void
2982initchr(str)
2983 char_u *str;
2984{
2985 regparse = str;
2986 prevchr_len = 0;
2987 curchr = prevprevchr = prevchr = nextchr = -1;
2988 at_start = TRUE;
2989 prev_at_start = FALSE;
2990}
2991
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002992/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002993 * Save the current parse state, so that it can be restored and parsing
2994 * starts in the same state again.
2995 */
2996 static void
2997save_parse_state(ps)
2998 parse_state_T *ps;
2999{
3000 ps->regparse = regparse;
3001 ps->prevchr_len = prevchr_len;
3002 ps->curchr = curchr;
3003 ps->prevchr = prevchr;
3004 ps->prevprevchr = prevprevchr;
3005 ps->nextchr = nextchr;
3006 ps->at_start = at_start;
3007 ps->prev_at_start = prev_at_start;
3008 ps->regnpar = regnpar;
3009}
3010
3011/*
3012 * Restore a previously saved parse state.
3013 */
3014 static void
3015restore_parse_state(ps)
3016 parse_state_T *ps;
3017{
3018 regparse = ps->regparse;
3019 prevchr_len = ps->prevchr_len;
3020 curchr = ps->curchr;
3021 prevchr = ps->prevchr;
3022 prevprevchr = ps->prevprevchr;
3023 nextchr = ps->nextchr;
3024 at_start = ps->at_start;
3025 prev_at_start = ps->prev_at_start;
3026 regnpar = ps->regnpar;
3027}
3028
3029
/*
 * Get the next character without advancing.
 *
 * The result is remembered in "curchr": the pattern is only inspected when
 * "curchr" is -1, otherwise the remembered value is returned right away.
 * A character that acts as an operator is returned as Magic(c).  Whether it
 * does depends on "reg_magic", on a preceding backslash and, for '*', '^'
 * and '$', on the position in the pattern.
 * "regparse" is left where it was, but "at_start" and "prev_at_start" may be
 * changed.
 */
    static int
peekchr()
{
    /* Non-zero while in the recursive call for the character that follows a
     * backslash. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* * is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
             * "\(\*" is not magic, thus must be magic if "after_slash" */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&" or "\n" */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* What follows a magic '^' is at the start again. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n" */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;
                /* magicness that will be in effect for what follows "p" */
                int is_magic_all = (reg_magic == MAGIC_ALL);

                /* ignore \c \C \m \M \v \V and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M'
                                || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
                {
                    if (p[1] == 'v')
                        is_magic_all = TRUE;
                    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
                        is_magic_all = FALSE;
                    p += 2;
                }
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || (is_magic_all
                               && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                /* the character following the backslash */
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    /* Lex the character after the backslash with a recursive
                     * call, then move "regparse" back to the backslash. */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        default:
            /* Any other character: get the whole multi-byte character
             * instead of only its first byte. */
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
3201
3202/*
3203 * Eat one lexed character. Do this in a way that we can undo it.
3204 */
3205 static void
3206skipchr()
3207{
3208 /* peekchr() eats a backslash, do the same here */
3209 if (*regparse == '\\')
3210 prevchr_len = 1;
3211 else
3212 prevchr_len = 0;
3213 if (regparse[prevchr_len] != NUL)
3214 {
3215#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003216 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003217 /* exclude composing chars that mb_ptr2len does include */
3218 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003219 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003220 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003221 else
3222#endif
3223 ++prevchr_len;
3224 }
3225 regparse += prevchr_len;
3226 prev_at_start = at_start;
3227 at_start = FALSE;
3228 prevprevchr = prevchr;
3229 prevchr = curchr;
3230 curchr = nextchr; /* use previously unget char, or -1 */
3231 nextchr = -1;
3232}
3233
3234/*
3235 * Skip a character while keeping the value of prev_at_start for at_start.
3236 * prevchr and prevprevchr are also kept.
3237 */
3238 static void
3239skipchr_keepstart()
3240{
3241 int as = prev_at_start;
3242 int pr = prevchr;
3243 int prpr = prevprevchr;
3244
3245 skipchr();
3246 at_start = as;
3247 prevchr = pr;
3248 prevprevchr = prpr;
3249}
3250
/*
 * Lexical analyzer for the pattern: return the next character, with its
 * magicness as decided by peekchr(), and advance past it.
 */
    static int
getchr()
{
    int         c;

    c = peekchr();
    skipchr();
    return c;
}
3263
3264/*
3265 * put character back. Works only once!
3266 */
3267 static void
3268ungetchr()
3269{
3270 nextchr = curchr;
3271 curchr = prevchr;
3272 prevchr = prevprevchr;
3273 at_start = prev_at_start;
3274 prev_at_start = FALSE;
3275
3276 /* Backup regparse, so that it's at the same position as before the
3277 * getchr(). */
3278 regparse -= prevchr_len;
3279}
3280
3281/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003282 * Get and return the value of the hex string at the current position.
3283 * Return -1 if there is no valid hex number.
3284 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003285 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003286 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003287 * The parameter controls the maximum number of input characters. This will be
3288 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3289 */
3290 static int
3291gethexchrs(maxinputlen)
3292 int maxinputlen;
3293{
3294 int nr = 0;
3295 int c;
3296 int i;
3297
3298 for (i = 0; i < maxinputlen; ++i)
3299 {
3300 c = regparse[0];
3301 if (!vim_isxdigit(c))
3302 break;
3303 nr <<= 4;
3304 nr |= hex2nr(c);
3305 ++regparse;
3306 }
3307
3308 if (i == 0)
3309 return -1;
3310 return nr;
3311}
3312
3313/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003314 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003315 * current position. Return -1 for invalid. Consumes all digits.
3316 */
3317 static int
3318getdecchrs()
3319{
3320 int nr = 0;
3321 int c;
3322 int i;
3323
3324 for (i = 0; ; ++i)
3325 {
3326 c = regparse[0];
3327 if (c < '0' || c > '9')
3328 break;
3329 nr *= 10;
3330 nr += c - '0';
3331 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003332 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003333 }
3334
3335 if (i == 0)
3336 return -1;
3337 return nr;
3338}
3339
3340/*
3341 * get and return the value of the octal string immediately after the current
3342 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3343 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3344 * treat 8 or 9 as recognised characters. Position is updated:
3345 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003346 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003347 */
3348 static int
3349getoctchrs()
3350{
3351 int nr = 0;
3352 int c;
3353 int i;
3354
3355 for (i = 0; i < 3 && nr < 040; ++i)
3356 {
3357 c = regparse[0];
3358 if (c < '0' || c > '7')
3359 break;
3360 nr <<= 3;
3361 nr |= hex2nr(c);
3362 ++regparse;
3363 }
3364
3365 if (i == 0)
3366 return -1;
3367 return nr;
3368}
3369
3370/*
3371 * Get a number after a backslash that is inside [].
3372 * When nothing is recognized return a backslash.
3373 */
3374 static int
3375coll_get_char()
3376{
3377 int nr = -1;
3378
3379 switch (*regparse++)
3380 {
3381 case 'd': nr = getdecchrs(); break;
3382 case 'o': nr = getoctchrs(); break;
3383 case 'x': nr = gethexchrs(2); break;
3384 case 'u': nr = gethexchrs(4); break;
3385 case 'U': nr = gethexchrs(8); break;
3386 }
3387 if (nr < 0)
3388 {
3389 /* If getting the number fails be backwards compatible: the character
3390 * is a backslash. */
3391 --regparse;
3392 nr = '\\';
3393 }
3394 return nr;
3395}
3396
3397/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003398 * read_limits - Read two integers to be taken as a minimum and maximum.
3399 * If the first character is '-', then the range is reversed.
3400 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3401 * missing, a very big number is the default.
3402 */
3403 static int
3404read_limits(minval, maxval)
3405 long *minval;
3406 long *maxval;
3407{
3408 int reverse = FALSE;
3409 char_u *first_char;
3410 long tmp;
3411
3412 if (*regparse == '-')
3413 {
3414 /* Starts with '-', so reverse the range later */
3415 regparse++;
3416 reverse = TRUE;
3417 }
3418 first_char = regparse;
3419 *minval = getdigits(&regparse);
3420 if (*regparse == ',') /* There is a comma */
3421 {
3422 if (vim_isdigit(*++regparse))
3423 *maxval = getdigits(&regparse);
3424 else
3425 *maxval = MAX_LIMIT;
3426 }
3427 else if (VIM_ISDIGIT(*first_char))
3428 *maxval = *minval; /* It was \{n} or \{-n} */
3429 else
3430 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3431 if (*regparse == '\\')
3432 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003433 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003434 {
3435 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3436 reg_magic == MAGIC_ALL ? "" : "\\");
3437 EMSG_RET_FAIL(IObuff);
3438 }
3439
3440 /*
3441 * Reverse the range if there was a '-', or make sure it is in the right
3442 * order otherwise.
3443 */
3444 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3445 {
3446 tmp = *minval;
3447 *minval = *maxval;
3448 *maxval = tmp;
3449 }
3450 skipchr(); /* let's be friends with the lexer again */
3451 return OK;
3452}
3453
/*
 * vim_regexec and friends
 */

/*
 * Global work variables for vim_regexec().
 */

/* The current match-position is remembered with these variables: */
static linenr_T reglnum;        /* line number, relative to first line */
static char_u   *regline;       /* start of current line */
static char_u   *reginput;      /* current input, points into "regline" */

static int      need_clear_subexpr;     /* subexpressions still need to be
                                         * cleared */
#ifdef FEAT_SYN_HL
/* Only present when compiled with FEAT_SYN_HL. */
static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
                                                 * still need to be cleared */
#endif
3473
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of the union is in use at a time, depending on whether a
 * single-line or a multi-line regexp is being matched.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;
    int         rs_len; /* length of "backpos" at the time of saving */
} regsave_T;
3488
/* struct to save start/end pointer/position in for \(\) */
/* The restore_se() macro takes "pos" when REG_MULTI is set and "ptr"
 * otherwise. */
typedef struct
{
    union
    {
        char_u  *ptr;
        lpos_T  pos;
    } se_u;
} save_se_T;
3498
/* used for BEHIND and NOBEHIND matching */
/* Holds two saved input states, the saved value of "need_clear_subexpr" and
 * a saved start and end for each of the NSUBEXP sub-expressions. */
typedef struct regbehind_S
{
    regsave_T     save_after;
    regsave_T     save_behind;
    int           save_need_clear_subexpr;
    save_se_T     save_start[NSUBEXP];
    save_se_T     save_end[NSUBEXP];
} regbehind_T;
3508
/* Prototypes for static functions used for matching. */
static char_u   *reg_getline __ARGS((linenr_T lnum));
static long     bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
static long     regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void     cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void     cleanup_zsubexpr __ARGS((void));
#endif
static void     save_subexpr __ARGS((regbehind_T *bp));
static void     restore_subexpr __ARGS((regbehind_T *bp));
static void     reg_nextline __ARGS((void));
static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
static int      reg_save_equal __ARGS((regsave_T *save));
static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));

/* Save the sub-expressions before attempting a match. */
/* Uses "posp" for a multi-line match and "pp" otherwise.  The expansion is a
 * conditional expression without parentheses around it, thus this can only
 * be used as a complete statement. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* After a failed match restore the sub-expressions. */
/* Uses "posp" for a multi-line match and "pp" otherwise.  The expansion is a
 * block in braces, not an expression. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }

static int      re_num_cmp __ARGS((long_u val, char_u *scan));
static int      match_with_backref __ARGS((linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen));
static int      regmatch __ARGS((char_u *prog));
static int      regrepeat __ARGS((char_u *p, long maxcount));
3540
#ifdef DEBUG
/* Only present in a DEBUG build; not used in this part of the file. */
int             regnarrate = 0;
#endif
3544
/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int      ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always.
 */
static int      ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum.
 * bt_regexec_nl() always sets it to zero.
 */
static colnr_T  ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in bt_regexec_both() when finished.
 */
static char_u   *reg_tofree = NULL;
static unsigned reg_tofreelen;
3573
/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              curbuf                  buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 *
 * "reg_match" being NULL is what the REG_MULTI macro tests for.
 */
static regmatch_T       *reg_match;
static regmmatch_T      *reg_mmatch;
static char_u           **reg_startp = NULL;
static char_u           **reg_endp = NULL;
static lpos_T           *reg_startpos = NULL;
static lpos_T           *reg_endpos = NULL;
static win_T            *reg_win;
static buf_T            *reg_buf;
static linenr_T         reg_firstlnum;
static linenr_T         reg_maxline;
static int              reg_line_lbr;       /* "\n" in string is line break */
3602
/* Values for rs_state in regitem_T. */
/* The two RS_Z items only exist when compiled with FEAT_SYN_HL, thus the
 * numeric value of the items after them depends on that feature. */
typedef enum regstate_E
{
    RS_NOPEN = 0        /* NOPEN and NCLOSE */
    , RS_MOPEN          /* MOPEN + [0-9] */
    , RS_MCLOSE         /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN          /* ZOPEN + [0-9] */
    , RS_ZCLOSE         /* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH         /* BRANCH */
    , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH        /* NOMATCH */
    , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3623
/*
 * When there are alternatives a regstate_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;
        regsave_T  regsave;
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr or BEHIND/NOBEHIND */
} regitem_T;

/* Functions to add an item to and remove an item from the regstack. */
static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
static void regstack_pop __ARGS((char_u **scan));
3644
/* used for STAR, PLUS and BRACE_SIMPLE matching */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;          /* NOTE(review): presumably the number of
                                 * matches found so far -- confirm in
                                 * regmatch() */
    long        minval;         /* NOTE(review): presumably the minimum */
    long        maxval;         /* and maximum number of matches -- confirm */
} regstar_T;
3654
/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3662
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items for BACK.
 * Both are released by free_regexp_stuff().
 */
static garray_T regstack = {0, 0, 0, 0, NULL};
static garray_T backpos = {0, 0, 0, 0, NULL};

/*
 * Both for regstack and backpos tables we use the following strategy of
 * allocation (to reduce malloc/free calls):
 * - Initial size is fairly small.
 * - When needed, the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match we free the memory only if the array has grown.
 *   Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL        2048
#define BACKPOS_INITIAL         64
3686
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between calls:
 * the saved line copy, the previous substitute string, the regstack and the
 * backpos table.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&regstack);
    ga_clear(&backpos);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003697
Bram Moolenaar071d4272004-06-13 20:20:40 +00003698/*
3699 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3700 */
3701 static char_u *
3702reg_getline(lnum)
3703 linenr_T lnum;
3704{
3705 /* when looking behind for a match/no-match lnum is negative. But we
3706 * can't go before line 1 */
3707 if (reg_firstlnum + lnum < 1)
3708 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003709 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003710 /* Must have matched the "\n" in the last line. */
3711 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003712 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3713}
3714
/* NOTE(review): presumably used for the look-behind items; the use is not in
 * this part of the file -- confirm. */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
#endif

/* TRUE if using multi-line regexp. */
/* bt_regexec_multi() sets "reg_match" to NULL, bt_regexec_nl() sets it to
 * the match being filled in. */
#define REG_MULTI       (reg_match == NULL)
3726
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003727static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr));
3728
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003729
Bram Moolenaar071d4272004-06-13 20:20:40 +00003730/*
3731 * Match a regexp against a string.
3732 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3733 * Uses curbuf for line count and 'iskeyword'.
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003734 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003735 *
3736 * Return TRUE if there is a match, FALSE if not.
3737 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003738 static int
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003739bt_regexec_nl(rmp, line, col, line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003740 regmatch_T *rmp;
3741 char_u *line; /* string to match against */
3742 colnr_T col; /* column to start looking for match */
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003743 int line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003744{
3745 reg_match = rmp;
3746 reg_mmatch = NULL;
3747 reg_maxline = 0;
Bram Moolenaar2af78a12014-04-23 19:06:37 +02003748 reg_line_lbr = line_lbr;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003749 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003750 reg_win = NULL;
3751 ireg_ic = rmp->rm_ic;
3752#ifdef FEAT_MBYTE
3753 ireg_icombine = FALSE;
3754#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003755 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003756 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003757}
3758
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003759static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3760
Bram Moolenaar071d4272004-06-13 20:20:40 +00003761/*
3762 * Match a regexp against multiple lines.
3763 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3764 * Uses curbuf for line count and 'iskeyword'.
3765 *
3766 * Return zero if there is no match. Return number of lines contained in the
3767 * match otherwise.
3768 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003769 static long
3770bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003771 regmmatch_T *rmp;
3772 win_T *win; /* window in which to search or NULL */
3773 buf_T *buf; /* buffer in which to search */
3774 linenr_T lnum; /* nr of line to start looking for match */
3775 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003776 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003777{
3778 long r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003779
3780 reg_match = NULL;
3781 reg_mmatch = rmp;
3782 reg_buf = buf;
3783 reg_win = win;
3784 reg_firstlnum = lnum;
3785 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3786 reg_line_lbr = FALSE;
3787 ireg_ic = rmp->rmm_ic;
3788#ifdef FEAT_MBYTE
3789 ireg_icombine = FALSE;
3790#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003791 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003792
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003793 r = bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003794
3795 return r;
3796}
3797
3798/*
3799 * Match a regexp against a string ("line" points to the string) or multiple
3800 * lines ("line" is NULL, use reg_getline()).
3801 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003802 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003803bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003804 char_u *line;
3805 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003806 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003807{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003808 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003809 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003810 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003811
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003812 /* Create "regstack" and "backpos" if they are not allocated yet.
3813 * We allocate *_INITIAL amount of bytes first and then set the grow size
3814 * to much bigger value to avoid many malloc calls in case of deep regular
3815 * expressions. */
3816 if (regstack.ga_data == NULL)
3817 {
3818 /* Use an item size of 1 byte, since we push different things
3819 * onto the regstack. */
3820 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3821 ga_grow(&regstack, REGSTACK_INITIAL);
3822 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3823 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003824
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003825 if (backpos.ga_data == NULL)
3826 {
3827 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3828 ga_grow(&backpos, BACKPOS_INITIAL);
3829 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3830 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003831
Bram Moolenaar071d4272004-06-13 20:20:40 +00003832 if (REG_MULTI)
3833 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003834 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003835 line = reg_getline((linenr_T)0);
3836 reg_startpos = reg_mmatch->startpos;
3837 reg_endpos = reg_mmatch->endpos;
3838 }
3839 else
3840 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003841 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003842 reg_startp = reg_match->startp;
3843 reg_endp = reg_match->endp;
3844 }
3845
3846 /* Be paranoid... */
3847 if (prog == NULL || line == NULL)
3848 {
3849 EMSG(_(e_null));
3850 goto theend;
3851 }
3852
3853 /* Check validity of program. */
3854 if (prog_magic_wrong())
3855 goto theend;
3856
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003857 /* If the start column is past the maximum column: no need to try. */
3858 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3859 goto theend;
3860
Bram Moolenaar071d4272004-06-13 20:20:40 +00003861 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3862 if (prog->regflags & RF_ICASE)
3863 ireg_ic = TRUE;
3864 else if (prog->regflags & RF_NOICASE)
3865 ireg_ic = FALSE;
3866
3867#ifdef FEAT_MBYTE
3868 /* If pattern contains "\Z" overrule value of ireg_icombine */
3869 if (prog->regflags & RF_ICOMBINE)
3870 ireg_icombine = TRUE;
3871#endif
3872
3873 /* If there is a "must appear" string, look for it. */
3874 if (prog->regmust != NULL)
3875 {
3876 int c;
3877
3878#ifdef FEAT_MBYTE
3879 if (has_mbyte)
3880 c = (*mb_ptr2char)(prog->regmust);
3881 else
3882#endif
3883 c = *prog->regmust;
3884 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003885
3886 /*
3887 * This is used very often, esp. for ":global". Use three versions of
3888 * the loop to avoid overhead of conditions.
3889 */
3890 if (!ireg_ic
3891#ifdef FEAT_MBYTE
3892 && !has_mbyte
3893#endif
3894 )
3895 while ((s = vim_strbyte(s, c)) != NULL)
3896 {
3897 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3898 break; /* Found it. */
3899 ++s;
3900 }
3901#ifdef FEAT_MBYTE
3902 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3903 while ((s = vim_strchr(s, c)) != NULL)
3904 {
3905 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3906 break; /* Found it. */
3907 mb_ptr_adv(s);
3908 }
3909#endif
3910 else
3911 while ((s = cstrchr(s, c)) != NULL)
3912 {
3913 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3914 break; /* Found it. */
3915 mb_ptr_adv(s);
3916 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003917 if (s == NULL) /* Not present. */
3918 goto theend;
3919 }
3920
3921 regline = line;
3922 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003923 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003924
3925 /* Simplest case: Anchored match need be tried only once. */
3926 if (prog->reganch)
3927 {
3928 int c;
3929
3930#ifdef FEAT_MBYTE
3931 if (has_mbyte)
3932 c = (*mb_ptr2char)(regline + col);
3933 else
3934#endif
3935 c = regline[col];
3936 if (prog->regstart == NUL
3937 || prog->regstart == c
3938 || (ireg_ic && ((
3939#ifdef FEAT_MBYTE
3940 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3941 || (c < 255 && prog->regstart < 255 &&
3942#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003943 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003944 retval = regtry(prog, col);
3945 else
3946 retval = 0;
3947 }
3948 else
3949 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003950#ifdef FEAT_RELTIME
3951 int tm_count = 0;
3952#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003953 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003954 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003955 {
3956 if (prog->regstart != NUL)
3957 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003958 /* Skip until the char we know it must start with.
3959 * Used often, do some work to avoid call overhead. */
3960 if (!ireg_ic
3961#ifdef FEAT_MBYTE
3962 && !has_mbyte
3963#endif
3964 )
3965 s = vim_strbyte(regline + col, prog->regstart);
3966 else
3967 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003968 if (s == NULL)
3969 {
3970 retval = 0;
3971 break;
3972 }
3973 col = (int)(s - regline);
3974 }
3975
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003976 /* Check for maximum column to try. */
3977 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3978 {
3979 retval = 0;
3980 break;
3981 }
3982
Bram Moolenaar071d4272004-06-13 20:20:40 +00003983 retval = regtry(prog, col);
3984 if (retval > 0)
3985 break;
3986
3987 /* if not currently on the first line, get it again */
3988 if (reglnum != 0)
3989 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003990 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003991 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003992 }
3993 if (regline[col] == NUL)
3994 break;
3995#ifdef FEAT_MBYTE
3996 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003997 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003998 else
3999#endif
4000 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00004001#ifdef FEAT_RELTIME
4002 /* Check for timeout once in a twenty times to avoid overhead. */
4003 if (tm != NULL && ++tm_count == 20)
4004 {
4005 tm_count = 0;
4006 if (profile_passed_limit(tm))
4007 break;
4008 }
4009#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004010 }
4011 }
4012
Bram Moolenaar071d4272004-06-13 20:20:40 +00004013theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004014 /* Free "reg_tofree" when it's a bit big.
4015 * Free regstack and backpos if they are bigger than their initial size. */
4016 if (reg_tofreelen > 400)
4017 {
4018 vim_free(reg_tofree);
4019 reg_tofree = NULL;
4020 }
4021 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4022 ga_clear(&regstack);
4023 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4024 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004025
Bram Moolenaar071d4272004-06-13 20:20:40 +00004026 return retval;
4027}
4028
4029#ifdef FEAT_SYN_HL
4030static reg_extmatch_T *make_extmatch __ARGS((void));
4031
4032/*
4033 * Create a new extmatch and mark it as referenced once.
4034 */
4035 static reg_extmatch_T *
4036make_extmatch()
4037{
4038 reg_extmatch_T *em;
4039
4040 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4041 if (em != NULL)
4042 em->refcnt = 1;
4043 return em;
4044}
4045
4046/*
4047 * Add a reference to an extmatch.
4048 */
4049 reg_extmatch_T *
4050ref_extmatch(em)
4051 reg_extmatch_T *em;
4052{
4053 if (em != NULL)
4054 em->refcnt++;
4055 return em;
4056}
4057
4058/*
4059 * Remove a reference to an extmatch. If there are no references left, free
4060 * the info.
4061 */
4062 void
4063unref_extmatch(em)
4064 reg_extmatch_T *em;
4065{
4066 int i;
4067
4068 if (em != NULL && --em->refcnt <= 0)
4069 {
4070 for (i = 0; i < NSUBEXP; ++i)
4071 vim_free(em->matches[i]);
4072 vim_free(em);
4073 }
4074}
4075#endif
4076
4077/*
4078 * regtry - try match of "prog" with at regline["col"].
4079 * Returns 0 for failure, number of lines contained in the match otherwise.
4080 */
4081 static long
4082regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004083 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004084 colnr_T col;
4085{
4086 reginput = regline + col;
4087 need_clear_subexpr = TRUE;
4088#ifdef FEAT_SYN_HL
4089 /* Clear the external match subpointers if necessary. */
4090 if (prog->reghasz == REX_SET)
4091 need_clear_zsubexpr = TRUE;
4092#endif
4093
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004094 if (regmatch(prog->program + 1) == 0)
4095 return 0;
4096
4097 cleanup_subexpr();
4098 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004099 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004100 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004101 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004102 reg_startpos[0].lnum = 0;
4103 reg_startpos[0].col = col;
4104 }
4105 if (reg_endpos[0].lnum < 0)
4106 {
4107 reg_endpos[0].lnum = reglnum;
4108 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004109 }
4110 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004111 /* Use line number of "\ze". */
4112 reglnum = reg_endpos[0].lnum;
4113 }
4114 else
4115 {
4116 if (reg_startp[0] == NULL)
4117 reg_startp[0] = regline + col;
4118 if (reg_endp[0] == NULL)
4119 reg_endp[0] = reginput;
4120 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004121#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004122 /* Package any found \z(...\) matches for export. Default is none. */
4123 unref_extmatch(re_extmatch_out);
4124 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004125
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004126 if (prog->reghasz == REX_SET)
4127 {
4128 int i;
4129
4130 cleanup_zsubexpr();
4131 re_extmatch_out = make_extmatch();
4132 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004133 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004134 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004135 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004136 /* Only accept single line matches. */
4137 if (reg_startzpos[i].lnum >= 0
Bram Moolenaar5a4e1602014-04-06 21:34:04 +02004138 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4139 && reg_endzpos[i].col >= reg_startzpos[i].col)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004140 re_extmatch_out->matches[i] =
4141 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004142 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004143 reg_endzpos[i].col - reg_startzpos[i].col);
4144 }
4145 else
4146 {
4147 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4148 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004149 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004150 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004151 }
4152 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004153 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004154#endif
4155 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004156}
4157
4158#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00004159static int reg_prev_class __ARGS((void));
4160
Bram Moolenaar071d4272004-06-13 20:20:40 +00004161/*
4162 * Get class of previous character.
4163 */
4164 static int
4165reg_prev_class()
4166{
4167 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004168 return mb_get_class_buf(reginput - 1
4169 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004170 return -1;
4171}
Bram Moolenaar071d4272004-06-13 20:20:40 +00004172#endif
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01004173
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004174static int reg_match_visual __ARGS((void));
4175
4176/*
4177 * Return TRUE if the current reginput position matches the Visual area.
4178 */
4179 static int
4180reg_match_visual()
4181{
4182 pos_T top, bot;
4183 linenr_T lnum;
4184 colnr_T col;
4185 win_T *wp = reg_win == NULL ? curwin : reg_win;
4186 int mode;
4187 colnr_T start, end;
4188 colnr_T start2, end2;
4189 colnr_T cols;
4190
4191 /* Check if the buffer is the current buffer. */
4192 if (reg_buf != curbuf || VIsual.lnum == 0)
4193 return FALSE;
4194
4195 if (VIsual_active)
4196 {
4197 if (lt(VIsual, wp->w_cursor))
4198 {
4199 top = VIsual;
4200 bot = wp->w_cursor;
4201 }
4202 else
4203 {
4204 top = wp->w_cursor;
4205 bot = VIsual;
4206 }
4207 mode = VIsual_mode;
4208 }
4209 else
4210 {
4211 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4212 {
4213 top = curbuf->b_visual.vi_start;
4214 bot = curbuf->b_visual.vi_end;
4215 }
4216 else
4217 {
4218 top = curbuf->b_visual.vi_end;
4219 bot = curbuf->b_visual.vi_start;
4220 }
4221 mode = curbuf->b_visual.vi_mode;
4222 }
4223 lnum = reglnum + reg_firstlnum;
4224 if (lnum < top.lnum || lnum > bot.lnum)
4225 return FALSE;
4226
4227 if (mode == 'v')
4228 {
4229 col = (colnr_T)(reginput - regline);
4230 if ((lnum == top.lnum && col < top.col)
4231 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4232 return FALSE;
4233 }
4234 else if (mode == Ctrl_V)
4235 {
4236 getvvcol(wp, &top, &start, NULL, &end);
4237 getvvcol(wp, &bot, &start2, NULL, &end2);
4238 if (start2 < start)
4239 start = start2;
4240 if (end2 > end)
4241 end = end2;
4242 if (top.col == MAXCOL || bot.col == MAXCOL)
4243 end = MAXCOL;
4244 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4245 if (cols < start || cols > end - (*p_sel == 'e'))
4246 return FALSE;
4247 }
4248 return TRUE;
4249}
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004250
/* Advance "reginput" by means of mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments of BRACE_LIMITS.  Logically these belong to
 * regmatch(), but keeping them at file scope reduces the stack space that
 * function needs (it can be called recursively many times).
 */
static long	bl_minval;
static long	bl_maxval;
4260
4261/*
4262 * regmatch - main matching routine
4263 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004264 * Conceptually the strategy is simple: Check to see whether the current node
4265 * matches, push an item onto the regstack and loop to see whether the rest
4266 * matches, and then act accordingly. In practice we make some effort to
4267 * avoid using the regstack, in particular by going through "ordinary" nodes
4268 * (that don't need to know whether the rest of the match failed) by a nested
4269 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004270 *
4271 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4272 * the last matched character.
4273 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4274 * undefined state!
4275 */
4276 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004277regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004278 char_u *scan; /* Current node. */
4279{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004280 char_u *next; /* Next node. */
4281 int op;
4282 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004283 regitem_T *rp;
4284 int no;
4285 int status; /* one of the RA_ values: */
4286#define RA_FAIL 1 /* something failed, abort */
4287#define RA_CONT 2 /* continue in inner loop */
4288#define RA_BREAK 3 /* break inner loop */
4289#define RA_MATCH 4 /* successful match */
4290#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004291
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004292 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004293 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004294 regstack.ga_len = 0;
4295 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004296
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004297 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004298 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004299 */
4300 for (;;)
4301 {
Bram Moolenaar41f12052013-08-25 17:01:42 +02004302 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4303 * Allow interrupting them with CTRL-C. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004304 fast_breakcheck();
4305
4306#ifdef DEBUG
4307 if (scan != NULL && regnarrate)
4308 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004309 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004310 mch_errmsg("(\n");
4311 }
4312#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004313
4314 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004315 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004316 * regstack.
4317 */
4318 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004319 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004320 if (got_int || scan == NULL)
4321 {
4322 status = RA_FAIL;
4323 break;
4324 }
4325 status = RA_CONT;
4326
Bram Moolenaar071d4272004-06-13 20:20:40 +00004327#ifdef DEBUG
4328 if (regnarrate)
4329 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004330 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004331 mch_errmsg("...\n");
4332# ifdef FEAT_SYN_HL
4333 if (re_extmatch_in != NULL)
4334 {
4335 int i;
4336
4337 mch_errmsg(_("External submatches:\n"));
4338 for (i = 0; i < NSUBEXP; i++)
4339 {
4340 mch_errmsg(" \"");
4341 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004342 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004343 mch_errmsg("\"\n");
4344 }
4345 }
4346# endif
4347 }
4348#endif
4349 next = regnext(scan);
4350
4351 op = OP(scan);
4352 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004353 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4354 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004355 {
4356 reg_nextline();
4357 }
4358 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4359 {
4360 ADVANCE_REGINPUT();
4361 }
4362 else
4363 {
4364 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004365 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004366#ifdef FEAT_MBYTE
4367 if (has_mbyte)
4368 c = (*mb_ptr2char)(reginput);
4369 else
4370#endif
4371 c = *reginput;
4372 switch (op)
4373 {
4374 case BOL:
4375 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004377 break;
4378
4379 case EOL:
4380 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004381 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004382 break;
4383
4384 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004385 /* We're not at the beginning of the file when below the first
4386 * line where we started, not at the start of the line or we
4387 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004388 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004389 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004390 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004391 break;
4392
4393 case RE_EOF:
4394 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004395 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 break;
4397
4398 case CURSOR:
4399 /* Check if the buffer is in a window and compare the
4400 * reg_win->w_cursor position to the match position. */
4401 if (reg_win == NULL
4402 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4403 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004404 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004405 break;
4406
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004407 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004408 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004409 {
4410 int mark = OPERAND(scan)[0];
4411 int cmp = OPERAND(scan)[1];
4412 pos_T *pos;
4413
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004414 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004415 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004416 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004417 || (pos->lnum == reglnum + reg_firstlnum
4418 ? (pos->col == (colnr_T)(reginput - regline)
4419 ? (cmp == '<' || cmp == '>')
4420 : (pos->col < (colnr_T)(reginput - regline)
4421 ? cmp != '>'
4422 : cmp != '<'))
4423 : (pos->lnum < reglnum + reg_firstlnum
4424 ? cmp != '>'
4425 : cmp != '<')))
4426 status = RA_NOMATCH;
4427 }
4428 break;
4429
4430 case RE_VISUAL:
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004431 if (!reg_match_visual())
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004432 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004433 break;
4434
Bram Moolenaar071d4272004-06-13 20:20:40 +00004435 case RE_LNUM:
4436 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4437 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004438 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004439 break;
4440
4441 case RE_COL:
4442 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004443 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004444 break;
4445
4446 case RE_VCOL:
4447 if (!re_num_cmp((long_u)win_linetabsize(
4448 reg_win == NULL ? curwin : reg_win,
4449 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004450 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004451 break;
4452
4453 case BOW: /* \<word; reginput points to w */
4454 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004455 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004456#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004457 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004458 {
4459 int this_class;
4460
4461 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004462 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004463 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004464 status = RA_NOMATCH; /* not on a word at all */
4465 else if (reg_prev_class() == this_class)
4466 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004467 }
4468#endif
4469 else
4470 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004471 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4472 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004473 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004474 }
4475 break;
4476
4477 case EOW: /* word\>; reginput points after d */
4478 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004479 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004480#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004481 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482 {
4483 int this_class, prev_class;
4484
4485 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004486 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004487 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004488 if (this_class == prev_class
4489 || prev_class == 0 || prev_class == 1)
4490 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004491 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004492#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004493 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004494 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004495 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4496 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004497 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004498 }
4499 break; /* Matched with EOW */
4500
4501 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004502 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004503 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004504 status = RA_NOMATCH;
4505 else
4506 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004507 break;
4508
4509 case IDENT:
4510 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004511 status = RA_NOMATCH;
4512 else
4513 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004514 break;
4515
4516 case SIDENT:
4517 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004518 status = RA_NOMATCH;
4519 else
4520 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004521 break;
4522
4523 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004524 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004525 status = RA_NOMATCH;
4526 else
4527 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004528 break;
4529
4530 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004531 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004532 status = RA_NOMATCH;
4533 else
4534 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004535 break;
4536
4537 case FNAME:
4538 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004539 status = RA_NOMATCH;
4540 else
4541 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004542 break;
4543
4544 case SFNAME:
4545 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004546 status = RA_NOMATCH;
4547 else
4548 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004549 break;
4550
4551 case PRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004552 if (!vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004553 status = RA_NOMATCH;
4554 else
4555 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004556 break;
4557
4558 case SPRINT:
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02004559 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004560 status = RA_NOMATCH;
4561 else
4562 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004563 break;
4564
4565 case WHITE:
4566 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004567 status = RA_NOMATCH;
4568 else
4569 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004570 break;
4571
4572 case NWHITE:
4573 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004574 status = RA_NOMATCH;
4575 else
4576 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004577 break;
4578
4579 case DIGIT:
4580 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004581 status = RA_NOMATCH;
4582 else
4583 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004584 break;
4585
4586 case NDIGIT:
4587 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004588 status = RA_NOMATCH;
4589 else
4590 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004591 break;
4592
4593 case HEX:
4594 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004595 status = RA_NOMATCH;
4596 else
4597 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004598 break;
4599
4600 case NHEX:
4601 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004602 status = RA_NOMATCH;
4603 else
4604 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004605 break;
4606
4607 case OCTAL:
4608 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004609 status = RA_NOMATCH;
4610 else
4611 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004612 break;
4613
4614 case NOCTAL:
4615 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004616 status = RA_NOMATCH;
4617 else
4618 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004619 break;
4620
4621 case WORD:
4622 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004623 status = RA_NOMATCH;
4624 else
4625 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004626 break;
4627
4628 case NWORD:
4629 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004630 status = RA_NOMATCH;
4631 else
4632 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004633 break;
4634
4635 case HEAD:
4636 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004637 status = RA_NOMATCH;
4638 else
4639 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004640 break;
4641
4642 case NHEAD:
4643 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004644 status = RA_NOMATCH;
4645 else
4646 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004647 break;
4648
4649 case ALPHA:
4650 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004651 status = RA_NOMATCH;
4652 else
4653 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004654 break;
4655
4656 case NALPHA:
4657 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004658 status = RA_NOMATCH;
4659 else
4660 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004661 break;
4662
4663 case LOWER:
4664 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004665 status = RA_NOMATCH;
4666 else
4667 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004668 break;
4669
4670 case NLOWER:
4671 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004672 status = RA_NOMATCH;
4673 else
4674 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004675 break;
4676
4677 case UPPER:
4678 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004679 status = RA_NOMATCH;
4680 else
4681 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004682 break;
4683
4684 case NUPPER:
4685 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004686 status = RA_NOMATCH;
4687 else
4688 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004689 break;
4690
4691 case EXACTLY:
4692 {
4693 int len;
4694 char_u *opnd;
4695
4696 opnd = OPERAND(scan);
4697 /* Inline the first byte, for speed. */
4698 if (*opnd != *reginput
4699 && (!ireg_ic || (
4700#ifdef FEAT_MBYTE
4701 !enc_utf8 &&
4702#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004703 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004704 status = RA_NOMATCH;
4705 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004706 {
4707 /* match empty string always works; happens when "~" is
4708 * empty. */
4709 }
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004710 else
4711 {
4712 if (opnd[1] == NUL
Bram Moolenaar071d4272004-06-13 20:20:40 +00004713#ifdef FEAT_MBYTE
4714 && !(enc_utf8 && ireg_ic)
4715#endif
4716 )
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004717 {
4718 len = 1; /* matched a single byte above */
4719 }
4720 else
4721 {
4722 /* Need to match first byte again for multi-byte. */
4723 len = (int)STRLEN(opnd);
4724 if (cstrncmp(opnd, reginput, &len) != 0)
4725 status = RA_NOMATCH;
4726 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004727#ifdef FEAT_MBYTE
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004728 /* Check for following composing character, unless %C
4729 * follows (skips over all composing chars). */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004730 if (status != RA_NOMATCH
4731 && enc_utf8
4732 && UTF_COMPOSINGLIKE(reginput, reginput + len)
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004733 && !ireg_icombine
4734 && OP(next) != RE_COMPOSING)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004735 {
4736 /* raaron: This code makes a composing character get
4737 * ignored, which is the correct behavior (sometimes)
4738 * for voweled Hebrew texts. */
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004739 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004740 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004741#endif
Bram Moolenaar6082bea2014-05-13 18:04:00 +02004742 if (status != RA_NOMATCH)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004743 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004744 }
4745 }
4746 break;
4747
4748 case ANYOF:
4749 case ANYBUT:
4750 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004751 status = RA_NOMATCH;
4752 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4753 status = RA_NOMATCH;
4754 else
4755 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004756 break;
4757
4758#ifdef FEAT_MBYTE
4759 case MULTIBYTECODE:
4760 if (has_mbyte)
4761 {
4762 int i, len;
4763 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004764 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004765
4766 opnd = OPERAND(scan);
4767 /* Safety check (just in case 'encoding' was changed since
4768 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004769 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004770 {
4771 status = RA_NOMATCH;
4772 break;
4773 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004774 if (enc_utf8)
4775 opndc = mb_ptr2char(opnd);
4776 if (enc_utf8 && utf_iscomposing(opndc))
4777 {
4778 /* When only a composing char is given match at any
4779 * position where that composing char appears. */
4780 status = RA_NOMATCH;
4781 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004782 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004783 inpc = mb_ptr2char(reginput + i);
4784 if (!utf_iscomposing(inpc))
4785 {
4786 if (i > 0)
4787 break;
4788 }
4789 else if (opndc == inpc)
4790 {
4791 /* Include all following composing chars. */
4792 len = i + mb_ptr2len(reginput + i);
4793 status = RA_MATCH;
4794 break;
4795 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004796 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004797 }
4798 else
4799 for (i = 0; i < len; ++i)
4800 if (opnd[i] != reginput[i])
4801 {
4802 status = RA_NOMATCH;
4803 break;
4804 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004805 reginput += len;
4806 }
4807 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004808 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004809 break;
4810#endif
Bram Moolenaar8df5acf2014-05-13 19:37:29 +02004811 case RE_COMPOSING:
4812#ifdef FEAT_MBYTE
4813 if (enc_utf8)
4814 {
4815 /* Skip composing characters. */
4816 while (utf_iscomposing(utf_ptr2char(reginput)))
4817 mb_cptr_adv(reginput);
4818 }
4819#endif
4820 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004821
4822 case NOTHING:
4823 break;
4824
4825 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004826 {
4827 int i;
4828 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004829
Bram Moolenaar582fd852005-03-28 20:58:01 +00004830 /*
4831 * When we run into BACK we need to check if we don't keep
4832 * looping without matching any input. The second and later
4833 * times a BACK is encountered it fails if the input is still
4834 * at the same position as the previous time.
4835 * The positions are stored in "backpos" and found by the
4836 * current value of "scan", the position in the RE program.
4837 */
4838 bp = (backpos_T *)backpos.ga_data;
4839 for (i = 0; i < backpos.ga_len; ++i)
4840 if (bp[i].bp_scan == scan)
4841 break;
4842 if (i == backpos.ga_len)
4843 {
4844 /* First time at this BACK, make room to store the pos. */
4845 if (ga_grow(&backpos, 1) == FAIL)
4846 status = RA_FAIL;
4847 else
4848 {
4849 /* get "ga_data" again, it may have changed */
4850 bp = (backpos_T *)backpos.ga_data;
4851 bp[i].bp_scan = scan;
4852 ++backpos.ga_len;
4853 }
4854 }
4855 else if (reg_save_equal(&bp[i].bp_pos))
4856 /* Still at same position as last time, fail. */
4857 status = RA_NOMATCH;
4858
4859 if (status != RA_FAIL && status != RA_NOMATCH)
4860 reg_save(&bp[i].bp_pos, &backpos);
4861 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004862 break;
4863
Bram Moolenaar071d4272004-06-13 20:20:40 +00004864 case MOPEN + 0: /* Match start: \zs */
4865 case MOPEN + 1: /* \( */
4866 case MOPEN + 2:
4867 case MOPEN + 3:
4868 case MOPEN + 4:
4869 case MOPEN + 5:
4870 case MOPEN + 6:
4871 case MOPEN + 7:
4872 case MOPEN + 8:
4873 case MOPEN + 9:
4874 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004875 no = op - MOPEN;
4876 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004877 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004878 if (rp == NULL)
4879 status = RA_FAIL;
4880 else
4881 {
4882 rp->rs_no = no;
4883 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4884 &reg_startp[no]);
4885 /* We simply continue and handle the result when done. */
4886 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004887 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004888 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004889
4890 case NOPEN: /* \%( */
4891 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004892 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004893 status = RA_FAIL;
4894 /* We simply continue and handle the result when done. */
4895 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004896
4897#ifdef FEAT_SYN_HL
4898 case ZOPEN + 1:
4899 case ZOPEN + 2:
4900 case ZOPEN + 3:
4901 case ZOPEN + 4:
4902 case ZOPEN + 5:
4903 case ZOPEN + 6:
4904 case ZOPEN + 7:
4905 case ZOPEN + 8:
4906 case ZOPEN + 9:
4907 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004908 no = op - ZOPEN;
4909 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004910 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004911 if (rp == NULL)
4912 status = RA_FAIL;
4913 else
4914 {
4915 rp->rs_no = no;
4916 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4917 &reg_startzp[no]);
4918 /* We simply continue and handle the result when done. */
4919 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004920 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004921 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004922#endif
4923
4924 case MCLOSE + 0: /* Match end: \ze */
4925 case MCLOSE + 1: /* \) */
4926 case MCLOSE + 2:
4927 case MCLOSE + 3:
4928 case MCLOSE + 4:
4929 case MCLOSE + 5:
4930 case MCLOSE + 6:
4931 case MCLOSE + 7:
4932 case MCLOSE + 8:
4933 case MCLOSE + 9:
4934 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004935 no = op - MCLOSE;
4936 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004937 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004938 if (rp == NULL)
4939 status = RA_FAIL;
4940 else
4941 {
4942 rp->rs_no = no;
4943 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4944 /* We simply continue and handle the result when done. */
4945 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004946 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004947 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004948
4949#ifdef FEAT_SYN_HL
4950 case ZCLOSE + 1: /* \) after \z( */
4951 case ZCLOSE + 2:
4952 case ZCLOSE + 3:
4953 case ZCLOSE + 4:
4954 case ZCLOSE + 5:
4955 case ZCLOSE + 6:
4956 case ZCLOSE + 7:
4957 case ZCLOSE + 8:
4958 case ZCLOSE + 9:
4959 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004960 no = op - ZCLOSE;
4961 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004962 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004963 if (rp == NULL)
4964 status = RA_FAIL;
4965 else
4966 {
4967 rp->rs_no = no;
4968 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4969 &reg_endzp[no]);
4970 /* We simply continue and handle the result when done. */
4971 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004972 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004973 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004974#endif
4975
4976 case BACKREF + 1:
4977 case BACKREF + 2:
4978 case BACKREF + 3:
4979 case BACKREF + 4:
4980 case BACKREF + 5:
4981 case BACKREF + 6:
4982 case BACKREF + 7:
4983 case BACKREF + 8:
4984 case BACKREF + 9:
4985 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004986 int len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004987
4988 no = op - BACKREF;
4989 cleanup_subexpr();
4990 if (!REG_MULTI) /* Single-line regexp */
4991 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004992 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004993 {
4994 /* Backref was not set: Match an empty string. */
4995 len = 0;
4996 }
4997 else
4998 {
4999 /* Compare current input with back-ref in the same
5000 * line. */
5001 len = (int)(reg_endp[no] - reg_startp[no]);
5002 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005003 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005004 }
5005 }
5006 else /* Multi-line regexp */
5007 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00005008 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005009 {
5010 /* Backref was not set: Match an empty string. */
5011 len = 0;
5012 }
5013 else
5014 {
5015 if (reg_startpos[no].lnum == reglnum
5016 && reg_endpos[no].lnum == reglnum)
5017 {
5018 /* Compare back-ref within the current line. */
5019 len = reg_endpos[no].col - reg_startpos[no].col;
5020 if (cstrncmp(regline + reg_startpos[no].col,
5021 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005022 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005023 }
5024 else
5025 {
5026 /* Messy situation: Need to compare between two
5027 * lines. */
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005028 int r = match_with_backref(
Bram Moolenaar580abea2013-06-14 20:31:28 +02005029 reg_startpos[no].lnum,
5030 reg_startpos[no].col,
5031 reg_endpos[no].lnum,
5032 reg_endpos[no].col,
Bram Moolenaar4cff8fa2013-06-14 22:48:54 +02005033 &len);
Bram Moolenaar141f6bb2013-06-15 15:09:50 +02005034
5035 if (r != RA_MATCH)
5036 status = r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005037 }
5038 }
5039 }
5040
5041 /* Matched the backref, skip over it. */
5042 reginput += len;
5043 }
5044 break;
5045
5046#ifdef FEAT_SYN_HL
5047 case ZREF + 1:
5048 case ZREF + 2:
5049 case ZREF + 3:
5050 case ZREF + 4:
5051 case ZREF + 5:
5052 case ZREF + 6:
5053 case ZREF + 7:
5054 case ZREF + 8:
5055 case ZREF + 9:
5056 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005057 int len;
5058
5059 cleanup_zsubexpr();
5060 no = op - ZREF;
5061 if (re_extmatch_in != NULL
5062 && re_extmatch_in->matches[no] != NULL)
5063 {
5064 len = (int)STRLEN(re_extmatch_in->matches[no]);
5065 if (cstrncmp(re_extmatch_in->matches[no],
5066 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005067 status = RA_NOMATCH;
5068 else
5069 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005070 }
5071 else
5072 {
5073 /* Backref was not set: Match an empty string. */
5074 }
5075 }
5076 break;
5077#endif
5078
5079 case BRANCH:
5080 {
5081 if (OP(next) != BRANCH) /* No choice. */
5082 next = OPERAND(scan); /* Avoid recursion. */
5083 else
5084 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005085 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005086 if (rp == NULL)
5087 status = RA_FAIL;
5088 else
5089 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005090 }
5091 }
5092 break;
5093
5094 case BRACE_LIMITS:
5095 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005096 if (OP(next) == BRACE_SIMPLE)
5097 {
5098 bl_minval = OPERAND_MIN(scan);
5099 bl_maxval = OPERAND_MAX(scan);
5100 }
5101 else if (OP(next) >= BRACE_COMPLEX
5102 && OP(next) < BRACE_COMPLEX + 10)
5103 {
5104 no = OP(next) - BRACE_COMPLEX;
5105 brace_min[no] = OPERAND_MIN(scan);
5106 brace_max[no] = OPERAND_MAX(scan);
5107 brace_count[no] = 0;
5108 }
5109 else
5110 {
5111 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005112 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005113 }
5114 }
5115 break;
5116
5117 case BRACE_COMPLEX + 0:
5118 case BRACE_COMPLEX + 1:
5119 case BRACE_COMPLEX + 2:
5120 case BRACE_COMPLEX + 3:
5121 case BRACE_COMPLEX + 4:
5122 case BRACE_COMPLEX + 5:
5123 case BRACE_COMPLEX + 6:
5124 case BRACE_COMPLEX + 7:
5125 case BRACE_COMPLEX + 8:
5126 case BRACE_COMPLEX + 9:
5127 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005128 no = op - BRACE_COMPLEX;
5129 ++brace_count[no];
5130
5131 /* If not matched enough times yet, try one more */
5132 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005133 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005134 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005135 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005136 if (rp == NULL)
5137 status = RA_FAIL;
5138 else
5139 {
5140 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005141 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005142 next = OPERAND(scan);
5143 /* We continue and handle the result when done. */
5144 }
5145 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005146 }
5147
5148 /* If matched enough times, may try matching some more */
5149 if (brace_min[no] <= brace_max[no])
5150 {
5151 /* Range is the normal way around, use longest match */
5152 if (brace_count[no] <= brace_max[no])
5153 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005154 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005155 if (rp == NULL)
5156 status = RA_FAIL;
5157 else
5158 {
5159 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005160 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005161 next = OPERAND(scan);
5162 /* We continue and handle the result when done. */
5163 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005164 }
5165 }
5166 else
5167 {
5168 /* Range is backwards, use shortest match first */
5169 if (brace_count[no] <= brace_min[no])
5170 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005171 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005172 if (rp == NULL)
5173 status = RA_FAIL;
5174 else
5175 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005176 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005177 /* We continue and handle the result when done. */
5178 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005179 }
5180 }
5181 }
5182 break;
5183
5184 case BRACE_SIMPLE:
5185 case STAR:
5186 case PLUS:
5187 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005188 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005189
5190 /*
5191 * Lookahead to avoid useless match attempts when we know
5192 * what character comes next.
5193 */
5194 if (OP(next) == EXACTLY)
5195 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005196 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005197 if (ireg_ic)
5198 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005199 if (MB_ISUPPER(rst.nextb))
5200 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005201 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005202 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203 }
5204 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005205 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005206 }
5207 else
5208 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005209 rst.nextb = NUL;
5210 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005211 }
5212 if (op != BRACE_SIMPLE)
5213 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005214 rst.minval = (op == STAR) ? 0 : 1;
5215 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005216 }
5217 else
5218 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005219 rst.minval = bl_minval;
5220 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005221 }
5222
5223 /*
5224 * When maxval > minval, try matching as much as possible, up
5225 * to maxval. When maxval < minval, try matching at least the
5226 * minimal number (since the range is backwards, that's also
5227 * maxval!).
5228 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005229 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005230 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005231 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005232 status = RA_FAIL;
5233 break;
5234 }
5235 if (rst.minval <= rst.maxval
5236 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5237 {
5238 /* It could match. Prepare for trying to match what
5239 * follows. The code is below. Parameters are stored in
5240 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005241 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005242 {
5243 EMSG(_(e_maxmempat));
5244 status = RA_FAIL;
5245 }
5246 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005247 status = RA_FAIL;
5248 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005249 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005250 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005251 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005252 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 if (rp == NULL)
5254 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005255 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005256 {
5257 *(((regstar_T *)rp) - 1) = rst;
5258 status = RA_BREAK; /* skip the restore bits */
5259 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005260 }
5261 }
5262 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005263 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005264
Bram Moolenaar071d4272004-06-13 20:20:40 +00005265 }
5266 break;
5267
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005268 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005269 case MATCH:
5270 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005271 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005272 if (rp == NULL)
5273 status = RA_FAIL;
5274 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005275 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005276 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005277 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005278 next = OPERAND(scan);
5279 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005280 }
5281 break;
5282
5283 case BEHIND:
5284 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005285 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005286 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005287 {
5288 EMSG(_(e_maxmempat));
5289 status = RA_FAIL;
5290 }
5291 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292 status = RA_FAIL;
5293 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005294 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005295 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005296 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005297 if (rp == NULL)
5298 status = RA_FAIL;
5299 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005300 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005301 /* Need to save the subexpr to be able to restore them
5302 * when there is a match but we don't use it. */
5303 save_subexpr(((regbehind_T *)rp) - 1);
5304
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005305 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005306 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005307 /* First try if what follows matches. If it does then we
5308 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005309 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005310 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005311 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005312
5313 case BHPOS:
5314 if (REG_MULTI)
5315 {
5316 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5317 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005318 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005319 }
5320 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005321 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005322 break;
5323
5324 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005325 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5326 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005327 status = RA_NOMATCH;
5328 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005329 ADVANCE_REGINPUT();
5330 else
5331 reg_nextline();
5332 break;
5333
5334 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005335 status = RA_MATCH; /* Success! */
5336 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005337
5338 default:
5339 EMSG(_(e_re_corr));
5340#ifdef DEBUG
5341 printf("Illegal op code %d\n", op);
5342#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005343 status = RA_FAIL;
5344 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005345 }
5346 }
5347
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005348 /* If we can't continue sequentially, break the inner loop. */
5349 if (status != RA_CONT)
5350 break;
5351
5352 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005353 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005354
5355 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005356
5357 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005358 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005359 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005360 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005361 while (regstack.ga_len > 0 && status != RA_FAIL)
5362 {
5363 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5364 switch (rp->rs_state)
5365 {
5366 case RS_NOPEN:
5367 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005368 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005369 break;
5370
5371 case RS_MOPEN:
5372 /* Pop the state. Restore pointers when there is no match. */
5373 if (status == RA_NOMATCH)
5374 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5375 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005376 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005377 break;
5378
5379#ifdef FEAT_SYN_HL
5380 case RS_ZOPEN:
5381 /* Pop the state. Restore pointers when there is no match. */
5382 if (status == RA_NOMATCH)
5383 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5384 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005385 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005386 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005387#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005388
5389 case RS_MCLOSE:
5390 /* Pop the state. Restore pointers when there is no match. */
5391 if (status == RA_NOMATCH)
5392 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5393 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005394 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005395 break;
5396
5397#ifdef FEAT_SYN_HL
5398 case RS_ZCLOSE:
5399 /* Pop the state. Restore pointers when there is no match. */
5400 if (status == RA_NOMATCH)
5401 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5402 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005403 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005404 break;
5405#endif
5406
5407 case RS_BRANCH:
5408 if (status == RA_MATCH)
5409 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005410 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005411 else
5412 {
5413 if (status != RA_BREAK)
5414 {
5415 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005416 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005417 scan = rp->rs_scan;
5418 }
5419 if (scan == NULL || OP(scan) != BRANCH)
5420 {
5421 /* no more branches, didn't find a match */
5422 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005423 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005424 }
5425 else
5426 {
5427 /* Prepare to try a branch. */
5428 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005429 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005430 scan = OPERAND(scan);
5431 }
5432 }
5433 break;
5434
5435 case RS_BRCPLX_MORE:
5436 /* Pop the state. Restore pointers when there is no match. */
5437 if (status == RA_NOMATCH)
5438 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005439 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005440 --brace_count[rp->rs_no]; /* decrement match count */
5441 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005442 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005443 break;
5444
5445 case RS_BRCPLX_LONG:
5446 /* Pop the state. Restore pointers when there is no match. */
5447 if (status == RA_NOMATCH)
5448 {
5449 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005450 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005451 --brace_count[rp->rs_no];
5452 /* continue with the items after "\{}" */
5453 status = RA_CONT;
5454 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005455 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005456 if (status == RA_CONT)
5457 scan = regnext(scan);
5458 break;
5459
5460 case RS_BRCPLX_SHORT:
5461 /* Pop the state. Restore pointers when there is no match. */
5462 if (status == RA_NOMATCH)
5463 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005464 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005465 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005466 if (status == RA_NOMATCH)
5467 {
5468 scan = OPERAND(scan);
5469 status = RA_CONT;
5470 }
5471 break;
5472
5473 case RS_NOMATCH:
5474 /* Pop the state. If the operand matches for NOMATCH or
5475 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5476 * except for SUBPAT, and continue with the next item. */
5477 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5478 status = RA_NOMATCH;
5479 else
5480 {
5481 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005482 if (rp->rs_no != SUBPAT) /* zero-width */
5483 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005484 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005485 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005486 if (status == RA_CONT)
5487 scan = regnext(scan);
5488 break;
5489
5490 case RS_BEHIND1:
5491 if (status == RA_NOMATCH)
5492 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005493 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005494 regstack.ga_len -= sizeof(regbehind_T);
5495 }
5496 else
5497 {
5498 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5499 * the behind part does (not) match before the current
5500 * position in the input. This must be done at every
5501 * position in the input and checking if the match ends at
5502 * the current position. */
5503
5504 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005505 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005506
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005507 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005508 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005509 * result, hitting the start of the line or the previous
5510 * line (for multi-line matching).
5511 * Set behind_pos to where the match should end, BHPOS
5512 * will match it. Save the current value. */
5513 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5514 behind_pos = rp->rs_un.regsave;
5515
5516 rp->rs_state = RS_BEHIND2;
5517
Bram Moolenaar582fd852005-03-28 20:58:01 +00005518 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005519 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005520 }
5521 break;
5522
5523 case RS_BEHIND2:
5524 /*
5525 * Looping for BEHIND / NOBEHIND match.
5526 */
5527 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5528 {
5529 /* found a match that ends where "next" started */
5530 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5531 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005532 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5533 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005534 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005535 {
5536 /* But we didn't want a match. Need to restore the
5537 * subexpr, because what follows matched, so they have
5538 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005539 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005540 restore_subexpr(((regbehind_T *)rp) - 1);
5541 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005542 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005543 regstack.ga_len -= sizeof(regbehind_T);
5544 }
5545 else
5546 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005547 long limit;
5548
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005549 /* No match or a match that doesn't end where we want it: Go
5550 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005551 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005552 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005553 if (REG_MULTI)
5554 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005555 if (limit > 0
5556 && ((rp->rs_un.regsave.rs_u.pos.lnum
5557 < behind_pos.rs_u.pos.lnum
5558 ? (colnr_T)STRLEN(regline)
5559 : behind_pos.rs_u.pos.col)
5560 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5561 no = FAIL;
5562 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005563 {
5564 if (rp->rs_un.regsave.rs_u.pos.lnum
5565 < behind_pos.rs_u.pos.lnum
5566 || reg_getline(
5567 --rp->rs_un.regsave.rs_u.pos.lnum)
5568 == NULL)
5569 no = FAIL;
5570 else
5571 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005572 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005573 rp->rs_un.regsave.rs_u.pos.col =
5574 (colnr_T)STRLEN(regline);
5575 }
5576 }
5577 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005578 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005579#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005580 if (has_mbyte)
5581 rp->rs_un.regsave.rs_u.pos.col -=
5582 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005583 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005584 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005585#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005586 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005587 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005588 }
5589 else
5590 {
5591 if (rp->rs_un.regsave.rs_u.ptr == regline)
5592 no = FAIL;
5593 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005594 {
5595 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5596 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5597 - rp->rs_un.regsave.rs_u.ptr) > limit)
5598 no = FAIL;
5599 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005600 }
5601 if (no == OK)
5602 {
5603 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005604 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005605 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005606 if (status == RA_MATCH)
5607 {
5608 /* We did match, so subexpr may have been changed,
5609 * need to restore them for the next try. */
5610 status = RA_NOMATCH;
5611 restore_subexpr(((regbehind_T *)rp) - 1);
5612 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005613 }
5614 else
5615 {
5616 /* Can't advance. For NOBEHIND that's a match. */
5617 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5618 if (rp->rs_no == NOBEHIND)
5619 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005620 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5621 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005622 status = RA_MATCH;
5623 }
5624 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005625 {
5626 /* We do want a proper match. Need to restore the
5627 * subexpr if we had a match, because they may have
5628 * been set. */
5629 if (status == RA_MATCH)
5630 {
5631 status = RA_NOMATCH;
5632 restore_subexpr(((regbehind_T *)rp) - 1);
5633 }
5634 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005635 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005636 regstack.ga_len -= sizeof(regbehind_T);
5637 }
5638 }
5639 break;
5640
5641 case RS_STAR_LONG:
5642 case RS_STAR_SHORT:
5643 {
5644 regstar_T *rst = ((regstar_T *)rp) - 1;
5645
5646 if (status == RA_MATCH)
5647 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005648 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005649 regstack.ga_len -= sizeof(regstar_T);
5650 break;
5651 }
5652
5653 /* Tried once already, restore input pointers. */
5654 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005655 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005656
5657 /* Repeat until we found a position where it could match. */
5658 for (;;)
5659 {
5660 if (status != RA_BREAK)
5661 {
5662 /* Tried first position already, advance. */
5663 if (rp->rs_state == RS_STAR_LONG)
5664 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005665 /* Trying for longest match, but couldn't or
5666 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005667 if (--rst->count < rst->minval)
5668 break;
5669 if (reginput == regline)
5670 {
5671 /* backup to last char of previous line */
5672 --reglnum;
5673 regline = reg_getline(reglnum);
5674 /* Just in case regrepeat() didn't count
5675 * right. */
5676 if (regline == NULL)
5677 break;
5678 reginput = regline + STRLEN(regline);
5679 fast_breakcheck();
5680 }
5681 else
5682 mb_ptr_back(regline, reginput);
5683 }
5684 else
5685 {
5686 /* Range is backwards, use shortest match first.
5687 * Careful: maxval and minval are exchanged!
5688 * Couldn't or didn't match: try advancing one
5689 * char. */
5690 if (rst->count == rst->minval
5691 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5692 break;
5693 ++rst->count;
5694 }
5695 if (got_int)
5696 break;
5697 }
5698 else
5699 status = RA_NOMATCH;
5700
5701 /* If it could match, try it. */
5702 if (rst->nextb == NUL || *reginput == rst->nextb
5703 || *reginput == rst->nextb_ic)
5704 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005705 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005706 scan = regnext(rp->rs_scan);
5707 status = RA_CONT;
5708 break;
5709 }
5710 }
5711 if (status != RA_CONT)
5712 {
5713 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005714 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005715 regstack.ga_len -= sizeof(regstar_T);
5716 status = RA_NOMATCH;
5717 }
5718 }
5719 break;
5720 }
5721
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005722 /* If we want to continue the inner loop or didn't pop a state
5723 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005724 if (status == RA_CONT || rp == (regitem_T *)
5725 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5726 break;
5727 }
5728
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005729 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005730 if (status == RA_CONT)
5731 continue;
5732
5733 /*
5734 * If the regstack is empty or something failed we are done.
5735 */
5736 if (regstack.ga_len == 0 || status == RA_FAIL)
5737 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005738 if (scan == NULL)
5739 {
5740 /*
5741 * We get here only if there's trouble -- normally "case END" is
5742 * the terminating point.
5743 */
5744 EMSG(_(e_re_corr));
5745#ifdef DEBUG
5746 printf("Premature EOL\n");
5747#endif
5748 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005749 if (status == RA_FAIL)
5750 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005751 return (status == RA_MATCH);
5752 }
5753
5754 } /* End of loop until the regstack is empty. */
5755
5756 /* NOTREACHED */
5757}
5758
5759/*
5760 * Push an item onto the regstack.
5761 * Returns pointer to new item. Returns NULL when out of memory.
5762 */
5763 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005764regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005765 regstate_T state;
5766 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005767{
5768 regitem_T *rp;
5769
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005770 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005771 {
5772 EMSG(_(e_maxmempat));
5773 return NULL;
5774 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005775 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005776 return NULL;
5777
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005778 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005779 rp->rs_state = state;
5780 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005781
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005782 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005783 return rp;
5784}
5785
5786/*
5787 * Pop an item from the regstack.
5788 */
5789 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005790regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005791 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005792{
5793 regitem_T *rp;
5794
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005795 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005796 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005797
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005798 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005799}
5800
Bram Moolenaar071d4272004-06-13 20:20:40 +00005801/*
5802 * regrepeat - repeatedly match something simple, return how many.
5803 * Advances reginput (and reglnum) to just after the matched chars.
5804 */
5805 static int
5806regrepeat(p, maxcount)
5807 char_u *p;
5808 long maxcount; /* maximum number of matches allowed */
5809{
5810 long count = 0;
5811 char_u *scan;
5812 char_u *opnd;
5813 int mask;
5814 int testval = 0;
5815
5816 scan = reginput; /* Make local copy of reginput for speed. */
5817 opnd = OPERAND(p);
5818 switch (OP(p))
5819 {
5820 case ANY:
5821 case ANY + ADD_NL:
5822 while (count < maxcount)
5823 {
5824 /* Matching anything means we continue until end-of-line (or
5825 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5826 while (*scan != NUL && count < maxcount)
5827 {
5828 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005829 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005830 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005831 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5832 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005833 break;
5834 ++count; /* count the line-break */
5835 reg_nextline();
5836 scan = reginput;
5837 if (got_int)
5838 break;
5839 }
5840 break;
5841
5842 case IDENT:
5843 case IDENT + ADD_NL:
5844 testval = TRUE;
5845 /*FALLTHROUGH*/
5846 case SIDENT:
5847 case SIDENT + ADD_NL:
5848 while (count < maxcount)
5849 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005850 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005851 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005852 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005853 }
5854 else if (*scan == NUL)
5855 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005856 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5857 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005858 break;
5859 reg_nextline();
5860 scan = reginput;
5861 if (got_int)
5862 break;
5863 }
5864 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5865 ++scan;
5866 else
5867 break;
5868 ++count;
5869 }
5870 break;
5871
5872 case KWORD:
5873 case KWORD + ADD_NL:
5874 testval = TRUE;
5875 /*FALLTHROUGH*/
5876 case SKWORD:
5877 case SKWORD + ADD_NL:
5878 while (count < maxcount)
5879 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005880 if (vim_iswordp_buf(scan, reg_buf)
5881 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005882 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005883 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005884 }
5885 else if (*scan == NUL)
5886 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005887 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5888 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005889 break;
5890 reg_nextline();
5891 scan = reginput;
5892 if (got_int)
5893 break;
5894 }
5895 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5896 ++scan;
5897 else
5898 break;
5899 ++count;
5900 }
5901 break;
5902
5903 case FNAME:
5904 case FNAME + ADD_NL:
5905 testval = TRUE;
5906 /*FALLTHROUGH*/
5907 case SFNAME:
5908 case SFNAME + ADD_NL:
5909 while (count < maxcount)
5910 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005911 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005912 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005913 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005914 }
5915 else if (*scan == NUL)
5916 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005917 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5918 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005919 break;
5920 reg_nextline();
5921 scan = reginput;
5922 if (got_int)
5923 break;
5924 }
5925 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5926 ++scan;
5927 else
5928 break;
5929 ++count;
5930 }
5931 break;
5932
5933 case PRINT:
5934 case PRINT + ADD_NL:
5935 testval = TRUE;
5936 /*FALLTHROUGH*/
5937 case SPRINT:
5938 case SPRINT + ADD_NL:
5939 while (count < maxcount)
5940 {
5941 if (*scan == NUL)
5942 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005943 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5944 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005945 break;
5946 reg_nextline();
5947 scan = reginput;
5948 if (got_int)
5949 break;
5950 }
Bram Moolenaarac7c33e2013-07-21 17:06:00 +02005951 else if (vim_isprintc(PTR2CHAR(scan)) == 1
5952 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005953 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005954 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005955 }
5956 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5957 ++scan;
5958 else
5959 break;
5960 ++count;
5961 }
5962 break;
5963
5964 case WHITE:
5965 case WHITE + ADD_NL:
5966 testval = mask = RI_WHITE;
5967do_class:
5968 while (count < maxcount)
5969 {
5970#ifdef FEAT_MBYTE
5971 int l;
5972#endif
5973 if (*scan == NUL)
5974 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005975 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5976 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005977 break;
5978 reg_nextline();
5979 scan = reginput;
5980 if (got_int)
5981 break;
5982 }
5983#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005984 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005985 {
5986 if (testval != 0)
5987 break;
5988 scan += l;
5989 }
5990#endif
5991 else if ((class_tab[*scan] & mask) == testval)
5992 ++scan;
5993 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5994 ++scan;
5995 else
5996 break;
5997 ++count;
5998 }
5999 break;
6000
6001 case NWHITE:
6002 case NWHITE + ADD_NL:
6003 mask = RI_WHITE;
6004 goto do_class;
6005 case DIGIT:
6006 case DIGIT + ADD_NL:
6007 testval = mask = RI_DIGIT;
6008 goto do_class;
6009 case NDIGIT:
6010 case NDIGIT + ADD_NL:
6011 mask = RI_DIGIT;
6012 goto do_class;
6013 case HEX:
6014 case HEX + ADD_NL:
6015 testval = mask = RI_HEX;
6016 goto do_class;
6017 case NHEX:
6018 case NHEX + ADD_NL:
6019 mask = RI_HEX;
6020 goto do_class;
6021 case OCTAL:
6022 case OCTAL + ADD_NL:
6023 testval = mask = RI_OCTAL;
6024 goto do_class;
6025 case NOCTAL:
6026 case NOCTAL + ADD_NL:
6027 mask = RI_OCTAL;
6028 goto do_class;
6029 case WORD:
6030 case WORD + ADD_NL:
6031 testval = mask = RI_WORD;
6032 goto do_class;
6033 case NWORD:
6034 case NWORD + ADD_NL:
6035 mask = RI_WORD;
6036 goto do_class;
6037 case HEAD:
6038 case HEAD + ADD_NL:
6039 testval = mask = RI_HEAD;
6040 goto do_class;
6041 case NHEAD:
6042 case NHEAD + ADD_NL:
6043 mask = RI_HEAD;
6044 goto do_class;
6045 case ALPHA:
6046 case ALPHA + ADD_NL:
6047 testval = mask = RI_ALPHA;
6048 goto do_class;
6049 case NALPHA:
6050 case NALPHA + ADD_NL:
6051 mask = RI_ALPHA;
6052 goto do_class;
6053 case LOWER:
6054 case LOWER + ADD_NL:
6055 testval = mask = RI_LOWER;
6056 goto do_class;
6057 case NLOWER:
6058 case NLOWER + ADD_NL:
6059 mask = RI_LOWER;
6060 goto do_class;
6061 case UPPER:
6062 case UPPER + ADD_NL:
6063 testval = mask = RI_UPPER;
6064 goto do_class;
6065 case NUPPER:
6066 case NUPPER + ADD_NL:
6067 mask = RI_UPPER;
6068 goto do_class;
6069
6070 case EXACTLY:
6071 {
6072 int cu, cl;
6073
6074 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006075 * would have been used for it. It does handle single-byte
6076 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006077 if (ireg_ic)
6078 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006079 cu = MB_TOUPPER(*opnd);
6080 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006081 while (count < maxcount && (*scan == cu || *scan == cl))
6082 {
6083 count++;
6084 scan++;
6085 }
6086 }
6087 else
6088 {
6089 cu = *opnd;
6090 while (count < maxcount && *scan == cu)
6091 {
6092 count++;
6093 scan++;
6094 }
6095 }
6096 break;
6097 }
6098
6099#ifdef FEAT_MBYTE
6100 case MULTIBYTECODE:
6101 {
6102 int i, len, cf = 0;
6103
6104 /* Safety check (just in case 'encoding' was changed since
6105 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006106 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006107 {
6108 if (ireg_ic && enc_utf8)
6109 cf = utf_fold(utf_ptr2char(opnd));
6110 while (count < maxcount)
6111 {
6112 for (i = 0; i < len; ++i)
6113 if (opnd[i] != scan[i])
6114 break;
6115 if (i < len && (!ireg_ic || !enc_utf8
6116 || utf_fold(utf_ptr2char(scan)) != cf))
6117 break;
6118 scan += len;
6119 ++count;
6120 }
6121 }
6122 }
6123 break;
6124#endif
6125
6126 case ANYOF:
6127 case ANYOF + ADD_NL:
6128 testval = TRUE;
6129 /*FALLTHROUGH*/
6130
6131 case ANYBUT:
6132 case ANYBUT + ADD_NL:
6133 while (count < maxcount)
6134 {
6135#ifdef FEAT_MBYTE
6136 int len;
6137#endif
6138 if (*scan == NUL)
6139 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006140 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6141 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006142 break;
6143 reg_nextline();
6144 scan = reginput;
6145 if (got_int)
6146 break;
6147 }
6148 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6149 ++scan;
6150#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006151 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006152 {
6153 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6154 break;
6155 scan += len;
6156 }
6157#endif
6158 else
6159 {
6160 if ((cstrchr(opnd, *scan) == NULL) == testval)
6161 break;
6162 ++scan;
6163 }
6164 ++count;
6165 }
6166 break;
6167
6168 case NEWL:
6169 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006170 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6171 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006172 {
6173 count++;
6174 if (reg_line_lbr)
6175 ADVANCE_REGINPUT();
6176 else
6177 reg_nextline();
6178 scan = reginput;
6179 if (got_int)
6180 break;
6181 }
6182 break;
6183
6184 default: /* Oh dear. Called inappropriately. */
6185 EMSG(_(e_re_corr));
6186#ifdef DEBUG
6187 printf("Called regrepeat with op code %d\n", OP(p));
6188#endif
6189 break;
6190 }
6191
6192 reginput = scan;
6193
6194 return (int)count;
6195}
6196
6197/*
6198 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006199 * Returns NULL when calculating size, when there is no next item and when
6200 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006201 */
6202 static char_u *
6203regnext(p)
6204 char_u *p;
6205{
6206 int offset;
6207
Bram Moolenaard3005802009-11-25 17:21:32 +00006208 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006209 return NULL;
6210
6211 offset = NEXT(p);
6212 if (offset == 0)
6213 return NULL;
6214
Bram Moolenaar582fd852005-03-28 20:58:01 +00006215 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006216 return p - offset;
6217 else
6218 return p + offset;
6219}
6220
6221/*
6222 * Check the regexp program for its magic number.
6223 * Return TRUE if it's wrong.
6224 */
6225 static int
6226prog_magic_wrong()
6227{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006228 regprog_T *prog;
6229
6230 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6231 if (prog->engine == &nfa_regengine)
6232 /* For NFA matcher we don't check the magic */
6233 return FALSE;
6234
6235 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006236 {
6237 EMSG(_(e_re_corr));
6238 return TRUE;
6239 }
6240 return FALSE;
6241}
6242
6243/*
6244 * Cleanup the subexpressions, if this wasn't done yet.
6245 * This construction is used to clear the subexpressions only when they are
6246 * used (to increase speed).
6247 */
6248 static void
6249cleanup_subexpr()
6250{
6251 if (need_clear_subexpr)
6252 {
6253 if (REG_MULTI)
6254 {
6255 /* Use 0xff to set lnum to -1 */
6256 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6257 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6258 }
6259 else
6260 {
6261 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6262 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6263 }
6264 need_clear_subexpr = FALSE;
6265 }
6266}
6267
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end arrays, but only when the
 * "need_clear_zsubexpr" flag says this is still pending.
 * The flag is reset afterwards.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;                 /* already done */

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6289
6290/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006291 * Save the current subexpr to "bp", so that they can be restored
6292 * later by restore_subexpr().
6293 */
6294 static void
6295save_subexpr(bp)
6296 regbehind_T *bp;
6297{
6298 int i;
6299
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006300 /* When "need_clear_subexpr" is set we don't need to save the values, only
6301 * remember that this flag needs to be set again when restoring. */
6302 bp->save_need_clear_subexpr = need_clear_subexpr;
6303 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006304 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006305 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006306 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006307 if (REG_MULTI)
6308 {
6309 bp->save_start[i].se_u.pos = reg_startpos[i];
6310 bp->save_end[i].se_u.pos = reg_endpos[i];
6311 }
6312 else
6313 {
6314 bp->save_start[i].se_u.ptr = reg_startp[i];
6315 bp->save_end[i].se_u.ptr = reg_endp[i];
6316 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006317 }
6318 }
6319}
6320
6321/*
6322 * Restore the subexpr from "bp".
6323 */
6324 static void
6325restore_subexpr(bp)
6326 regbehind_T *bp;
6327{
6328 int i;
6329
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006330 /* Only need to restore saved values when they are not to be cleared. */
6331 need_clear_subexpr = bp->save_need_clear_subexpr;
6332 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006333 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006334 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006335 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006336 if (REG_MULTI)
6337 {
6338 reg_startpos[i] = bp->save_start[i].se_u.pos;
6339 reg_endpos[i] = bp->save_end[i].se_u.pos;
6340 }
6341 else
6342 {
6343 reg_startp[i] = bp->save_start[i].se_u.ptr;
6344 reg_endp[i] = bp->save_end[i].se_u.ptr;
6345 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006346 }
6347 }
6348}
6349
6350/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006351 * Advance reglnum, regline and reginput to the next line.
6352 */
6353 static void
6354reg_nextline()
6355{
6356 regline = reg_getline(++reglnum);
6357 reginput = regline;
6358 fast_breakcheck();
6359}
6360
6361/*
6362 * Save the input line and position in a regsave_T.
6363 */
6364 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006365reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006366 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006367 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006368{
6369 if (REG_MULTI)
6370 {
6371 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6372 save->rs_u.pos.lnum = reglnum;
6373 }
6374 else
6375 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006376 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006377}
6378
6379/*
6380 * Restore the input line and position from a regsave_T.
6381 */
6382 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006383reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006384 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006385 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006386{
6387 if (REG_MULTI)
6388 {
6389 if (reglnum != save->rs_u.pos.lnum)
6390 {
6391 /* only call reg_getline() when the line number changed to save
6392 * a bit of time */
6393 reglnum = save->rs_u.pos.lnum;
6394 regline = reg_getline(reglnum);
6395 }
6396 reginput = regline + save->rs_u.pos.col;
6397 }
6398 else
6399 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006400 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006401}
6402
6403/*
6404 * Return TRUE if current position is equal to saved position.
6405 */
6406 static int
6407reg_save_equal(save)
6408 regsave_T *save;
6409{
6410 if (REG_MULTI)
6411 return reglnum == save->rs_u.pos.lnum
6412 && reginput == regline + save->rs_u.pos.col;
6413 return reginput == save->rs_u.ptr;
6414}
6415
6416/*
6417 * Tentatively set the sub-expression start to the current position (after
6418 * calling regmatch() they will have changed). Need to save the existing
6419 * values for when there is no match.
6420 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6421 * depending on REG_MULTI.
6422 */
6423 static void
6424save_se_multi(savep, posp)
6425 save_se_T *savep;
6426 lpos_T *posp;
6427{
6428 savep->se_u.pos = *posp;
6429 posp->lnum = reglnum;
6430 posp->col = (colnr_T)(reginput - regline);
6431}
6432
6433 static void
6434save_se_one(savep, pp)
6435 save_se_T *savep;
6436 char_u **pp;
6437{
6438 savep->se_u.ptr = *pp;
6439 *pp = reginput;
6440}
6441
6442/*
6443 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6444 */
6445 static int
6446re_num_cmp(val, scan)
6447 long_u val;
6448 char_u *scan;
6449{
6450 long_u n = OPERAND_MIN(scan);
6451
6452 if (OPERAND_CMP(scan) == '>')
6453 return val > n;
6454 if (OPERAND_CMP(scan) == '<')
6455 return val < n;
6456 return val == n;
6457}
6458
Bram Moolenaar580abea2013-06-14 20:31:28 +02006459/*
6460 * Check whether a backreference matches.
6461 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006462 * If "bytelen" is not NULL, it is set to the byte length of the match in the
6463 * last line.
Bram Moolenaar580abea2013-06-14 20:31:28 +02006464 */
6465 static int
6466match_with_backref(start_lnum, start_col, end_lnum, end_col, bytelen)
6467 linenr_T start_lnum;
6468 colnr_T start_col;
6469 linenr_T end_lnum;
6470 colnr_T end_col;
6471 int *bytelen;
6472{
6473 linenr_T clnum = start_lnum;
6474 colnr_T ccol = start_col;
6475 int len;
6476 char_u *p;
6477
6478 if (bytelen != NULL)
6479 *bytelen = 0;
6480 for (;;)
6481 {
6482 /* Since getting one line may invalidate the other, need to make copy.
6483 * Slow! */
6484 if (regline != reg_tofree)
6485 {
6486 len = (int)STRLEN(regline);
6487 if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6488 {
6489 len += 50; /* get some extra */
6490 vim_free(reg_tofree);
6491 reg_tofree = alloc(len);
6492 if (reg_tofree == NULL)
6493 return RA_FAIL; /* out of memory!*/
6494 reg_tofreelen = len;
6495 }
6496 STRCPY(reg_tofree, regline);
6497 reginput = reg_tofree + (reginput - regline);
6498 regline = reg_tofree;
6499 }
6500
6501 /* Get the line to compare with. */
6502 p = reg_getline(clnum);
6503 if (clnum == end_lnum)
6504 len = end_col - ccol;
6505 else
6506 len = (int)STRLEN(p + ccol);
6507
6508 if (cstrncmp(p + ccol, reginput, &len) != 0)
6509 return RA_NOMATCH; /* doesn't match */
6510 if (bytelen != NULL)
6511 *bytelen += len;
6512 if (clnum == end_lnum)
6513 break; /* match and at end! */
6514 if (reglnum >= reg_maxline)
6515 return RA_NOMATCH; /* text too short */
6516
6517 /* Advance to next line. */
6518 reg_nextline();
Bram Moolenaar438ee5b2013-11-21 17:13:00 +01006519 if (bytelen != NULL)
6520 *bytelen = 0;
Bram Moolenaar580abea2013-06-14 20:31:28 +02006521 ++clnum;
6522 ccol = 0;
6523 if (got_int)
6524 return RA_FAIL;
6525 }
6526
6527 /* found a match! Note that regline may now point to a copy of the line,
6528 * that should not matter. */
6529 return RA_MATCH;
6530}
Bram Moolenaar071d4272004-06-13 20:20:40 +00006531
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006532#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006533
6534/*
6535 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6536 */
6537 static void
6538regdump(pattern, r)
6539 char_u *pattern;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006540 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006541{
6542 char_u *s;
6543 int op = EXACTLY; /* Arbitrary non-END op. */
6544 char_u *next;
6545 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006546 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006547
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006548#ifdef BT_REGEXP_LOG
6549 f = fopen("bt_regexp_log.log", "a");
6550#else
6551 f = stdout;
6552#endif
6553 if (f == NULL)
6554 return;
6555 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006556
6557 s = r->program + 1;
6558 /*
6559 * Loop until we find the END that isn't before a referred next (an END
6560 * can also appear in a NOMATCH operand).
6561 */
6562 while (op != END || s <= end)
6563 {
6564 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006565 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006566 next = regnext(s);
6567 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006568 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006569 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006570 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006571 if (end < next)
6572 end = next;
6573 if (op == BRACE_LIMITS)
6574 {
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006575 /* Two ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006576 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006577 s += 8;
6578 }
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006579 else if (op == BEHIND || op == NOBEHIND)
6580 {
6581 /* one int */
6582 fprintf(f, " count %ld", OPERAND_MIN(s));
6583 s += 4;
6584 }
Bram Moolenaar6d3a5d72013-06-06 18:04:51 +02006585 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6586 {
6587 /* one int plus comperator */
6588 fprintf(f, " count %ld", OPERAND_MIN(s));
6589 s += 5;
6590 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006591 s += 3;
6592 if (op == ANYOF || op == ANYOF + ADD_NL
6593 || op == ANYBUT || op == ANYBUT + ADD_NL
6594 || op == EXACTLY)
6595 {
6596 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006597 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006598 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006599 fprintf(f, "%c", *s++);
6600 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006601 s++;
6602 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006603 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006604 }
6605
6606 /* Header fields of interest. */
6607 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006608 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006609 ? (char *)transchar(r->regstart)
6610 : "multibyte", r->regstart);
6611 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006612 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006613 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006614 fprintf(f, "must have \"%s\"", r->regmust);
6615 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006616
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006617#ifdef BT_REGEXP_LOG
6618 fclose(f);
6619#endif
6620}
6621#endif /* BT_REGEXP_DUMP */
6622
6623#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006624/*
6625 * regprop - printable representation of opcode
6626 */
6627 static char_u *
6628regprop(op)
6629 char_u *op;
6630{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006631 char *p;
6632 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006633
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006634 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006635
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006636 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006637 {
6638 case BOL:
6639 p = "BOL";
6640 break;
6641 case EOL:
6642 p = "EOL";
6643 break;
6644 case RE_BOF:
6645 p = "BOF";
6646 break;
6647 case RE_EOF:
6648 p = "EOF";
6649 break;
6650 case CURSOR:
6651 p = "CURSOR";
6652 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006653 case RE_VISUAL:
6654 p = "RE_VISUAL";
6655 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006656 case RE_LNUM:
6657 p = "RE_LNUM";
6658 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006659 case RE_MARK:
6660 p = "RE_MARK";
6661 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006662 case RE_COL:
6663 p = "RE_COL";
6664 break;
6665 case RE_VCOL:
6666 p = "RE_VCOL";
6667 break;
6668 case BOW:
6669 p = "BOW";
6670 break;
6671 case EOW:
6672 p = "EOW";
6673 break;
6674 case ANY:
6675 p = "ANY";
6676 break;
6677 case ANY + ADD_NL:
6678 p = "ANY+NL";
6679 break;
6680 case ANYOF:
6681 p = "ANYOF";
6682 break;
6683 case ANYOF + ADD_NL:
6684 p = "ANYOF+NL";
6685 break;
6686 case ANYBUT:
6687 p = "ANYBUT";
6688 break;
6689 case ANYBUT + ADD_NL:
6690 p = "ANYBUT+NL";
6691 break;
6692 case IDENT:
6693 p = "IDENT";
6694 break;
6695 case IDENT + ADD_NL:
6696 p = "IDENT+NL";
6697 break;
6698 case SIDENT:
6699 p = "SIDENT";
6700 break;
6701 case SIDENT + ADD_NL:
6702 p = "SIDENT+NL";
6703 break;
6704 case KWORD:
6705 p = "KWORD";
6706 break;
6707 case KWORD + ADD_NL:
6708 p = "KWORD+NL";
6709 break;
6710 case SKWORD:
6711 p = "SKWORD";
6712 break;
6713 case SKWORD + ADD_NL:
6714 p = "SKWORD+NL";
6715 break;
6716 case FNAME:
6717 p = "FNAME";
6718 break;
6719 case FNAME + ADD_NL:
6720 p = "FNAME+NL";
6721 break;
6722 case SFNAME:
6723 p = "SFNAME";
6724 break;
6725 case SFNAME + ADD_NL:
6726 p = "SFNAME+NL";
6727 break;
6728 case PRINT:
6729 p = "PRINT";
6730 break;
6731 case PRINT + ADD_NL:
6732 p = "PRINT+NL";
6733 break;
6734 case SPRINT:
6735 p = "SPRINT";
6736 break;
6737 case SPRINT + ADD_NL:
6738 p = "SPRINT+NL";
6739 break;
6740 case WHITE:
6741 p = "WHITE";
6742 break;
6743 case WHITE + ADD_NL:
6744 p = "WHITE+NL";
6745 break;
6746 case NWHITE:
6747 p = "NWHITE";
6748 break;
6749 case NWHITE + ADD_NL:
6750 p = "NWHITE+NL";
6751 break;
6752 case DIGIT:
6753 p = "DIGIT";
6754 break;
6755 case DIGIT + ADD_NL:
6756 p = "DIGIT+NL";
6757 break;
6758 case NDIGIT:
6759 p = "NDIGIT";
6760 break;
6761 case NDIGIT + ADD_NL:
6762 p = "NDIGIT+NL";
6763 break;
6764 case HEX:
6765 p = "HEX";
6766 break;
6767 case HEX + ADD_NL:
6768 p = "HEX+NL";
6769 break;
6770 case NHEX:
6771 p = "NHEX";
6772 break;
6773 case NHEX + ADD_NL:
6774 p = "NHEX+NL";
6775 break;
6776 case OCTAL:
6777 p = "OCTAL";
6778 break;
6779 case OCTAL + ADD_NL:
6780 p = "OCTAL+NL";
6781 break;
6782 case NOCTAL:
6783 p = "NOCTAL";
6784 break;
6785 case NOCTAL + ADD_NL:
6786 p = "NOCTAL+NL";
6787 break;
6788 case WORD:
6789 p = "WORD";
6790 break;
6791 case WORD + ADD_NL:
6792 p = "WORD+NL";
6793 break;
6794 case NWORD:
6795 p = "NWORD";
6796 break;
6797 case NWORD + ADD_NL:
6798 p = "NWORD+NL";
6799 break;
6800 case HEAD:
6801 p = "HEAD";
6802 break;
6803 case HEAD + ADD_NL:
6804 p = "HEAD+NL";
6805 break;
6806 case NHEAD:
6807 p = "NHEAD";
6808 break;
6809 case NHEAD + ADD_NL:
6810 p = "NHEAD+NL";
6811 break;
6812 case ALPHA:
6813 p = "ALPHA";
6814 break;
6815 case ALPHA + ADD_NL:
6816 p = "ALPHA+NL";
6817 break;
6818 case NALPHA:
6819 p = "NALPHA";
6820 break;
6821 case NALPHA + ADD_NL:
6822 p = "NALPHA+NL";
6823 break;
6824 case LOWER:
6825 p = "LOWER";
6826 break;
6827 case LOWER + ADD_NL:
6828 p = "LOWER+NL";
6829 break;
6830 case NLOWER:
6831 p = "NLOWER";
6832 break;
6833 case NLOWER + ADD_NL:
6834 p = "NLOWER+NL";
6835 break;
6836 case UPPER:
6837 p = "UPPER";
6838 break;
6839 case UPPER + ADD_NL:
6840 p = "UPPER+NL";
6841 break;
6842 case NUPPER:
6843 p = "NUPPER";
6844 break;
6845 case NUPPER + ADD_NL:
6846 p = "NUPPER+NL";
6847 break;
6848 case BRANCH:
6849 p = "BRANCH";
6850 break;
6851 case EXACTLY:
6852 p = "EXACTLY";
6853 break;
6854 case NOTHING:
6855 p = "NOTHING";
6856 break;
6857 case BACK:
6858 p = "BACK";
6859 break;
6860 case END:
6861 p = "END";
6862 break;
6863 case MOPEN + 0:
6864 p = "MATCH START";
6865 break;
6866 case MOPEN + 1:
6867 case MOPEN + 2:
6868 case MOPEN + 3:
6869 case MOPEN + 4:
6870 case MOPEN + 5:
6871 case MOPEN + 6:
6872 case MOPEN + 7:
6873 case MOPEN + 8:
6874 case MOPEN + 9:
6875 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6876 p = NULL;
6877 break;
6878 case MCLOSE + 0:
6879 p = "MATCH END";
6880 break;
6881 case MCLOSE + 1:
6882 case MCLOSE + 2:
6883 case MCLOSE + 3:
6884 case MCLOSE + 4:
6885 case MCLOSE + 5:
6886 case MCLOSE + 6:
6887 case MCLOSE + 7:
6888 case MCLOSE + 8:
6889 case MCLOSE + 9:
6890 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6891 p = NULL;
6892 break;
6893 case BACKREF + 1:
6894 case BACKREF + 2:
6895 case BACKREF + 3:
6896 case BACKREF + 4:
6897 case BACKREF + 5:
6898 case BACKREF + 6:
6899 case BACKREF + 7:
6900 case BACKREF + 8:
6901 case BACKREF + 9:
6902 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6903 p = NULL;
6904 break;
6905 case NOPEN:
6906 p = "NOPEN";
6907 break;
6908 case NCLOSE:
6909 p = "NCLOSE";
6910 break;
6911#ifdef FEAT_SYN_HL
6912 case ZOPEN + 1:
6913 case ZOPEN + 2:
6914 case ZOPEN + 3:
6915 case ZOPEN + 4:
6916 case ZOPEN + 5:
6917 case ZOPEN + 6:
6918 case ZOPEN + 7:
6919 case ZOPEN + 8:
6920 case ZOPEN + 9:
6921 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6922 p = NULL;
6923 break;
6924 case ZCLOSE + 1:
6925 case ZCLOSE + 2:
6926 case ZCLOSE + 3:
6927 case ZCLOSE + 4:
6928 case ZCLOSE + 5:
6929 case ZCLOSE + 6:
6930 case ZCLOSE + 7:
6931 case ZCLOSE + 8:
6932 case ZCLOSE + 9:
6933 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6934 p = NULL;
6935 break;
6936 case ZREF + 1:
6937 case ZREF + 2:
6938 case ZREF + 3:
6939 case ZREF + 4:
6940 case ZREF + 5:
6941 case ZREF + 6:
6942 case ZREF + 7:
6943 case ZREF + 8:
6944 case ZREF + 9:
6945 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6946 p = NULL;
6947 break;
6948#endif
6949 case STAR:
6950 p = "STAR";
6951 break;
6952 case PLUS:
6953 p = "PLUS";
6954 break;
6955 case NOMATCH:
6956 p = "NOMATCH";
6957 break;
6958 case MATCH:
6959 p = "MATCH";
6960 break;
6961 case BEHIND:
6962 p = "BEHIND";
6963 break;
6964 case NOBEHIND:
6965 p = "NOBEHIND";
6966 break;
6967 case SUBPAT:
6968 p = "SUBPAT";
6969 break;
6970 case BRACE_LIMITS:
6971 p = "BRACE_LIMITS";
6972 break;
6973 case BRACE_SIMPLE:
6974 p = "BRACE_SIMPLE";
6975 break;
6976 case BRACE_COMPLEX + 0:
6977 case BRACE_COMPLEX + 1:
6978 case BRACE_COMPLEX + 2:
6979 case BRACE_COMPLEX + 3:
6980 case BRACE_COMPLEX + 4:
6981 case BRACE_COMPLEX + 5:
6982 case BRACE_COMPLEX + 6:
6983 case BRACE_COMPLEX + 7:
6984 case BRACE_COMPLEX + 8:
6985 case BRACE_COMPLEX + 9:
6986 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6987 p = NULL;
6988 break;
6989#ifdef FEAT_MBYTE
6990 case MULTIBYTECODE:
6991 p = "MULTIBYTECODE";
6992 break;
6993#endif
6994 case NEWL:
6995 p = "NEWL";
6996 break;
6997 default:
6998 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6999 p = NULL;
7000 break;
7001 }
7002 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007003 STRCAT(buf, p);
7004 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007005}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007006#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007007
7008#ifdef FEAT_MBYTE
7009static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
7010
7011typedef struct
7012{
7013 int a, b, c;
7014} decomp_T;
7015
7016
7017/* 0xfb20 - 0xfb4f */
Bram Moolenaard6f676d2005-06-01 21:51:55 +00007018static decomp_T decomp_table[0xfb4f-0xfb20+1] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00007019{
7020 {0x5e2,0,0}, /* 0xfb20 alt ayin */
7021 {0x5d0,0,0}, /* 0xfb21 alt alef */
7022 {0x5d3,0,0}, /* 0xfb22 alt dalet */
7023 {0x5d4,0,0}, /* 0xfb23 alt he */
7024 {0x5db,0,0}, /* 0xfb24 alt kaf */
7025 {0x5dc,0,0}, /* 0xfb25 alt lamed */
7026 {0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
7027 {0x5e8,0,0}, /* 0xfb27 alt resh */
7028 {0x5ea,0,0}, /* 0xfb28 alt tav */
7029 {'+', 0, 0}, /* 0xfb29 alt plus */
7030 {0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
7031 {0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
7032 {0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
7033 {0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
7034 {0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
7035 {0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
7036 {0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
7037 {0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
7038 {0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
7039 {0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
7040 {0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
7041 {0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
7042 {0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
7043 {0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
7044 {0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
7045 {0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
7046 {0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
7047 {0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
7048 {0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
7049 {0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
7050 {0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
7051 {0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
7052 {0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
7053 {0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
7054 {0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
7055 {0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
7056 {0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
7057 {0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
7058 {0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
7059 {0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
7060 {0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
7061 {0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
7062 {0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
7063 {0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
7064 {0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
7065 {0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
7066 {0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
7067 {0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
7068};
7069
7070 static void
7071mb_decompose(c, c1, c2, c3)
7072 int c, *c1, *c2, *c3;
7073{
7074 decomp_T d;
7075
Bram Moolenaar2eec59e2013-05-21 21:37:20 +02007076 if (c >= 0xfb20 && c <= 0xfb4f)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007077 {
7078 d = decomp_table[c - 0xfb20];
7079 *c1 = d.a;
7080 *c2 = d.b;
7081 *c3 = d.c;
7082 }
7083 else
7084 {
7085 *c1 = c;
7086 *c2 = *c3 = 0;
7087 }
7088}
7089#endif
7090
7091/*
7092 * Compare two strings, ignore case if ireg_ic set.
7093 * Return 0 if strings match, non-zero otherwise.
7094 * Correct the length "*n" when composing characters are ignored.
7095 */
7096 static int
7097cstrncmp(s1, s2, n)
7098 char_u *s1, *s2;
7099 int *n;
7100{
7101 int result;
7102
7103 if (!ireg_ic)
7104 result = STRNCMP(s1, s2, *n);
7105 else
7106 result = MB_STRNICMP(s1, s2, *n);
7107
7108#ifdef FEAT_MBYTE
7109 /* if it failed and it's utf8 and we want to combineignore: */
7110 if (result != 0 && enc_utf8 && ireg_icombine)
7111 {
7112 char_u *str1, *str2;
7113 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007114 int junk;
7115
7116 /* we have to handle the strcmp ourselves, since it is necessary to
7117 * deal with the composing characters by ignoring them: */
7118 str1 = s1;
7119 str2 = s2;
7120 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007121 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007122 {
7123 c1 = mb_ptr2char_adv(&str1);
7124 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007125
7126 /* decompose the character if necessary, into 'base' characters
7127 * because I don't care about Arabic, I will hard-code the Hebrew
7128 * which I *do* care about! So sue me... */
7129 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7130 {
7131 /* decomposition necessary? */
7132 mb_decompose(c1, &c11, &junk, &junk);
7133 mb_decompose(c2, &c12, &junk, &junk);
7134 c1 = c11;
7135 c2 = c12;
7136 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7137 break;
7138 }
7139 }
7140 result = c2 - c1;
7141 if (result == 0)
7142 *n = (int)(str2 - s2);
7143 }
7144#endif
7145
7146 return result;
7147}
7148
7149/*
7150 * cstrchr: This function is used a lot for simple searches, keep it fast!
7151 */
7152 static char_u *
7153cstrchr(s, c)
7154 char_u *s;
7155 int c;
7156{
7157 char_u *p;
7158 int cc;
7159
7160 if (!ireg_ic
7161#ifdef FEAT_MBYTE
7162 || (!enc_utf8 && mb_char2len(c) > 1)
7163#endif
7164 )
7165 return vim_strchr(s, c);
7166
7167 /* tolower() and toupper() can be slow, comparing twice should be a lot
7168 * faster (esp. when using MS Visual C++!).
7169 * For UTF-8 need to use folded case. */
7170#ifdef FEAT_MBYTE
7171 if (enc_utf8 && c > 0x80)
7172 cc = utf_fold(c);
7173 else
7174#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007175 if (MB_ISUPPER(c))
7176 cc = MB_TOLOWER(c);
7177 else if (MB_ISLOWER(c))
7178 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007179 else
7180 return vim_strchr(s, c);
7181
7182#ifdef FEAT_MBYTE
7183 if (has_mbyte)
7184 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007185 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007186 {
7187 if (enc_utf8 && c > 0x80)
7188 {
7189 if (utf_fold(utf_ptr2char(p)) == cc)
7190 return p;
7191 }
7192 else if (*p == c || *p == cc)
7193 return p;
7194 }
7195 }
7196 else
7197#endif
7198 /* Faster version for when there are no multi-byte characters. */
7199 for (p = s; *p != NUL; ++p)
7200 if (*p == c || *p == cc)
7201 return p;
7202
7203 return NULL;
7204}
7205
7206/***************************************************************
7207 * regsub stuff *
7208 ***************************************************************/
7209
7210/* This stuff below really confuses cc on an SGI -- webb */
7211#ifdef __sgi
7212# undef __ARGS
7213# define __ARGS(x) ()
7214#endif
7215
7216/*
7217 * We should define ftpr as a pointer to a function returning a pointer to
7218 * a function returning a pointer to a function ...
7219 * This is impossible, so we declare a pointer to a function returning a
7220 * pointer to a function returning void. This should work for all compilers.
7221 */
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007222typedef void (*(*fptr_T) __ARGS((int *, int)))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007223
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007224static fptr_T do_upper __ARGS((int *, int));
7225static fptr_T do_Upper __ARGS((int *, int));
7226static fptr_T do_lower __ARGS((int *, int));
7227static fptr_T do_Lower __ARGS((int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007228
7229static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7230
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007231 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007232do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007233 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007234 int c;
7235{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007236 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007237
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007238 return (fptr_T)NULL;
7239}
7240
7241 static fptr_T
7242do_Upper(d, c)
7243 int *d;
7244 int c;
7245{
7246 *d = MB_TOUPPER(c);
7247
7248 return (fptr_T)do_Upper;
7249}
7250
7251 static fptr_T
7252do_lower(d, c)
7253 int *d;
7254 int c;
7255{
7256 *d = MB_TOLOWER(c);
7257
7258 return (fptr_T)NULL;
7259}
7260
7261 static fptr_T
7262do_Lower(d, c)
7263 int *d;
7264 int c;
7265{
7266 *d = MB_TOLOWER(c);
7267
7268 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007269}
7270
7271/*
7272 * regtilde(): Replace tildes in the pattern by the old pattern.
7273 *
7274 * Short explanation of the tilde: It stands for the previous replacement
7275 * pattern. If that previous pattern also contains a ~ we should go back a
7276 * step further... But we insert the previous pattern into the current one
7277 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007278 * This still does not handle the case where "magic" changes. So require the
7279 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007280 *
7281 * The tildes are parsed once before the first call to vim_regsub().
7282 */
7283 char_u *
7284regtilde(source, magic)
7285 char_u *source;
7286 int magic;
7287{
7288 char_u *newsub = source;
7289 char_u *tmpsub;
7290 char_u *p;
7291 int len;
7292 int prevlen;
7293
7294 for (p = newsub; *p; ++p)
7295 {
7296 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7297 {
7298 if (reg_prev_sub != NULL)
7299 {
7300 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7301 prevlen = (int)STRLEN(reg_prev_sub);
7302 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7303 if (tmpsub != NULL)
7304 {
7305 /* copy prefix */
7306 len = (int)(p - newsub); /* not including ~ */
7307 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007308 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007309 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7310 /* copy postfix */
7311 if (!magic)
7312 ++p; /* back off \ */
7313 STRCPY(tmpsub + len + prevlen, p + 1);
7314
7315 if (newsub != source) /* already allocated newsub */
7316 vim_free(newsub);
7317 newsub = tmpsub;
7318 p = newsub + len + prevlen;
7319 }
7320 }
7321 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007322 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007323 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007324 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007325 --p;
7326 }
7327 else
7328 {
7329 if (*p == '\\' && p[1]) /* skip escaped characters */
7330 ++p;
7331#ifdef FEAT_MBYTE
7332 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007333 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007334#endif
7335 }
7336 }
7337
7338 vim_free(reg_prev_sub);
7339 if (newsub != source) /* newsub was allocated, just keep it */
7340 reg_prev_sub = newsub;
7341 else /* no ~ found, need to save newsub */
7342 reg_prev_sub = vim_strsave(newsub);
7343 return newsub;
7344}
7345
#ifdef FEAT_EVAL
/* TRUE when submatch() can be used */
static int		can_f_submatch = FALSE;

/*
 * Copies of reg_match, reg_mmatch, reg_firstlnum, reg_maxline and
 * reg_line_lbr, for use by reg_submatch().  vim_regsub_both() sets them
 * before evaluating a "\=" expression and restores the originals from them
 * afterwards.  Needed for when the substitution string is an expression that
 * contains a call to substitute() and submatch().
 */
static regmatch_T	*submatch_match;
static regmmatch_T	*submatch_mmatch;
static linenr_T		submatch_firstlnum;
static linenr_T		submatch_maxline;
static int		submatch_line_lbr;
#endif
7358
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
/*
 * vim_regsub() - perform substitutions after a vim_regexec() or
 * vim_regexec_multi() match.
 *
 * "rmp" is the match to take the submatches from, "source" the substitute
 * string and "dest" the buffer for the result.
 *
 * "copy" TRUE:  really copy the result into "dest".
 * "copy" FALSE: copy nothing, only compute the length of the result.
 *
 * "backslash" TRUE: a backslash will be removed later, thus backslashes are
 * doubled to keep them and a backslash is inserted before a CR to avoid it
 * being replaced with a line break later.
 *
 * Note: The matched text must not change between the call of
 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
 * references invalid!
 *
 * Returns the size of the replacement, including terminating NUL.
 */
    int
vim_regsub(rmp, source, dest, copy, magic, backslash)
    regmatch_T	*rmp;
    char_u	*source;
    char_u	*dest;
    int		copy;
    int		magic;
    int		backslash;
{
    int		size;

    /* Set up for a match within a string: no multi-line match. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_buf = curbuf;
    reg_maxline = 0;
    reg_line_lbr = TRUE;

    size = vim_regsub_both(source, dest, copy, magic, backslash);
    return size;
}
#endif
7395
7396 int
7397vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7398 regmmatch_T *rmp;
7399 linenr_T lnum;
7400 char_u *source;
7401 char_u *dest;
7402 int copy;
7403 int magic;
7404 int backslash;
7405{
7406 reg_match = NULL;
7407 reg_mmatch = rmp;
7408 reg_buf = curbuf; /* always works on the current buffer! */
7409 reg_firstlnum = lnum;
7410 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
Bram Moolenaar93fc4812014-04-23 18:48:47 +02007411 reg_line_lbr = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007412 return vim_regsub_both(source, dest, copy, magic, backslash);
7413}
7414
7415 static int
7416vim_regsub_both(source, dest, copy, magic, backslash)
7417 char_u *source;
7418 char_u *dest;
7419 int copy;
7420 int magic;
7421 int backslash;
7422{
7423 char_u *src;
7424 char_u *dst;
7425 char_u *s;
7426 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007427 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007428 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007429 fptr_T func_all = (fptr_T)NULL;
7430 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007431 linenr_T clnum = 0; /* init for GCC */
7432 int len = 0; /* init for GCC */
7433#ifdef FEAT_EVAL
7434 static char_u *eval_result = NULL;
7435#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007436
7437 /* Be paranoid... */
7438 if (source == NULL || dest == NULL)
7439 {
7440 EMSG(_(e_null));
7441 return 0;
7442 }
7443 if (prog_magic_wrong())
7444 return 0;
7445 src = source;
7446 dst = dest;
7447
7448 /*
7449 * When the substitute part starts with "\=" evaluate it as an expression.
7450 */
7451 if (source[0] == '\\' && source[1] == '='
7452#ifdef FEAT_EVAL
7453 && !can_f_submatch /* can't do this recursively */
7454#endif
7455 )
7456 {
7457#ifdef FEAT_EVAL
7458 /* To make sure that the length doesn't change between checking the
7459 * length and copying the string, and to speed up things, the
7460 * resulting string is saved from the call with "copy" == FALSE to the
7461 * call with "copy" == TRUE. */
7462 if (copy)
7463 {
7464 if (eval_result != NULL)
7465 {
7466 STRCPY(dest, eval_result);
7467 dst += STRLEN(eval_result);
7468 vim_free(eval_result);
7469 eval_result = NULL;
7470 }
7471 }
7472 else
7473 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007474 win_T *save_reg_win;
7475 int save_ireg_ic;
7476
7477 vim_free(eval_result);
7478
7479 /* The expression may contain substitute(), which calls us
7480 * recursively. Make sure submatch() gets the text from the first
7481 * level. Don't need to save "reg_buf", because
7482 * vim_regexec_multi() can't be called recursively. */
7483 submatch_match = reg_match;
7484 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007485 submatch_firstlnum = reg_firstlnum;
7486 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007487 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007488 save_reg_win = reg_win;
7489 save_ireg_ic = ireg_ic;
7490 can_f_submatch = TRUE;
7491
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007492 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007493 if (eval_result != NULL)
7494 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007495 int had_backslash = FALSE;
7496
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007497 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007498 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007499 /* Change NL to CR, so that it becomes a line break,
7500 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007501 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007502 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007503 *s = CAR;
7504 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007505 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007506 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007507 /* Change NL to CR here too, so that this works:
7508 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7509 * abc\
7510 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007511 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007512 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007513 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007514 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007515 had_backslash = TRUE;
7516 }
7517 }
7518 if (had_backslash && backslash)
7519 {
7520 /* Backslashes will be consumed, need to double them. */
7521 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7522 if (s != NULL)
7523 {
7524 vim_free(eval_result);
7525 eval_result = s;
7526 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007527 }
7528
7529 dst += STRLEN(eval_result);
7530 }
7531
7532 reg_match = submatch_match;
7533 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007534 reg_firstlnum = submatch_firstlnum;
7535 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007536 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007537 reg_win = save_reg_win;
7538 ireg_ic = save_ireg_ic;
7539 can_f_submatch = FALSE;
7540 }
7541#endif
7542 }
7543 else
7544 while ((c = *src++) != NUL)
7545 {
7546 if (c == '&' && magic)
7547 no = 0;
7548 else if (c == '\\' && *src != NUL)
7549 {
7550 if (*src == '&' && !magic)
7551 {
7552 ++src;
7553 no = 0;
7554 }
7555 else if ('0' <= *src && *src <= '9')
7556 {
7557 no = *src++ - '0';
7558 }
7559 else if (vim_strchr((char_u *)"uUlLeE", *src))
7560 {
7561 switch (*src++)
7562 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007563 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007564 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007565 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007566 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007567 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007568 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007569 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007570 continue;
7571 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007572 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007573 continue;
7574 }
7575 }
7576 }
7577 if (no < 0) /* Ordinary character. */
7578 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007579 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7580 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007581 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007582 if (copy)
7583 {
7584 *dst++ = c;
7585 *dst++ = *src++;
7586 *dst++ = *src++;
7587 }
7588 else
7589 {
7590 dst += 3;
7591 src += 2;
7592 }
7593 continue;
7594 }
7595
Bram Moolenaar071d4272004-06-13 20:20:40 +00007596 if (c == '\\' && *src != NUL)
7597 {
7598 /* Check for abbreviations -- webb */
7599 switch (*src)
7600 {
7601 case 'r': c = CAR; ++src; break;
7602 case 'n': c = NL; ++src; break;
7603 case 't': c = TAB; ++src; break;
7604 /* Oh no! \e already has meaning in subst pat :-( */
7605 /* case 'e': c = ESC; ++src; break; */
7606 case 'b': c = Ctrl_H; ++src; break;
7607
7608 /* If "backslash" is TRUE the backslash will be removed
7609 * later. Used to insert a literal CR. */
7610 default: if (backslash)
7611 {
7612 if (copy)
7613 *dst = '\\';
7614 ++dst;
7615 }
7616 c = *src++;
7617 }
7618 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007619#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007620 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007621 c = mb_ptr2char(src - 1);
7622#endif
7623
Bram Moolenaardb552d602006-03-23 22:59:57 +00007624 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007625 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007626 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007627 func_one = (fptr_T)(func_one(&cc, c));
7628 else if (func_all != (fptr_T)NULL)
7629 /* Turbo C complains without the typecast */
7630 func_all = (fptr_T)(func_all(&cc, c));
7631 else /* just copy */
7632 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007633
7634#ifdef FEAT_MBYTE
7635 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007636 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007637 int totlen = mb_ptr2len(src - 1);
7638
Bram Moolenaar071d4272004-06-13 20:20:40 +00007639 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007640 mb_char2bytes(cc, dst);
7641 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007642 if (enc_utf8)
7643 {
7644 int clen = utf_ptr2len(src - 1);
7645
7646 /* If the character length is shorter than "totlen", there
7647 * are composing characters; copy them as-is. */
7648 if (clen < totlen)
7649 {
7650 if (copy)
7651 mch_memmove(dst + 1, src - 1 + clen,
7652 (size_t)(totlen - clen));
7653 dst += totlen - clen;
7654 }
7655 }
7656 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007657 }
7658 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007659#endif
7660 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007661 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007662 dst++;
7663 }
7664 else
7665 {
7666 if (REG_MULTI)
7667 {
7668 clnum = reg_mmatch->startpos[no].lnum;
7669 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7670 s = NULL;
7671 else
7672 {
7673 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7674 if (reg_mmatch->endpos[no].lnum == clnum)
7675 len = reg_mmatch->endpos[no].col
7676 - reg_mmatch->startpos[no].col;
7677 else
7678 len = (int)STRLEN(s);
7679 }
7680 }
7681 else
7682 {
7683 s = reg_match->startp[no];
7684 if (reg_match->endp[no] == NULL)
7685 s = NULL;
7686 else
7687 len = (int)(reg_match->endp[no] - s);
7688 }
7689 if (s != NULL)
7690 {
7691 for (;;)
7692 {
7693 if (len == 0)
7694 {
7695 if (REG_MULTI)
7696 {
7697 if (reg_mmatch->endpos[no].lnum == clnum)
7698 break;
7699 if (copy)
7700 *dst = CAR;
7701 ++dst;
7702 s = reg_getline(++clnum);
7703 if (reg_mmatch->endpos[no].lnum == clnum)
7704 len = reg_mmatch->endpos[no].col;
7705 else
7706 len = (int)STRLEN(s);
7707 }
7708 else
7709 break;
7710 }
7711 else if (*s == NUL) /* we hit NUL. */
7712 {
7713 if (copy)
7714 EMSG(_(e_re_damg));
7715 goto exit;
7716 }
7717 else
7718 {
7719 if (backslash && (*s == CAR || *s == '\\'))
7720 {
7721 /*
7722 * Insert a backslash in front of a CR, otherwise
7723 * it will be replaced by a line break.
7724 * Number of backslashes will be halved later,
7725 * double them here.
7726 */
7727 if (copy)
7728 {
7729 dst[0] = '\\';
7730 dst[1] = *s;
7731 }
7732 dst += 2;
7733 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007734 else
7735 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007736#ifdef FEAT_MBYTE
7737 if (has_mbyte)
7738 c = mb_ptr2char(s);
7739 else
7740#endif
7741 c = *s;
7742
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007743 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007744 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007745 func_one = (fptr_T)(func_one(&cc, c));
7746 else if (func_all != (fptr_T)NULL)
7747 /* Turbo C complains without the typecast */
7748 func_all = (fptr_T)(func_all(&cc, c));
7749 else /* just copy */
7750 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007751
7752#ifdef FEAT_MBYTE
7753 if (has_mbyte)
7754 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007755 int l;
7756
7757 /* Copy composing characters separately, one
7758 * at a time. */
7759 if (enc_utf8)
7760 l = utf_ptr2len(s) - 1;
7761 else
7762 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007763
7764 s += l;
7765 len -= l;
7766 if (copy)
7767 mb_char2bytes(cc, dst);
7768 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007769 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007770 else
7771#endif
7772 if (copy)
7773 *dst = cc;
7774 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007775 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007776
Bram Moolenaar071d4272004-06-13 20:20:40 +00007777 ++s;
7778 --len;
7779 }
7780 }
7781 }
7782 no = -1;
7783 }
7784 }
7785 if (copy)
7786 *dst = NUL;
7787
7788exit:
7789 return (int)((dst - dest) + 1);
7790}
7791
7792#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007793static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7794
Bram Moolenaar071d4272004-06-13 20:20:40 +00007795/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007796 * Call reg_getline() with the line numbers from the submatch. If a
7797 * substitute() was used the reg_maxline and other values have been
7798 * overwritten.
7799 */
7800 static char_u *
7801reg_getline_submatch(lnum)
7802 linenr_T lnum;
7803{
7804 char_u *s;
7805 linenr_T save_first = reg_firstlnum;
7806 linenr_T save_max = reg_maxline;
7807
7808 reg_firstlnum = submatch_firstlnum;
7809 reg_maxline = submatch_maxline;
7810
7811 s = reg_getline(lnum);
7812
7813 reg_firstlnum = save_first;
7814 reg_maxline = save_max;
7815 return s;
7816}
7817
7818/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007819 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007820 * allocated memory.
7821 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7822 */
7823 char_u *
7824reg_submatch(no)
7825 int no;
7826{
7827 char_u *retval = NULL;
7828 char_u *s;
7829 int len;
7830 int round;
7831 linenr_T lnum;
7832
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007833 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007834 return NULL;
7835
7836 if (submatch_match == NULL)
7837 {
7838 /*
7839 * First round: compute the length and allocate memory.
7840 * Second round: copy the text.
7841 */
7842 for (round = 1; round <= 2; ++round)
7843 {
7844 lnum = submatch_mmatch->startpos[no].lnum;
7845 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7846 return NULL;
7847
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007848 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007849 if (s == NULL) /* anti-crash check, cannot happen? */
7850 break;
7851 if (submatch_mmatch->endpos[no].lnum == lnum)
7852 {
7853 /* Within one line: take form start to end col. */
7854 len = submatch_mmatch->endpos[no].col
7855 - submatch_mmatch->startpos[no].col;
7856 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007857 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007858 ++len;
7859 }
7860 else
7861 {
7862 /* Multiple lines: take start line from start col, middle
7863 * lines completely and end line up to end col. */
7864 len = (int)STRLEN(s);
7865 if (round == 2)
7866 {
7867 STRCPY(retval, s);
7868 retval[len] = '\n';
7869 }
7870 ++len;
7871 ++lnum;
7872 while (lnum < submatch_mmatch->endpos[no].lnum)
7873 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007874 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007875 if (round == 2)
7876 STRCPY(retval + len, s);
7877 len += (int)STRLEN(s);
7878 if (round == 2)
7879 retval[len] = '\n';
7880 ++len;
7881 }
7882 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007883 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007884 submatch_mmatch->endpos[no].col);
7885 len += submatch_mmatch->endpos[no].col;
7886 if (round == 2)
7887 retval[len] = NUL;
7888 ++len;
7889 }
7890
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007891 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007892 {
7893 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007894 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007895 return NULL;
7896 }
7897 }
7898 }
7899 else
7900 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007901 s = submatch_match->startp[no];
7902 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007903 retval = NULL;
7904 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007905 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007906 }
7907
7908 return retval;
7909}
Bram Moolenaar41571762014-04-02 19:00:58 +02007910
7911/*
7912 * Used for the submatch() function with the optional non-zero argument: get
7913 * the list of strings from the n'th submatch in allocated memory with NULs
7914 * represented in NLs.
7915 * Returns a list of allocated strings. Returns NULL when not in a ":s"
7916 * command, for a non-existing submatch and for any error.
7917 */
7918 list_T *
7919reg_submatch_list(no)
7920 int no;
7921{
7922 char_u *s;
7923 linenr_T slnum;
7924 linenr_T elnum;
7925 colnr_T scol;
7926 colnr_T ecol;
7927 int i;
7928 list_T *list;
7929 int error = FALSE;
7930
7931 if (!can_f_submatch || no < 0)
7932 return NULL;
7933
7934 if (submatch_match == NULL)
7935 {
7936 slnum = submatch_mmatch->startpos[no].lnum;
7937 elnum = submatch_mmatch->endpos[no].lnum;
7938 if (slnum < 0 || elnum < 0)
7939 return NULL;
7940
7941 scol = submatch_mmatch->startpos[no].col;
7942 ecol = submatch_mmatch->endpos[no].col;
7943
7944 list = list_alloc();
7945 if (list == NULL)
7946 return NULL;
7947
7948 s = reg_getline_submatch(slnum) + scol;
7949 if (slnum == elnum)
7950 {
7951 if (list_append_string(list, s, ecol - scol) == FAIL)
7952 error = TRUE;
7953 }
7954 else
7955 {
7956 if (list_append_string(list, s, -1) == FAIL)
7957 error = TRUE;
7958 for (i = 1; i < elnum - slnum; i++)
7959 {
7960 s = reg_getline_submatch(slnum + i);
7961 if (list_append_string(list, s, -1) == FAIL)
7962 error = TRUE;
7963 }
7964 s = reg_getline_submatch(elnum);
7965 if (list_append_string(list, s, ecol) == FAIL)
7966 error = TRUE;
7967 }
7968 }
7969 else
7970 {
7971 s = submatch_match->startp[no];
7972 if (s == NULL || submatch_match->endp[no] == NULL)
7973 return NULL;
7974 list = list_alloc();
7975 if (list == NULL)
7976 return NULL;
7977 if (list_append_string(list, s,
7978 (int)(submatch_match->endp[no] - s)) == FAIL)
7979 error = TRUE;
7980 }
7981
7982 if (error)
7983 {
7984 list_free(list, TRUE);
7985 return NULL;
7986 }
7987 return list;
7988}
Bram Moolenaar071d4272004-06-13 20:20:40 +00007989#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007990
7991static regengine_T bt_regengine =
7992{
7993 bt_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02007994 bt_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007995 bt_regexec_nl,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007996 bt_regexec_multi
7997#ifdef DEBUG
7998 ,(char_u *)""
7999#endif
8000};
8001
8002
8003#include "regexp_nfa.c"
8004
8005static regengine_T nfa_regengine =
8006{
8007 nfa_regcomp,
Bram Moolenaar473de612013-06-08 18:19:48 +02008008 nfa_regfree,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008009 nfa_regexec_nl,
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008010 nfa_regexec_multi
8011#ifdef DEBUG
8012 ,(char_u *)""
8013#endif
8014};
8015
/*
 * Numbers of the available regexp engines.
 * Must match with 'regexpengine'.
 */
#define AUTOMATIC_ENGINE        0
#define BACKTRACKING_ENGINE     1
#define NFA_ENGINE              2

/* Which regexp engine to use?  Needed for vim_regcomp(). */
static int regexp_engine = AUTOMATIC_ENGINE;

#ifdef DEBUG
/* Engine names for the debug message, indexed by the engine number. */
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8029
8030/*
8031 * Compile a regular expression into internal code.
Bram Moolenaar473de612013-06-08 18:19:48 +02008032 * Returns the program in allocated memory.
8033 * Use vim_regfree() to free the memory.
8034 * Returns NULL for an error.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008035 */
8036 regprog_T *
8037vim_regcomp(expr_arg, re_flags)
8038 char_u *expr_arg;
8039 int re_flags;
8040{
8041 regprog_T *prog = NULL;
8042 char_u *expr = expr_arg;
8043
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008044 regexp_engine = p_re;
8045
8046 /* Check for prefix "\%#=", that sets the regexp engine */
8047 if (STRNCMP(expr, "\\%#=", 4) == 0)
8048 {
8049 int newengine = expr[4] - '0';
8050
8051 if (newengine == AUTOMATIC_ENGINE
8052 || newengine == BACKTRACKING_ENGINE
8053 || newengine == NFA_ENGINE)
8054 {
8055 regexp_engine = expr[4] - '0';
8056 expr += 5;
8057#ifdef DEBUG
Bram Moolenaar6e132072014-05-13 16:46:32 +02008058 smsg((char_u *)"New regexp mode selected (%d): %s",
8059 regexp_engine, regname[newengine]);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008060#endif
8061 }
8062 else
8063 {
8064 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8065 regexp_engine = AUTOMATIC_ENGINE;
8066 }
8067 }
8068#ifdef DEBUG
8069 bt_regengine.expr = expr;
8070 nfa_regengine.expr = expr;
8071#endif
8072
8073 /*
8074 * First try the NFA engine, unless backtracking was requested.
8075 */
8076 if (regexp_engine != BACKTRACKING_ENGINE)
8077 prog = nfa_regengine.regcomp(expr, re_flags);
8078 else
8079 prog = bt_regengine.regcomp(expr, re_flags);
8080
8081 if (prog == NULL) /* error compiling regexp with initial engine */
8082 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008083#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008084 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8085 {
8086 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008087 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008088 if (f)
8089 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008090 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008091 fclose(f);
8092 }
8093 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02008094 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8095 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008096 }
8097#endif
8098 /*
Bram Moolenaar917789f2013-09-19 17:04:01 +02008099 * If the NFA engine failed, the backtracking engine won't work either.
8100 *
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008101 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008102 prog = bt_regengine.regcomp(expr, re_flags);
Bram Moolenaar917789f2013-09-19 17:04:01 +02008103 */
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02008104 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008105
8106 return prog;
8107}
8108
8109/*
Bram Moolenaar473de612013-06-08 18:19:48 +02008110 * Free a compiled regexp program, returned by vim_regcomp().
8111 */
8112 void
8113vim_regfree(prog)
8114 regprog_T *prog;
8115{
8116 if (prog != NULL)
8117 prog->engine->regfree(prog);
8118}
8119
8120/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008121 * Match a regexp against a string.
8122 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8123 * Uses curbuf for line count and 'iskeyword'.
8124 *
8125 * Return TRUE if there is a match, FALSE if not.
8126 */
8127 int
8128vim_regexec(rmp, line, col)
8129 regmatch_T *rmp;
8130 char_u *line; /* string to match against */
8131 colnr_T col; /* column to start looking for match */
8132{
Bram Moolenaar2af78a12014-04-23 19:06:37 +02008133 return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008134}
8135
8136#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
8137 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
8138/*
8139 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
8140 */
8141 int
8142vim_regexec_nl(rmp, line, col)
8143 regmatch_T *rmp;
8144 char_u *line;
8145 colnr_T col;
8146{
Bram Moolenaar2af78a12014-04-23 19:06:37 +02008147 return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02008148}
8149#endif
8150
8151/*
8152 * Match a regexp against multiple lines.
8153 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8154 * Uses curbuf for line count and 'iskeyword'.
8155 *
8156 * Return zero if there is no match. Return number of lines contained in the
8157 * match otherwise.
8158 */
8159 long
8160vim_regexec_multi(rmp, win, buf, lnum, col, tm)
8161 regmmatch_T *rmp;
8162 win_T *win; /* window in which to search or NULL */
8163 buf_T *buf; /* buffer in which to search */
8164 linenr_T lnum; /* nr of line to start looking for match */
8165 colnr_T col; /* column to start looking for match */
8166 proftime_T *tm; /* timeout limit or NULL */
8167{
8168 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
8169}