blob: a1f71ab979428108127498b384c5d413923f282c [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
54#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000055
56/*
57 * The "internal use only" fields in regexp.h are present to pass info from
58 * compile to execute that permits the execute phase to run lots faster on
59 * simple cases. They are:
60 *
61 * regstart char that must begin a match; NUL if none obvious; Can be a
62 * multi-byte character.
63 * reganch is the match anchored (at beginning-of-line only)?
64 * regmust string (pointer into program) that match must include, or NULL
65 * regmlen length of regmust string
66 * regflags RF_ values or'ed together
67 *
68 * Regstart and reganch permit very fast decisions on suitable starting points
69 * for a match, cutting down the work a lot. Regmust permits fast rejection
70 * of lines that cannot possibly match. The regmust tests are costly enough
71 * that vim_regcomp() supplies a regmust only if the r.e. contains something
72 * potentially expensive (at present, the only such thing detected is * or +
73 * at the start of the r.e., which can involve a lot of backup). Regmlen is
74 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
75 * computing it anyway.
76 */
77
78/*
79 * Structure for regexp "program". This is essentially a linear encoding
80 * of a nondeterministic finite-state machine (aka syntax charts or
81 * "railroad normal form" in parsing technology). Each node is an opcode
82 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
83 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
84 * pointer with a BRANCH on both ends of it is connecting two alternatives.
85 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
86 * (as opposed to a collection of them) is never concatenated with anything
87 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000088 * node points to the node after the stuff to be repeated.
89 * The operand of some types of node is a literal string; for others, it is a
90 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
91 * is the first node of the branch.
92 * (NB this is *not* a tree structure: the tail of the branch connects to the
93 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000094 *
95 * pattern is coded like:
96 *
97 * +-----------------+
98 * | V
99 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
100 * | ^ | ^
101 * +------+ +----------+
102 *
103 *
104 * +------------------+
105 * V |
106 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
107 * | | ^ ^
108 * | +---------------+ |
109 * +---------------------------------------------+
110 *
111 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000112 * +----------------------+
113 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000114 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaarc9b4b052006-04-30 18:54:39 +0000115 * | | ^ ^
116 * | +-----------+ |
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000117 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000118 *
119 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000120 * +-------------------------+
121 * V |
122 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
123 * | | ^
124 * | +----------------+
125 * +-----------------------------------------------+
126 *
127 *
128 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
129 * | | ^ ^
130 * | +----------------+ |
131 * +--------------------------------+
132 *
133 * +---------+
134 * | V
135 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
136 * | | | | ^ ^
137 * | | | +-----+ |
138 * | | +----------------+ |
139 * | +---------------------------+ |
140 * +------------------------------------------------------+
141 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000142 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000143 * one alternative.
144 */
145
146/*
147 * The opcodes are:
148 */
149
150/* definition number opnd? meaning */
151#define END 0 /* End of program or NOMATCH operand. */
152#define BOL 1 /* Match "" at beginning of line. */
153#define EOL 2 /* Match "" at end of line. */
154#define BRANCH 3 /* node Match this alternative, or the
155 * next... */
156#define BACK 4 /* Match "", "next" ptr points backward. */
157#define EXACTLY 5 /* str Match this string. */
158#define NOTHING 6 /* Match empty string. */
159#define STAR 7 /* node Match this (simple) thing 0 or more
160 * times. */
161#define PLUS 8 /* node Match this (simple) thing 1 or more
162 * times. */
163#define MATCH 9 /* node match the operand zero-width */
164#define NOMATCH 10 /* node check for no match with operand */
165#define BEHIND 11 /* node look behind for a match with operand */
166#define NOBEHIND 12 /* node look behind for no match with operand */
167#define SUBPAT 13 /* node match the operand here */
168#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
169 * n times (\{m,n\}). */
170#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
171#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
172#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
173 * and BRACE_COMPLEX. */
174#define NEWL 18 /* Match line-break */
175#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
176
177
/*
 * Character classes: 20-48 are the normal opcodes, 50-78 are the same classes
 * that additionally match a line-break.  The second group is obtained by
 * adding ADD_NL to an opcode of the first group.
 * FIRST_NL and LAST_NL are fully parenthesized so that they expand safely
 * inside any expression, e.g. "2 * FIRST_NL".
 */
#define ADD_NL          30
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20      /*      Match any one character. */
#define ANYOF           21      /* str  Match any character in this string. */
#define ANYBUT          22      /* str  Match any character not in this
                                 *      string. */
#define IDENT           23      /*      Match identifier char */
#define SIDENT          24      /*      Match identifier char but no digit */
#define KWORD           25      /*      Match keyword char */
#define SKWORD          26      /*      Match word char but no digit */
#define FNAME           27      /*      Match file name char */
#define SFNAME          28      /*      Match file name char but no digit */
#define PRINT           29      /*      Match printable char */
#define SPRINT          30      /*      Match printable char but no digit */
#define WHITE           31      /*      Match whitespace char */
#define NWHITE          32      /*      Match non-whitespace char */
#define DIGIT           33      /*      Match digit char */
#define NDIGIT          34      /*      Match non-digit char */
#define HEX             35      /*      Match hex char */
#define NHEX            36      /*      Match non-hex char */
#define OCTAL           37      /*      Match octal char */
#define NOCTAL          38      /*      Match non-octal char */
#define WORD            39      /*      Match word char */
#define NWORD           40      /*      Match non-word char */
#define HEAD            41      /*      Match head char */
#define NHEAD           42      /*      Match non-head char */
#define ALPHA           43      /*      Match alpha char */
#define NALPHA          44      /*      Match non-alpha char */
#define LOWER           45      /*      Match lowercase char */
#define NLOWER          46      /*      Match non-lowercase char */
#define UPPER           47      /*      Match uppercase char */
#define NUPPER          48      /*      Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* TRUE when "op" is one of the character class opcodes that include a
 * line-break.  Note that "op" is evaluated twice. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
213
214#define MOPEN 80 /* -89 Mark this point in input as start of
215 * \( subexpr. MOPEN + 0 marks start of
216 * match. */
217#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
218 * end of match. */
219#define BACKREF 100 /* -109 node Match same string again \1-\9 */
220
221#ifdef FEAT_SYN_HL
222# define ZOPEN 110 /* -119 Mark this point in input as start of
223 * \z( subexpr. */
224# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
225# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
226#endif
227
228#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
229
230#define NOPEN 150 /* Mark this point in input as start of
231 \%( subexpr. */
232#define NCLOSE 151 /* Analogous to NOPEN. */
233
234#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
235#define RE_BOF 201 /* Match "" at beginning of file. */
236#define RE_EOF 202 /* Match "" at end of file. */
237#define CURSOR 203 /* Match location of cursor. */
238
239#define RE_LNUM 204 /* nr cmp Match line number */
240#define RE_COL 205 /* nr cmp Match column number */
241#define RE_VCOL 206 /* nr cmp Match virtual column number */
242
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000243#define RE_MARK 207 /* mark cmp Match mark position */
244#define RE_VISUAL 208 /* Match Visual area */
245
Bram Moolenaar071d4272004-06-13 20:20:40 +0000246/*
247 * Magic characters have a special meaning, they don't match literally.
248 * Magic characters are negative. This separates them from literal characters
249 * (possibly multi-byte). Only ASCII characters can be Magic.
250 */
251#define Magic(x) ((int)(x) - 256)
252#define un_Magic(x) ((x) + 256)
253#define is_Magic(x) ((x) < 0)
254
255static int no_Magic __ARGS((int x));
256static int toggle_Magic __ARGS((int x));
257
/*
 * Return "x" with its magicness removed: a Magic value is converted with
 * un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
266
/*
 * Return "x" with its magicness flipped: a Magic value is converted with
 * un_Magic(), any other value is converted with Magic().
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
275
276/*
277 * The first byte of the regexp internal "program" is actually this magic
278 * number; the start node begins in the second byte. It's used to catch the
279 * most severe mutilation of the program by the caller.
280 */
281
282#define REGMAGIC 0234
283
284/*
285 * Opcode notes:
286 *
287 * BRANCH The set of branches constituting a single choice are hooked
288 * together with their "next" pointers, since precedence prevents
289 * anything being concatenated to any individual branch. The
290 * "next" pointer of the last BRANCH in a choice points to the
291 * thing following the whole choice. This is also where the
292 * final "next" pointer of each individual branch points; each
293 * branch starts with the operand node of a BRANCH node.
294 *
295 * BACK Normal "next" pointers all implicitly point forward; BACK
296 * exists to make loop structures possible.
297 *
298 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
299 * BRANCH structures using BACK. Simple cases (one character
300 * per match) are implemented with STAR and PLUS for speed
301 * and to minimize recursive plunges.
302 *
303 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
304 * node, and defines the min and max limits to be used for that
305 * node.
306 *
307 * MOPEN,MCLOSE ...are numbered at compile time.
308 * ZOPEN,ZCLOSE ...ditto
309 */
310
311/*
312 * A node is one char of opcode followed by two chars of "next" pointer.
313 * "Next" pointers are stored as two 8-bit bytes, high order first. The
314 * value is a positive offset from the opcode of the node containing it.
315 * An operand, if any, simply follows the node. (Note that much of the
316 * code generation knows about this implicit relationship.)
317 *
318 * Using two bytes for the "next" pointer is vast overkill for most things,
319 * but allows patterns to get big without disasters.
320 */
321#define OP(p) ((int)*(p))
322#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
323#define OPERAND(p) ((p) + 3)
324/* Obtain an operand that was stored as four bytes, MSB first. */
325#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
326 + ((long)(p)[5] << 8) + (long)(p)[6])
327/* Obtain a second operand stored as four bytes. */
328#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
329/* Obtain a second single-byte operand stored after a four bytes operand. */
330#define OPERAND_CMP(p) (p)[7]
331
332/*
333 * Utility definitions.
334 */
335#define UCHARAT(p) ((int)*(char_u *)(p))
336
337/* Used for an error (down from) vim_regcomp(): give the error message, set
338 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000339#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000340#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200341#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
342#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
343#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000344
345#define MAX_LIMIT (32767L << 16L)
346
347static int re_multi_type __ARGS((int));
348static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
349static char_u *cstrchr __ARGS((char_u *, int));
350
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200351#ifdef BT_REGEXP_DUMP
352static void regdump __ARGS((char_u *, bt_regprog_T *));
353#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000354#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000355static char_u *regprop __ARGS((char_u *));
356#endif
357
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200358static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
359static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
360static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
361static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
362
Bram Moolenaar071d4272004-06-13 20:20:40 +0000363#define NOT_MULTI 0
364#define MULTI_ONE 1
365#define MULTI_MULT 2
366/*
367 * Return NOT_MULTI if c is not a "multi" operator.
368 * Return MULTI_ONE if c is a single "multi" operator.
369 * Return MULTI_MULT if c is a multi "multi" operator.
370 */
371 static int
372re_multi_type(c)
373 int c;
374{
375 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
376 return MULTI_ONE;
377 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
378 return MULTI_MULT;
379 return NOT_MULTI;
380}
381
382/*
383 * Flags to be passed up and down.
384 */
385#define HASWIDTH 0x1 /* Known never to match null string. */
386#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
387#define SPSTART 0x4 /* Starts with * or +. */
388#define HASNL 0x8 /* Contains some \n. */
389#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
390#define WORST 0 /* Worst case. */
391
392/*
393 * When regcode is set to this value, code is not emitted and size is computed
394 * instead.
395 */
396#define JUST_CALC_SIZE ((char_u *) -1)
397
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000398static char_u *reg_prev_sub = NULL;
399
Bram Moolenaar071d4272004-06-13 20:20:40 +0000400/*
401 * REGEXP_INRANGE contains all characters which are always special in a []
402 * range after '\'.
403 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
404 * These are:
405 * \n - New line (NL).
406 * \r - Carriage Return (CR).
407 * \t - Tab (TAB).
408 * \e - Escape (ESC).
409 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000410 * \d - Character code in decimal, eg \d123
411 * \o - Character code in octal, eg \o40
412 * \x - Character code in hex, eg \x4a
413 * \u - Multibyte character code, eg \u20ac
414 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000415 */
416static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000417static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000418
419static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000420static int get_char_class __ARGS((char_u **pp));
421static int get_equi_class __ARGS((char_u **pp));
422static void reg_equi_class __ARGS((int c));
423static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000424static char_u *skip_anyof __ARGS((char_u *p));
425static void init_class_tab __ARGS((void));
426
427/*
428 * Translate '\x' to its control character, except "\n", which is Magic.
429 */
430 static int
431backslash_trans(c)
432 int c;
433{
434 switch (c)
435 {
436 case 'r': return CAR;
437 case 't': return TAB;
438 case 'e': return ESC;
439 case 'b': return BS;
440 }
441 return c;
442}
443
444/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000445 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000446 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
447 * recognized. Otherwise "pp" is advanced to after the item.
448 */
449 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000450get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000451 char_u **pp;
452{
453 static const char *(class_names[]) =
454 {
455 "alnum:]",
456#define CLASS_ALNUM 0
457 "alpha:]",
458#define CLASS_ALPHA 1
459 "blank:]",
460#define CLASS_BLANK 2
461 "cntrl:]",
462#define CLASS_CNTRL 3
463 "digit:]",
464#define CLASS_DIGIT 4
465 "graph:]",
466#define CLASS_GRAPH 5
467 "lower:]",
468#define CLASS_LOWER 6
469 "print:]",
470#define CLASS_PRINT 7
471 "punct:]",
472#define CLASS_PUNCT 8
473 "space:]",
474#define CLASS_SPACE 9
475 "upper:]",
476#define CLASS_UPPER 10
477 "xdigit:]",
478#define CLASS_XDIGIT 11
479 "tab:]",
480#define CLASS_TAB 12
481 "return:]",
482#define CLASS_RETURN 13
483 "backspace:]",
484#define CLASS_BACKSPACE 14
485 "escape:]",
486#define CLASS_ESCAPE 15
487 };
488#define CLASS_NONE 99
489 int i;
490
491 if ((*pp)[1] == ':')
492 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000493 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000494 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
495 {
496 *pp += STRLEN(class_names[i]) + 2;
497 return i;
498 }
499 }
500 return CLASS_NONE;
501}
502
503/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000504 * Specific version of character class functions.
505 * Using a table to keep this fast.
506 */
507static short class_tab[256];
508
509#define RI_DIGIT 0x01
510#define RI_HEX 0x02
511#define RI_OCTAL 0x04
512#define RI_WORD 0x08
513#define RI_HEAD 0x10
514#define RI_ALPHA 0x20
515#define RI_LOWER 0x40
516#define RI_UPPER 0x80
517#define RI_WHITE 0x100
518
519 static void
520init_class_tab()
521{
522 int i;
523 static int done = FALSE;
524
525 if (done)
526 return;
527
528 for (i = 0; i < 256; ++i)
529 {
530 if (i >= '0' && i <= '7')
531 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
532 else if (i >= '8' && i <= '9')
533 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
534 else if (i >= 'a' && i <= 'f')
535 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
536#ifdef EBCDIC
537 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
538 || (i >= 's' && i <= 'z'))
539#else
540 else if (i >= 'g' && i <= 'z')
541#endif
542 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
543 else if (i >= 'A' && i <= 'F')
544 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
545#ifdef EBCDIC
546 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
547 || (i >= 'S' && i <= 'Z'))
548#else
549 else if (i >= 'G' && i <= 'Z')
550#endif
551 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
552 else if (i == '_')
553 class_tab[i] = RI_WORD + RI_HEAD;
554 else
555 class_tab[i] = 0;
556 }
557 class_tab[' '] |= RI_WHITE;
558 class_tab['\t'] |= RI_WHITE;
559 done = TRUE;
560}
561
/*
 * Character class tests using class_tab[].  Each one is non-zero when "c" has
 * the corresponding RI_ flag set.
 * With FEAT_MBYTE "c" can be a character above 255; such a character is never
 * in a class, which also keeps the index inside class_tab[].  In these
 * variants "c" is evaluated twice, thus it must not have side effects.  It is
 * parenthesized so that an argument containing an operator with a lower
 * precedence than '<' is handled correctly.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
583
584/* flags for regflags */
585#define RF_ICASE 1 /* ignore case */
586#define RF_NOICASE 2 /* don't ignore case */
587#define RF_HASNL 4 /* can match a NL */
588#define RF_ICOMBINE 8 /* ignore combining characters */
589#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
590
591/*
592 * Global work variables for vim_regcomp().
593 */
594
595static char_u *regparse; /* Input-scan pointer. */
596static int prevchr_len; /* byte length of previous char */
597static int num_complex_braces; /* Complex \{...} count */
598static int regnpar; /* () count. */
599#ifdef FEAT_SYN_HL
600static int regnzpar; /* \z() count. */
601static int re_has_z; /* \z item detected */
602#endif
603static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
604static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000605static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000606static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
607static unsigned regflags; /* RF_ flags for prog */
608static long brace_min[10]; /* Minimums for complex brace repeats */
609static long brace_max[10]; /* Maximums for complex brace repeats */
610static int brace_count[10]; /* Current counts for complex brace repeats */
611#if defined(FEAT_SYN_HL) || defined(PROTO)
612static int had_eol; /* TRUE when EOL found by vim_regcomp() */
613#endif
614static int one_exactly = FALSE; /* only do one char for EXACTLY */
615
616static int reg_magic; /* magicness of the pattern: */
617#define MAGIC_NONE 1 /* "\V" very unmagic */
618#define MAGIC_OFF 2 /* "\M" or 'magic' off */
619#define MAGIC_ON 3 /* "\m" or 'magic' */
620#define MAGIC_ALL 4 /* "\v" very magic */
621
622static int reg_string; /* matching with a string instead of a buffer
623 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000624static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000625
626/*
627 * META contains all characters that may be magic, except '^' and '$'.
628 */
629
630#ifdef EBCDIC
631static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
632#else
633/* META[] is used often enough to justify turning it into a table. */
634static char_u META_flags[] = {
635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
637/* % & ( ) * + . */
638 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
639/* 1 2 3 4 5 6 7 8 9 < = > ? */
640 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
641/* @ A C D F H I K L M O */
642 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
643/* P S U V W X Z [ _ */
644 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
645/* a c d f h i k l m n o */
646 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
647/* p s u v w x z { | ~ */
648 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
649};
650#endif
651
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200652static int curchr; /* currently parsed character */
653/* Previous character. Note: prevchr is sometimes -1 when we are not at the
654 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
655 * because ^ was taken to be magic -- webb */
656static int prevchr;
657static int prevprevchr; /* previous-previous character */
658static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000659
660/* arguments for reg() */
661#define REG_NOPAREN 0 /* toplevel reg() */
662#define REG_PAREN 1 /* \(\) */
663#define REG_ZPAREN 2 /* \z(\) */
664#define REG_NPAREN 3 /* \%(\) */
665
666/*
667 * Forward declarations for vim_regcomp()'s friends.
668 */
669static void initchr __ARGS((char_u *));
670static int getchr __ARGS((void));
671static void skipchr_keepstart __ARGS((void));
672static int peekchr __ARGS((void));
673static void skipchr __ARGS((void));
674static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000675static int gethexchrs __ARGS((int maxinputlen));
676static int getoctchrs __ARGS((void));
677static int getdecchrs __ARGS((void));
678static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000679static void regcomp_start __ARGS((char_u *expr, int flags));
680static char_u *reg __ARGS((int, int *));
681static char_u *regbranch __ARGS((int *flagp));
682static char_u *regconcat __ARGS((int *flagp));
683static char_u *regpiece __ARGS((int *));
684static char_u *regatom __ARGS((int *));
685static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000686#ifdef FEAT_MBYTE
687static int use_multibytecode __ARGS((int c));
688#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000689static int prog_magic_wrong __ARGS((void));
690static char_u *regnext __ARGS((char_u *));
691static void regc __ARGS((int b));
692#ifdef FEAT_MBYTE
693static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200694# define REGMBC(x) regmbc(x);
695# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000696#else
697# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200698# define REGMBC(x)
699# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000700#endif
701static void reginsert __ARGS((int, char_u *));
702static void reginsert_limits __ARGS((int, long, long, char_u *));
703static char_u *re_put_long __ARGS((char_u *pr, long_u val));
704static int read_limits __ARGS((long *, long *));
705static void regtail __ARGS((char_u *, char_u *));
706static void regoptail __ARGS((char_u *, char_u *));
707
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200708static regengine_T bt_regengine;
709static regengine_T nfa_regengine;
710
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711/*
712 * Return TRUE if compiled regular expression "prog" can match a line break.
713 */
714 int
715re_multiline(prog)
716 regprog_T *prog;
717{
718 return (prog->regflags & RF_HASNL);
719}
720
721/*
722 * Return TRUE if compiled regular expression "prog" looks before the start
723 * position (pattern contains "\@<=" or "\@<!").
724 */
725 int
726re_lookbehind(prog)
727 regprog_T *prog;
728{
729 return (prog->regflags & RF_LOOKBH);
730}
731
732/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000733 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
734 * Returns a character representing the class. Zero means that no item was
735 * recognized. Otherwise "pp" is advanced to after the item.
736 */
737 static int
738get_equi_class(pp)
739 char_u **pp;
740{
741 int c;
742 int l = 1;
743 char_u *p = *pp;
744
745 if (p[1] == '=')
746 {
747#ifdef FEAT_MBYTE
748 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000749 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000750#endif
751 if (p[l + 2] == '=' && p[l + 3] == ']')
752 {
753#ifdef FEAT_MBYTE
754 if (has_mbyte)
755 c = mb_ptr2char(p + 2);
756 else
757#endif
758 c = p[2];
759 *pp += l + 4;
760 return c;
761 }
762 }
763 return 0;
764}
765
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200766#ifdef EBCDIC
767/*
768 * Table for equivalence class "c". (IBM-1047)
769 */
770char *EQUIVAL_CLASS_C[16] = {
771 "A\x62\x63\x64\x65\x66\x67",
772 "C\x68",
773 "E\x71\x72\x73\x74",
774 "I\x75\x76\x77\x78",
775 "N\x69",
776 "O\xEB\xEC\xED\xEE\xEF",
777 "U\xFB\xFC\xFD\xFE",
778 "Y\xBA",
779 "a\x42\x43\x44\x45\x46\x47",
780 "c\x48",
781 "e\x51\x52\x53\x54",
782 "i\x55\x56\x57\x58",
783 "n\x49",
784 "o\xCB\xCC\xCD\xCE\xCF",
785 "u\xDB\xDC\xDD\xDE",
786 "y\x8D\xDF",
787};
788#endif
789
Bram Moolenaardf177f62005-02-22 08:39:57 +0000790/*
791 * Produce the bytes for equivalence class "c".
792 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200793 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000794 */
795 static void
796reg_equi_class(c)
797 int c;
798{
799#ifdef FEAT_MBYTE
800 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000801 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000802#endif
803 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200804#ifdef EBCDIC
805 int i;
806
807 /* This might be slower than switch/case below. */
808 for (i = 0; i < 16; i++)
809 {
810 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
811 {
812 char *p = EQUIVAL_CLASS_C[i];
813
814 while (*p != 0)
815 regmbc(*p++);
816 return;
817 }
818 }
819#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000820 switch (c)
821 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000822 case 'A': case '\300': case '\301': case '\302':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200823 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
824 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000825 case '\303': case '\304': case '\305':
826 regmbc('A'); regmbc('\300'); regmbc('\301');
827 regmbc('\302'); regmbc('\303'); regmbc('\304');
828 regmbc('\305');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200829 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
830 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
831 REGMBC(0x1ea2)
832 return;
833 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
834 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000835 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000836 case 'C': case '\307':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200837 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000838 regmbc('C'); regmbc('\307');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200839 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
840 REGMBC(0x10c)
841 return;
842 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
843 CASEMBC(0x1e0e) CASEMBC(0x1e10)
844 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
845 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000846 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000847 case 'E': case '\310': case '\311': case '\312': case '\313':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200848 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
849 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000850 regmbc('E'); regmbc('\310'); regmbc('\311');
851 regmbc('\312'); regmbc('\313');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200852 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
853 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
854 REGMBC(0x1ebc)
855 return;
856 case 'F': CASEMBC(0x1e1e)
857 regmbc('F'); REGMBC(0x1e1e)
858 return;
859 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
860 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
861 CASEMBC(0x1e20)
862 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
863 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
864 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
865 return;
866 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
867 CASEMBC(0x1e26) CASEMBC(0x1e28)
868 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
869 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000870 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000871 case 'I': case '\314': case '\315': case '\316': case '\317':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200872 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
873 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000874 regmbc('I'); regmbc('\314'); regmbc('\315');
875 regmbc('\316'); regmbc('\317');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200876 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
877 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
878 REGMBC(0x1ec8)
879 return;
880 case 'J': CASEMBC(0x134)
881 regmbc('J'); REGMBC(0x134)
882 return;
883 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
884 CASEMBC(0x1e34)
885 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
886 REGMBC(0x1e30) REGMBC(0x1e34)
887 return;
888 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
889 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
890 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
891 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
892 REGMBC(0x1e3a)
893 return;
894 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
895 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000896 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000897 case 'N': case '\321':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200898 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
899 CASEMBC(0x1e48)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000900 regmbc('N'); regmbc('\321');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200901 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
902 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000903 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000904 case 'O': case '\322': case '\323': case '\324': case '\325':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200905 case '\326': case '\330':
906 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
907 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000908 regmbc('O'); regmbc('\322'); regmbc('\323');
909 regmbc('\324'); regmbc('\325'); regmbc('\326');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200910 regmbc('\330');
911 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
912 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
913 REGMBC(0x1ec) REGMBC(0x1ece)
914 return;
915 case 'P': case 0x1e54: case 0x1e56:
916 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
917 return;
918 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
919 CASEMBC(0x1e58) CASEMBC(0x1e5e)
920 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
921 REGMBC(0x1e58) REGMBC(0x1e5e)
922 return;
923 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
924 CASEMBC(0x160) CASEMBC(0x1e60)
925 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
926 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
927 return;
928 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
929 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
930 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
931 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000932 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000933 case 'U': case '\331': case '\332': case '\333': case '\334':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200934 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
935 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
936 CASEMBC(0x1ee6)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000937 regmbc('U'); regmbc('\331'); regmbc('\332');
938 regmbc('\333'); regmbc('\334');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200939 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
940 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
941 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
942 return;
943 case 'V': CASEMBC(0x1e7c)
944 regmbc('V'); REGMBC(0x1e7c)
945 return;
946 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
947 CASEMBC(0x1e84) CASEMBC(0x1e86)
948 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
949 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
950 return;
951 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
952 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000953 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000954 case 'Y': case '\335':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200955 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
956 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000957 regmbc('Y'); regmbc('\335');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200958 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
959 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
960 return;
961 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
962 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
963 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
964 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
965 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000966 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000967 case 'a': case '\340': case '\341': case '\342':
968 case '\343': case '\344': case '\345':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200969 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
970 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000971 regmbc('a'); regmbc('\340'); regmbc('\341');
972 regmbc('\342'); regmbc('\343'); regmbc('\344');
973 regmbc('\345');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200974 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
975 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
976 REGMBC(0x1ea3)
977 return;
978 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
979 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000980 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000981 case 'c': case '\347':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200982 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000983 regmbc('c'); regmbc('\347');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200984 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
985 REGMBC(0x10d)
986 return;
987 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
988 CASEMBC(0x1e11)
989 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
990 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000991 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000992 case 'e': case '\350': case '\351': case '\352': case '\353':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200993 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
994 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000995 regmbc('e'); regmbc('\350'); regmbc('\351');
996 regmbc('\352'); regmbc('\353');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200997 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
998 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
999 REGMBC(0x1ebd)
1000 return;
1001 case 'f': CASEMBC(0x1e1f)
1002 regmbc('f'); REGMBC(0x1e1f)
1003 return;
1004 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1005 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1006 CASEMBC(0x1e21)
1007 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1008 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1009 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1010 return;
1011 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1012 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1013 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1014 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1015 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001016 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001017 case 'i': case '\354': case '\355': case '\356': case '\357':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001018 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1019 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001020 regmbc('i'); regmbc('\354'); regmbc('\355');
1021 regmbc('\356'); regmbc('\357');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001022 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1023 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1024 return;
1025 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1026 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1027 return;
1028 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1029 CASEMBC(0x1e35)
1030 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1031 REGMBC(0x1e31) REGMBC(0x1e35)
1032 return;
1033 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1034 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1035 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1036 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1037 REGMBC(0x1e3b)
1038 return;
1039 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1040 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001041 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001042 case 'n': case '\361':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001043 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1044 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001045 regmbc('n'); regmbc('\361');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001046 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1047 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001048 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001049 case 'o': case '\362': case '\363': case '\364': case '\365':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001050 case '\366': case '\370':
1051 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1052 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 regmbc('o'); regmbc('\362'); regmbc('\363');
1054 regmbc('\364'); regmbc('\365'); regmbc('\366');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001055 regmbc('\370');
1056 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1057 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1058 REGMBC(0x1ed) REGMBC(0x1ecf)
1059 return;
1060 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1061 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1062 return;
1063 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1064 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1065 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1066 REGMBC(0x1e59) REGMBC(0x1e5f)
1067 return;
1068 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1069 CASEMBC(0x161) CASEMBC(0x1e61)
1070 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1071 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1072 return;
1073 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1074 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1075 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1076 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001077 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001078 case 'u': case '\371': case '\372': case '\373': case '\374':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001079 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1080 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1081 CASEMBC(0x1ee7)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001082 regmbc('u'); regmbc('\371'); regmbc('\372');
1083 regmbc('\373'); regmbc('\374');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001084 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1085 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1086 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1087 return;
1088 case 'v': CASEMBC(0x1e7d)
1089 regmbc('v'); REGMBC(0x1e7d)
1090 return;
1091 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1092 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1093 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1094 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1095 REGMBC(0x1e98)
1096 return;
1097 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1098 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001099 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001100 case 'y': case '\375': case '\377':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001101 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1102 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001103 regmbc('y'); regmbc('\375'); regmbc('\377');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001104 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1105 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1106 return;
1107 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1108 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1109 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1110 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1111 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001112 return;
1113 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001114#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001115 }
1116 regmbc(c);
1117}
1118
1119/*
1120 * Check for a collating element "[.a.]". "pp" points to the '['.
1121 * Returns a character. Zero means that no item was recognized. Otherwise
1122 * "pp" is advanced to after the item.
1123 * Currently only single characters are recognized!
1124 */
1125 static int
1126get_coll_element(pp)
1127 char_u **pp;
1128{
1129 int c;
1130 int l = 1;
1131 char_u *p = *pp;
1132
1133 if (p[1] == '.')
1134 {
1135#ifdef FEAT_MBYTE
1136 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001137 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001138#endif
1139 if (p[l + 2] == '.' && p[l + 3] == ']')
1140 {
1141#ifdef FEAT_MBYTE
1142 if (has_mbyte)
1143 c = mb_ptr2char(p + 2);
1144 else
1145#endif
1146 c = p[2];
1147 *pp += l + 4;
1148 return c;
1149 }
1150 }
1151 return 0;
1152}
1153
1154
1155/*
1156 * Skip over a "[]" range.
1157 * "p" must point to the character after the '['.
1158 * The returned pointer is on the matching ']', or the terminating NUL.
1159 */
1160 static char_u *
1161skip_anyof(p)
1162 char_u *p;
1163{
1164 int cpo_lit; /* 'cpoptions' contains 'l' flag */
1165 int cpo_bsl; /* 'cpoptions' contains '\' flag */
1166#ifdef FEAT_MBYTE
1167 int l;
1168#endif
1169
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001170 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1171 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaardf177f62005-02-22 08:39:57 +00001172
1173 if (*p == '^') /* Complement of range. */
1174 ++p;
1175 if (*p == ']' || *p == '-')
1176 ++p;
1177 while (*p != NUL && *p != ']')
1178 {
1179#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001180 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001181 p += l;
1182 else
1183#endif
1184 if (*p == '-')
1185 {
1186 ++p;
1187 if (*p != ']' && *p != NUL)
1188 mb_ptr_adv(p);
1189 }
1190 else if (*p == '\\'
1191 && !cpo_bsl
1192 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
1193 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
1194 p += 2;
1195 else if (*p == '[')
1196 {
1197 if (get_char_class(&p) == CLASS_NONE
1198 && get_equi_class(&p) == 0
1199 && get_coll_element(&p) == 0)
1200 ++p; /* It was not a class name */
1201 }
1202 else
1203 ++p;
1204 }
1205
1206 return p;
1207}
1208
1209/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001210 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001211 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001212 * Take care of characters with a backslash in front of it.
1213 * Skip strings inside [ and ].
1214 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1215 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1216 * is changed in-place.
1217 */
1218 char_u *
1219skip_regexp(startp, dirc, magic, newp)
1220 char_u *startp;
1221 int dirc;
1222 int magic;
1223 char_u **newp;
1224{
1225 int mymagic;
1226 char_u *p = startp;
1227
1228 if (magic)
1229 mymagic = MAGIC_ON;
1230 else
1231 mymagic = MAGIC_OFF;
1232
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001233 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001234 {
1235 if (p[0] == dirc) /* found end of regexp */
1236 break;
1237 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1238 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1239 {
1240 p = skip_anyof(p + 1);
1241 if (p[0] == NUL)
1242 break;
1243 }
1244 else if (p[0] == '\\' && p[1] != NUL)
1245 {
1246 if (dirc == '?' && newp != NULL && p[1] == '?')
1247 {
1248 /* change "\?" to "?", make a copy first. */
1249 if (*newp == NULL)
1250 {
1251 *newp = vim_strsave(startp);
1252 if (*newp != NULL)
1253 p = *newp + (p - startp);
1254 }
1255 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001256 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001257 else
1258 ++p;
1259 }
1260 else
1261 ++p; /* skip next character */
1262 if (*p == 'v')
1263 mymagic = MAGIC_ALL;
1264 else if (*p == 'V')
1265 mymagic = MAGIC_NONE;
1266 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001267 }
1268 return p;
1269}
1270
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001271static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
1272
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001274 * bt_regcomp() - compile a regular expression into internal code for the
1275 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001276 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001277 *
1278 * We can't allocate space until we know how big the compiled form will be,
1279 * but we can't compile it (and thus know how big it is) until we've got a
1280 * place to put the code. So we cheat: we compile it twice, once with code
1281 * generation turned off and size counting turned on, and once "for real".
1282 * This also means that we don't allocate space until we are sure that the
1283 * thing really will compile successfully, and we never have to move the
1284 * code and thus invalidate pointers into it. (Note that it has to be in
1285 * one piece because vim_free() must be able to free it all.)
1286 *
1287 * Whether upper/lower case is to be ignored is decided when executing the
1288 * program, it does not matter here.
1289 *
1290 * Beware that the optimization-preparation code in here knows about some
1291 * of the structure of the compiled regexp.
1292 * "re_flags": RE_MAGIC and/or RE_STRING.
1293 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001294 static regprog_T *
1295bt_regcomp(expr, re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001296 char_u *expr;
1297 int re_flags;
1298{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001299 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001300 char_u *scan;
1301 char_u *longest;
1302 int len;
1303 int flags;
1304
1305 if (expr == NULL)
1306 EMSG_RET_NULL(_(e_null));
1307
1308 init_class_tab();
1309
1310 /*
1311 * First pass: determine size, legality.
1312 */
1313 regcomp_start(expr, re_flags);
1314 regcode = JUST_CALC_SIZE;
1315 regc(REGMAGIC);
1316 if (reg(REG_NOPAREN, &flags) == NULL)
1317 return NULL;
1318
1319 /* Small enough for pointer-storage convention? */
1320#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1321 if (regsize >= 65536L - 256L)
1322 EMSG_RET_NULL(_("E339: Pattern too long"));
1323#endif
1324
1325 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001326 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001327 if (r == NULL)
1328 return NULL;
1329
1330 /*
1331 * Second pass: emit code.
1332 */
1333 regcomp_start(expr, re_flags);
1334 regcode = r->program;
1335 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001336 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001337 {
1338 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001339 if (reg_toolong)
1340 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001341 return NULL;
1342 }
1343
1344 /* Dig out information for optimizations. */
1345 r->regstart = NUL; /* Worst-case defaults. */
1346 r->reganch = 0;
1347 r->regmust = NULL;
1348 r->regmlen = 0;
1349 r->regflags = regflags;
1350 if (flags & HASNL)
1351 r->regflags |= RF_HASNL;
1352 if (flags & HASLOOKBH)
1353 r->regflags |= RF_LOOKBH;
1354#ifdef FEAT_SYN_HL
1355 /* Remember whether this pattern has any \z specials in it. */
1356 r->reghasz = re_has_z;
1357#endif
1358 scan = r->program + 1; /* First BRANCH. */
1359 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1360 {
1361 scan = OPERAND(scan);
1362
1363 /* Starting-point info. */
1364 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1365 {
1366 r->reganch++;
1367 scan = regnext(scan);
1368 }
1369
1370 if (OP(scan) == EXACTLY)
1371 {
1372#ifdef FEAT_MBYTE
1373 if (has_mbyte)
1374 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1375 else
1376#endif
1377 r->regstart = *OPERAND(scan);
1378 }
1379 else if ((OP(scan) == BOW
1380 || OP(scan) == EOW
1381 || OP(scan) == NOTHING
1382 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1383 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1384 && OP(regnext(scan)) == EXACTLY)
1385 {
1386#ifdef FEAT_MBYTE
1387 if (has_mbyte)
1388 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1389 else
1390#endif
1391 r->regstart = *OPERAND(regnext(scan));
1392 }
1393
1394 /*
1395 * If there's something expensive in the r.e., find the longest
1396 * literal string that must appear and make it the regmust. Resolve
1397 * ties in favor of later strings, since the regstart check works
1398 * with the beginning of the r.e. and avoiding duplication
1399 * strengthens checking. Not a strong reason, but sufficient in the
1400 * absence of others.
1401 */
1402 /*
1403 * When the r.e. starts with BOW, it is faster to look for a regmust
1404 * first. Used a lot for "#" and "*" commands. (Added by mool).
1405 */
1406 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1407 && !(flags & HASNL))
1408 {
1409 longest = NULL;
1410 len = 0;
1411 for (; scan != NULL; scan = regnext(scan))
1412 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1413 {
1414 longest = OPERAND(scan);
1415 len = (int)STRLEN(OPERAND(scan));
1416 }
1417 r->regmust = longest;
1418 r->regmlen = len;
1419 }
1420 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001421#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001422 regdump(expr, r);
1423#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001424 r->engine = &bt_regengine;
1425 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001426}
1427
1428/*
1429 * Setup to parse the regexp. Used once to get the length and once to do it.
1430 */
1431 static void
1432regcomp_start(expr, re_flags)
1433 char_u *expr;
1434 int re_flags; /* see vim_regcomp() */
1435{
1436 initchr(expr);
1437 if (re_flags & RE_MAGIC)
1438 reg_magic = MAGIC_ON;
1439 else
1440 reg_magic = MAGIC_OFF;
1441 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001442 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001443
1444 num_complex_braces = 0;
1445 regnpar = 1;
1446 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1447#ifdef FEAT_SYN_HL
1448 regnzpar = 1;
1449 re_has_z = 0;
1450#endif
1451 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001452 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001453 regflags = 0;
1454#if defined(FEAT_SYN_HL) || defined(PROTO)
1455 had_eol = FALSE;
1456#endif
1457}
1458
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag, which tells whether the EOL item "$" was found
 * during the previous call to vim_regcomp().
 * Using a flag for this is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1470
1471/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001472 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001473 *
1474 * Caller must absorb opening parenthesis.
1475 *
1476 * Combining parenthesis handling with the base level of regular expression
1477 * is a trifle forced, but the need to tie the tails of the branches to what
1478 * follows makes it hard to avoid.
1479 */
1480 static char_u *
1481reg(paren, flagp)
1482 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1483 int *flagp;
1484{
1485 char_u *ret;
1486 char_u *br;
1487 char_u *ender;
1488 int parno = 0;
1489 int flags;
1490
1491 *flagp = HASWIDTH; /* Tentatively. */
1492
1493#ifdef FEAT_SYN_HL
1494 if (paren == REG_ZPAREN)
1495 {
1496 /* Make a ZOPEN node. */
1497 if (regnzpar >= NSUBEXP)
1498 EMSG_RET_NULL(_("E50: Too many \\z("));
1499 parno = regnzpar;
1500 regnzpar++;
1501 ret = regnode(ZOPEN + parno);
1502 }
1503 else
1504#endif
1505 if (paren == REG_PAREN)
1506 {
1507 /* Make a MOPEN node. */
1508 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001509 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001510 parno = regnpar;
1511 ++regnpar;
1512 ret = regnode(MOPEN + parno);
1513 }
1514 else if (paren == REG_NPAREN)
1515 {
1516 /* Make a NOPEN node. */
1517 ret = regnode(NOPEN);
1518 }
1519 else
1520 ret = NULL;
1521
1522 /* Pick up the branches, linking them together. */
1523 br = regbranch(&flags);
1524 if (br == NULL)
1525 return NULL;
1526 if (ret != NULL)
1527 regtail(ret, br); /* [MZ]OPEN -> first. */
1528 else
1529 ret = br;
1530 /* If one of the branches can be zero-width, the whole thing can.
1531 * If one of the branches has * at start or matches a line-break, the
1532 * whole thing can. */
1533 if (!(flags & HASWIDTH))
1534 *flagp &= ~HASWIDTH;
1535 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1536 while (peekchr() == Magic('|'))
1537 {
1538 skipchr();
1539 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001540 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001541 return NULL;
1542 regtail(ret, br); /* BRANCH -> BRANCH. */
1543 if (!(flags & HASWIDTH))
1544 *flagp &= ~HASWIDTH;
1545 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1546 }
1547
1548 /* Make a closing node, and hook it on the end. */
1549 ender = regnode(
1550#ifdef FEAT_SYN_HL
1551 paren == REG_ZPAREN ? ZCLOSE + parno :
1552#endif
1553 paren == REG_PAREN ? MCLOSE + parno :
1554 paren == REG_NPAREN ? NCLOSE : END);
1555 regtail(ret, ender);
1556
1557 /* Hook the tails of the branches to the closing node. */
1558 for (br = ret; br != NULL; br = regnext(br))
1559 regoptail(br, ender);
1560
1561 /* Check for proper termination. */
1562 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1563 {
1564#ifdef FEAT_SYN_HL
1565 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001566 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001567 else
1568#endif
1569 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001570 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001571 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001572 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001573 }
1574 else if (paren == REG_NOPAREN && peekchr() != NUL)
1575 {
1576 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001577 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001579 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580 /* NOTREACHED */
1581 }
1582 /*
1583 * Here we set the flag allowing back references to this set of
1584 * parentheses.
1585 */
1586 if (paren == REG_PAREN)
1587 had_endbrace[parno] = TRUE; /* have seen the close paren */
1588 return ret;
1589}
1590
1591/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001592 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001593 * Implements the & operator.
1594 */
1595 static char_u *
1596regbranch(flagp)
1597 int *flagp;
1598{
1599 char_u *ret;
1600 char_u *chain = NULL;
1601 char_u *latest;
1602 int flags;
1603
1604 *flagp = WORST | HASNL; /* Tentatively. */
1605
1606 ret = regnode(BRANCH);
1607 for (;;)
1608 {
1609 latest = regconcat(&flags);
1610 if (latest == NULL)
1611 return NULL;
1612 /* If one of the branches has width, the whole thing has. If one of
1613 * the branches anchors at start-of-line, the whole thing does.
1614 * If one of the branches uses look-behind, the whole thing does. */
1615 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1616 /* If one of the branches doesn't match a line-break, the whole thing
1617 * doesn't. */
1618 *flagp &= ~HASNL | (flags & HASNL);
1619 if (chain != NULL)
1620 regtail(chain, latest);
1621 if (peekchr() != Magic('&'))
1622 break;
1623 skipchr();
1624 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001625 if (reg_toolong)
1626 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001627 reginsert(MATCH, latest);
1628 chain = latest;
1629 }
1630
1631 return ret;
1632}
1633
1634/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001635 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001636 * Implements the concatenation operator.
1637 */
1638 static char_u *
1639regconcat(flagp)
1640 int *flagp;
1641{
1642 char_u *first = NULL;
1643 char_u *chain = NULL;
1644 char_u *latest;
1645 int flags;
1646 int cont = TRUE;
1647
1648 *flagp = WORST; /* Tentatively. */
1649
1650 while (cont)
1651 {
1652 switch (peekchr())
1653 {
1654 case NUL:
1655 case Magic('|'):
1656 case Magic('&'):
1657 case Magic(')'):
1658 cont = FALSE;
1659 break;
1660 case Magic('Z'):
1661#ifdef FEAT_MBYTE
1662 regflags |= RF_ICOMBINE;
1663#endif
1664 skipchr_keepstart();
1665 break;
1666 case Magic('c'):
1667 regflags |= RF_ICASE;
1668 skipchr_keepstart();
1669 break;
1670 case Magic('C'):
1671 regflags |= RF_NOICASE;
1672 skipchr_keepstart();
1673 break;
1674 case Magic('v'):
1675 reg_magic = MAGIC_ALL;
1676 skipchr_keepstart();
1677 curchr = -1;
1678 break;
1679 case Magic('m'):
1680 reg_magic = MAGIC_ON;
1681 skipchr_keepstart();
1682 curchr = -1;
1683 break;
1684 case Magic('M'):
1685 reg_magic = MAGIC_OFF;
1686 skipchr_keepstart();
1687 curchr = -1;
1688 break;
1689 case Magic('V'):
1690 reg_magic = MAGIC_NONE;
1691 skipchr_keepstart();
1692 curchr = -1;
1693 break;
1694 default:
1695 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001696 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001697 return NULL;
1698 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1699 if (chain == NULL) /* First piece. */
1700 *flagp |= flags & SPSTART;
1701 else
1702 regtail(chain, latest);
1703 chain = latest;
1704 if (first == NULL)
1705 first = latest;
1706 break;
1707 }
1708 }
1709 if (first == NULL) /* Loop ran zero times. */
1710 first = regnode(NOTHING);
1711 return first;
1712}
1713
/*
 * Parse an atom followed by an optional multi: "*", "+", "=", "?", "@..."
 * or "{...}".
 *
 * The atom is parsed with regatom().  When no multi follows, the atom and
 * its flags are returned unchanged.  Otherwise the multi is consumed and the
 * nodes implementing it are inserted before or appended after the atom.
 *
 * "flagp" receives the flags of the resulting piece.
 *
 * Returns a pointer to the piece, or NULL when regatom() or read_limits()
 * failed or an error message was given (E59, E60, E61, E62).
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
    static char_u *
regpiece(flagp)
    int             *flagp;
{
    char_u          *ret;
    int             op;
    char_u          *next;
    int             flags;
    long            minval;
    long            maxval;

    ret = regatom(&flags);
    if (ret == NULL)
        return NULL;

    /* Without a multi the piece is just the atom. */
    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
        *flagp = flags;
        return ret;
    }
    /* default flags */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
        case Magic('*'):
            if (flags & SIMPLE)
                reginsert(STAR, ret);
            else
            {
                /* Emit x* as (x&|), where & means "self". */
                reginsert(BRANCH, ret);         /* Either x */
                regoptail(ret, regnode(BACK));  /* and loop */
                regoptail(ret, ret);            /* back */
                regtail(ret, regnode(BRANCH));  /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            break;

        case Magic('+'):
            if (flags & SIMPLE)
                reginsert(PLUS, ret);
            else
            {
                /* Emit x+ as x(&|), where & means "self". */
                next = regnode(BRANCH);         /* Either */
                regtail(ret, next);
                regtail(regnode(BACK), ret);    /* loop back */
                regtail(next, regnode(BRANCH)); /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            /* Unlike the default flags, "+" sets HASWIDTH and not SPSTART. */
            *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;

        case Magic('@'):
            {
                /* END doubles as "no valid character seen after @". */
                int     lop = END;

                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH;  break;                 /* \@= */
                    case '!': lop = NOMATCH; break;                /* \@! */
                    case '>': lop = SUBPAT; break;                 /* \@> */
                    case '<': switch (no_Magic(getchr()))
                              {
                                  case '=': lop = BEHIND; break;   /* \@<= */
                                  case '!': lop = NOBEHIND; break; /* \@<! */
                              }
                }
                if (lop == END)
                    EMSG2_RET_NULL(_("E59: invalid character after %s@"),
                                                      reg_magic == MAGIC_ALL);
                /* Look behind must match with behind_pos. */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    regtail(ret, regnode(BHPOS));
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
                reginsert(lop, ret);
                break;
            }

        case Magic('?'):
        case Magic('='):
            /* Emit x= as (x|) */
            reginsert(BRANCH, ret);             /* Either x */
            regtail(ret, regnode(BRANCH));      /* or */
            next = regnode(NOTHING);            /* null. */
            regtail(ret, next);
            regoptail(ret, next);
            break;

        case Magic('{'):
            if (!read_limits(&minval, &maxval))
                return NULL;
            if (flags & SIMPLE)
            {
                reginsert(BRACE_SIMPLE, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
            }
            else
            {
                /* Each complex brace gets its own opcode
                 * BRACE_COMPLEX + 0 .. BRACE_COMPLEX + 9, so at most ten
                 * are accepted. */
                if (num_complex_braces >= 10)
                    EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
                                                      reg_magic == MAGIC_ALL);
                reginsert(BRACE_COMPLEX + num_complex_braces, ret);
                regoptail(ret, regnode(BACK));
                regoptail(ret, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
                ++num_complex_braces;
            }
            /* Both limits above zero: flagged as having width. */
            if (minval > 0 && maxval > 0)
                *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
        /* Can't have a multi follow a multi. */
        if (peekchr() == Magic('*'))
            sprintf((char *)IObuff, _("E61: Nested %s*"),
                                            reg_magic >= MAGIC_ON ? "" : "\\");
        else
            sprintf((char *)IObuff, _("E62: Nested %s%c"),
                reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
        EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1855
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001856/* When making changes to classchars also change nfa_classcodes. */
1857static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1858static int classcodes[] = {
1859 ANY, IDENT, SIDENT, KWORD, SKWORD,
1860 FNAME, SFNAME, PRINT, SPRINT,
1861 WHITE, NWHITE, DIGIT, NDIGIT,
1862 HEX, NHEX, OCTAL, NOCTAL,
1863 WORD, NWORD, HEAD, NHEAD,
1864 ALPHA, NALPHA, LOWER, NLOWER,
1865 UPPER, NUPPER
1866};
1867
/*
 * Parse the lowest level: one atom.
 *
 * Reads one item from the pattern with getchr() and emits the node(s) for it:
 * anchors, character classes, "\n", the grouping forms "\(", "\z(" and
 * "\%(", "~", back references, "\z" and "\%" items, "[]" collections and
 * literal text.
 *
 * "flagp" starts as WORST and gets HASWIDTH, SIMPLE, HASNL, SPSTART and
 * HASLOOKBH added as appropriate for the atom.
 *
 * Returns a pointer to the emitted node, or NULL when an error message was
 * given or a nested reg() call failed.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Don't do this when one_exactly is set.
 */
    static char_u *
regatom(flagp)
    int            *flagp;
{
    char_u          *ret;
    int             flags;
    int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
    int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
    int             c;
    char_u          *p;
    int             extra = 0;      /* ADD_NL after "\_", added to the opcode */

    *flagp = WORST;             /* Tentatively. */
    cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
    cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;

    c = getchr();
    switch (c)
    {
      case Magic('^'):
        ret = regnode(BOL);
        break;

      case Magic('$'):
        ret = regnode(EOL);
#if defined(FEAT_SYN_HL) || defined(PROTO)
        had_eol = TRUE;
#endif
        break;

      case Magic('<'):
        ret = regnode(BOW);
        break;

      case Magic('>'):
        ret = regnode(EOW);
        break;

      case Magic('_'):
        c = no_Magic(getchr());
        if (c == '^')           /* "\_^" is start-of-line */
        {
            ret = regnode(BOL);
            break;
        }
        if (c == '$')           /* "\_$" is end-of-line */
        {
            ret = regnode(EOL);
#if defined(FEAT_SYN_HL) || defined(PROTO)
            had_eol = TRUE;
#endif
            break;
        }

        /* From here on the item also matches a line break. */
        extra = ADD_NL;
        *flagp |= HASNL;

        /* "\_[" is character range plus newline */
        if (c == '[')
            goto collection;

        /* "\_x" is character class plus newline */
        /*FALLTHROUGH*/

        /*
         * Character classes.
         */
      case Magic('.'):
      case Magic('i'):
      case Magic('I'):
      case Magic('k'):
      case Magic('K'):
      case Magic('f'):
      case Magic('F'):
      case Magic('p'):
      case Magic('P'):
      case Magic('s'):
      case Magic('S'):
      case Magic('d'):
      case Magic('D'):
      case Magic('x'):
      case Magic('X'):
      case Magic('o'):
      case Magic('O'):
      case Magic('w'):
      case Magic('W'):
      case Magic('h'):
      case Magic('H'):
      case Magic('a'):
      case Magic('A'):
      case Magic('l'):
      case Magic('L'):
      case Magic('u'):
      case Magic('U'):
        /* The position in classchars is the index in classcodes.  Not found
         * can only happen when coming from "\_" above. */
        p = vim_strchr(classchars, no_Magic(c));
        if (p == NULL)
            EMSG_RET_NULL(_("E63: invalid use of \\_"));
#ifdef FEAT_MBYTE
        /* When '.' is followed by a composing char ignore the dot, so that
         * the composing char is matched here. */
        if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
        {
            c = getchr();
            goto do_multibyte;
        }
#endif
        ret = regnode(classcodes[p - classchars] + extra);
        *flagp |= HASWIDTH | SIMPLE;
        break;

      case Magic('n'):
        if (reg_string)
        {
            /* In a string "\n" matches a newline character. */
            ret = regnode(EXACTLY);
            regc(NL);
            regc(NUL);
            *flagp |= HASWIDTH | SIMPLE;
        }
        else
        {
            /* In buffer text "\n" matches the end of a line. */
            ret = regnode(NEWL);
            *flagp |= HASWIDTH | HASNL;
        }
        break;

      case Magic('('):
        if (one_exactly)
            EMSG_ONE_RET_NULL;
        ret = reg(REG_PAREN, &flags);
        if (ret == NULL)
            return NULL;
        *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
        break;

      case NUL:
      case Magic('|'):
      case Magic('&'):
      case Magic(')'):
        if (one_exactly)
            EMSG_ONE_RET_NULL;
        EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
        /* NOTREACHED */

      case Magic('='):
      case Magic('?'):
      case Magic('+'):
      case Magic('@'):
      case Magic('{'):
      case Magic('*'):
        /* A multi where an atom is expected.  The backslash is shown in the
         * message when the current magic level requires it for this item. */
        c = no_Magic(c);
        sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
                (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
                ? "" : "\\", c);
        EMSG_RET_NULL(IObuff);
        /* NOTREACHED */

      case Magic('~'):          /* previous substitute pattern */
        if (reg_prev_sub != NULL)
        {
            char_u          *lp;

            /* Emit the text of reg_prev_sub as a literal string. */
            ret = regnode(EXACTLY);
            lp = reg_prev_sub;
            while (*lp != NUL)
                regc(*lp++);
            regc(NUL);
            if (*reg_prev_sub != NUL)
            {
                *flagp |= HASWIDTH;
                /* Exactly one byte was copied. */
                if ((lp - reg_prev_sub) == 1)
                    *flagp |= SIMPLE;
            }
        }
        else
            EMSG_RET_NULL(_(e_nopresub));
        break;

      case Magic('1'):
      case Magic('2'):
      case Magic('3'):
      case Magic('4'):
      case Magic('5'):
      case Magic('6'):
      case Magic('7'):
      case Magic('8'):
      case Magic('9'):
            {
                int                 refnum;

                refnum = c - Magic('0');
                /*
                 * Check if the back reference is legal. We must have seen the
                 * close brace.
                 * TODO: Should also check that we don't refer to something
                 * that is repeated (+*=): what instance of the repetition
                 * should we match?
                 */
                if (!had_endbrace[refnum])
                {
                    /* Trick: check if "@<=" or "@<!" follows, in which case
                     * the \1 can appear before the referenced match. */
                    for (p = regparse; *p != NUL; ++p)
                        if (p[0] == '@' && p[1] == '<'
                                              && (p[2] == '!' || p[2] == '='))
                            break;
                    if (*p == NUL)
                        EMSG_RET_NULL(_("E65: Illegal back reference"));
                }
                ret = regnode(BACKREF + refnum);
            }
            break;

      case Magic('z'):
        {
            /* "\z" items; "\z(" and "\z1" - "\z9" only with FEAT_SYN_HL. */
            c = no_Magic(getchr());
            switch (c)
            {
#ifdef FEAT_SYN_HL
                case '(': if (reg_do_extmatch != REX_SET)
                              EMSG_RET_NULL(_("E66: \\z( not allowed here"));
                          if (one_exactly)
                              EMSG_ONE_RET_NULL;
                          ret = reg(REG_ZPAREN, &flags);
                          if (ret == NULL)
                              return NULL;
                          *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
                          re_has_z = REX_SET;
                          break;

                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9': if (reg_do_extmatch != REX_USE)
                              EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
                          ret = regnode(ZREF + c - '0');
                          re_has_z = REX_USE;
                          break;
#endif

                case 's': ret = regnode(MOPEN + 0);
                          break;

                case 'e': ret = regnode(MCLOSE + 0);
                          break;

                default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
            }
        }
        break;

      case Magic('%'):
        {
            c = no_Magic(getchr());
            switch (c)
            {
                /* () without a back reference */
                case '(':
                    if (one_exactly)
                        EMSG_ONE_RET_NULL;
                    ret = reg(REG_NPAREN, &flags);
                    if (ret == NULL)
                        return NULL;
                    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
                    break;

                /* Catch \%^ and \%$ regardless of where they appear in the
                 * pattern -- regardless of whether or not it makes sense. */
                case '^':
                    ret = regnode(RE_BOF);
                    break;

                case '$':
                    ret = regnode(RE_EOF);
                    break;

                case '#':
                    ret = regnode(CURSOR);
                    break;

                case 'V':
                    ret = regnode(RE_VISUAL);
                    break;

                /* \%[abc]: Emit as a list of branches, all ending at the last
                 * branch which matches nothing. */
                case '[':
                          if (one_exactly)      /* doesn't nest */
                              EMSG_ONE_RET_NULL;
                          {
                              char_u    *lastbranch;
                              char_u    *lastnode = NULL;
                              char_u    *br;

                              ret = NULL;
                              while ((c = getchr()) != ']')
                              {
                                  if (c == NUL)
                                      EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
                                                      reg_magic == MAGIC_ALL);
                                  br = regnode(BRANCH);
                                  if (ret == NULL)
                                      ret = br;
                                  else
                                      regtail(lastnode, br);

                                  /* Put the character back and parse it as
                                   * an atom of its own; one_exactly is set
                                   * for the duration of the recursive call. */
                                  ungetchr();
                                  one_exactly = TRUE;
                                  lastnode = regatom(flagp);
                                  one_exactly = FALSE;
                                  if (lastnode == NULL)
                                      return NULL;
                              }
                              if (ret == NULL)
                                  EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
                                                      reg_magic == MAGIC_ALL);
                              lastbranch = regnode(BRANCH);
                              br = regnode(NOTHING);
                              if (ret != JUST_CALC_SIZE)
                              {
                                  regtail(lastnode, br);
                                  regtail(lastbranch, br);
                                  /* connect all branches to the NOTHING
                                   * branch at the end */
                                  for (br = ret; br != lastnode; )
                                  {
                                      if (OP(br) == BRANCH)
                                      {
                                          regtail(br, lastbranch);
                                          br = OPERAND(br);
                                      }
                                      else
                                          br = regnext(br);
                                  }
                              }
                              /* The flags set by the recursive regatom()
                               * calls do not apply to the whole item. */
                              *flagp &= ~(HASWIDTH | SIMPLE);
                              break;
                          }

                case 'd':   /* %d123 decimal */
                case 'o':   /* %o123 octal */
                case 'x':   /* %xab hex 2 */
                case 'u':   /* %uabcd hex 4 */
                case 'U':   /* %U1234abcd hex 8 */
                          {
                              int i;

                              switch (c)
                              {
                                  case 'd': i = getdecchrs(); break;
                                  case 'o': i = getoctchrs(); break;
                                  case 'x': i = gethexchrs(2); break;
                                  case 'u': i = gethexchrs(4); break;
                                  case 'U': i = gethexchrs(8); break;
                                  default:  i = -1; break;
                              }

                              /* A negative value is treated as "no valid
                               * number found". */
                              if (i < 0)
                                  EMSG2_RET_NULL(
                                        _("E678: Invalid character after %s%%[dxouU]"),
                                        reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE
                              if (use_multibytecode(i))
                                  ret = regnode(MULTIBYTECODE);
                              else
#endif
                                  ret = regnode(EXACTLY);
                              /* Character zero is emitted as 0x0a.
                               * NOTE(review): presumably because NUL in
                               * the text is represented as NL - confirm. */
                              if (i == 0)
                                  regc(0x0a);
                              else
#ifdef FEAT_MBYTE
                                  regmbc(i);
#else
                                  regc(i);
#endif
                              regc(NUL);
                              *flagp |= HASWIDTH;
                              break;
                          }

                default:
                          if (VIM_ISDIGIT(c) || c == '<' || c == '>'
                                                                 || c == '\'')
                          {
                              long_u    n = 0;
                              int       cmp;

                              /* "cmp" keeps the first character: '<' or
                               * '>' when a comparator was given, otherwise
                               * the first digit or the quote. */
                              cmp = c;
                              if (cmp == '<' || cmp == '>')
                                  c = getchr();
                              while (VIM_ISDIGIT(c))
                              {
                                  n = n * 10 + (c - '0');
                                  c = getchr();
                              }
                              if (c == '\'' && n == 0)
                              {
                                  /* "\%'m", "\%<'m" and "\%>'m": Mark */
                                  c = getchr();
                                  ret = regnode(RE_MARK);
                                  /* Two operand bytes: mark name and
                                   * comparator. */
                                  if (ret == JUST_CALC_SIZE)
                                      regsize += 2;
                                  else
                                  {
                                      *regcode++ = c;
                                      *regcode++ = cmp;
                                  }
                                  break;
                              }
                              else if (c == 'l' || c == 'c' || c == 'v')
                              {
                                  if (c == 'l')
                                      ret = regnode(RE_LNUM);
                                  else if (c == 'c')
                                      ret = regnode(RE_COL);
                                  else
                                      ret = regnode(RE_VCOL);
                                  /* Five operand bytes: the four written by
                                   * re_put_long() plus the comparator. */
                                  if (ret == JUST_CALC_SIZE)
                                      regsize += 5;
                                  else
                                  {
                                      /* put the number and the optional
                                       * comparator after the opcode */
                                      regcode = re_put_long(regcode, n);
                                      *regcode++ = cmp;
                                  }
                                  break;
                              }
                          }

                          EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
                                                      reg_magic == MAGIC_ALL);
            }
        }
        break;

      case Magic('['):
collection:
        {
            char_u      *lp;

            /*
             * If there is no matching ']', we assume the '[' is a normal
             * character.  This makes 'incsearch' and ":help [" work.
             */
            lp = skip_anyof(regparse);
            if (*lp == ']')     /* there is a matching ']' */
            {
                int     startc = -1;    /* > 0 when next '-' is a range */
                int     endc;

                /*
                 * In a character class, different parsing rules apply.
                 * Not even \ is special anymore, nothing is.
                 */
                if (*regparse == '^')       /* Complement of range. */
                {
                    ret = regnode(ANYBUT + extra);
                    regparse++;
                }
                else
                    ret = regnode(ANYOF + extra);

                /* At the start ']' and '-' mean the literal character. */
                if (*regparse == ']' || *regparse == '-')
                {
                    startc = *regparse;
                    regc(*regparse++);
                }

                while (*regparse != NUL && *regparse != ']')
                {
                    if (*regparse == '-')
                    {
                        ++regparse;
                        /* The '-' is not used for a range at the end and
                         * after or before a '\n'. */
                        if (*regparse == ']' || *regparse == NUL
                                || startc == -1
                                || (regparse[0] == '\\' && regparse[1] == 'n'))
                        {
                            regc('-');
                            startc = '-';       /* [--x] is a range */
                        }
                        else
                        {
                            /* Also accept "a-[.z.]" */
                            endc = 0;
                            if (*regparse == '[')
                                endc = get_coll_element(&regparse);
                            if (endc == 0)
                            {
#ifdef FEAT_MBYTE
                                if (has_mbyte)
                                    endc = mb_ptr2char_adv(&regparse);
                                else
#endif
                                    endc = *regparse++;
                            }

                            /* Handle \o40, \x20 and \u20AC style sequences */
                            if (endc == '\\' && !cpo_lit && !cpo_bsl)
                                endc = coll_get_char();

                            if (startc > endc)
                                EMSG_RET_NULL(_(e_invrange));
                            /* "startc" itself was emitted already, thus
                             * the loops below start at startc + 1. */
#ifdef FEAT_MBYTE
                            if (has_mbyte && ((*mb_char2len)(startc) > 1
                                                 || (*mb_char2len)(endc) > 1))
                            {
                                /* Limit to a range of 256 chars */
                                if (endc > startc + 256)
                                    EMSG_RET_NULL(_(e_invrange));
                                while (++startc <= endc)
                                    regmbc(startc);
                            }
                            else
#endif
                            {
#ifdef EBCDIC
                                int     alpha_only = FALSE;

                                /* for alphabetical range skip the gaps
                                 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
                                if (isalpha(startc) && isalpha(endc))
                                    alpha_only = TRUE;
#endif
                                while (++startc <= endc)
#ifdef EBCDIC
                                    if (!alpha_only || isalpha(startc))
#endif
                                        regc(startc);
                            }
                            startc = -1;
                        }
                    }
                    /*
                     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
                     * accepts "\t", "\e", etc., but only when the 'l' flag in
                     * 'cpoptions' is not included.
                     * Posix doesn't recognize backslash at all.
                     */
                    else if (*regparse == '\\'
                            && !cpo_bsl
                            && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
                                || (!cpo_lit
                                    && vim_strchr(REGEXP_ABBR,
                                                       regparse[1]) != NULL)))
                    {
                        regparse++;
                        if (*regparse == 'n')
                        {
                            /* '\n' in range: also match NL */
                            if (ret != JUST_CALC_SIZE)
                            {
                                /* Using \n inside [^] does not change what
                                 * matches. "[^\n]" is the same as ".". */
                                if (*ret == ANYOF)
                                {
                                    *ret = ANYOF + ADD_NL;
                                    *flagp |= HASNL;
                                }
                                /* else: must have had a \n already */
                            }
                            regparse++;
                            startc = -1;
                        }
                        else if (*regparse == 'd'
                                    || *regparse == 'o'
                                    || *regparse == 'x'
                                    || *regparse == 'u'
                                    || *regparse == 'U')
                        {
                            startc = coll_get_char();
                            if (startc == 0)
                                regc(0x0a);
                            else
#ifdef FEAT_MBYTE
                                regmbc(startc);
#else
                                regc(startc);
#endif
                        }
                        else
                        {
                            startc = backslash_trans(*regparse++);
                            regc(startc);
                        }
                    }
                    else if (*regparse == '[')
                    {
                        int c_class;
                        int cu;

                        c_class = get_char_class(&regparse);
                        startc = -1;
                        /* Characters assumed to be 8 bits! */
                        switch (c_class)
                        {
                            case CLASS_NONE:
                                /* Not a character class: try an equivalence
                                 * class, then a collating element, else the
                                 * '[' is literal. */
                                c_class = get_equi_class(&regparse);
                                if (c_class != 0)
                                {
                                    /* produce equivalence class */
                                    reg_equi_class(c_class);
                                }
                                else if ((c_class =
                                            get_coll_element(&regparse)) != 0)
                                {
                                    /* produce a collating element */
                                    regmbc(c_class);
                                }
                                else
                                {
                                    /* literal '[', allow [[-x] as a range */
                                    startc = *regparse++;
                                    regc(startc);
                                }
                                break;
                            case CLASS_ALNUM:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isalnum(cu))
                                        regc(cu);
                                break;
                            case CLASS_ALPHA:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isalpha(cu))
                                        regc(cu);
                                break;
                            case CLASS_BLANK:
                                regc(' ');
                                regc('\t');
                                break;
                            case CLASS_CNTRL:
                                for (cu = 1; cu <= 255; cu++)
                                    if (iscntrl(cu))
                                        regc(cu);
                                break;
                            case CLASS_DIGIT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (VIM_ISDIGIT(cu))
                                        regc(cu);
                                break;
                            case CLASS_GRAPH:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isgraph(cu))
                                        regc(cu);
                                break;
                            case CLASS_LOWER:
                                for (cu = 1; cu <= 255; cu++)
                                    if (MB_ISLOWER(cu))
                                        regc(cu);
                                break;
                            case CLASS_PRINT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (vim_isprintc(cu))
                                        regc(cu);
                                break;
                            case CLASS_PUNCT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (ispunct(cu))
                                        regc(cu);
                                break;
                            case CLASS_SPACE:
                                for (cu = 9; cu <= 13; cu++)
                                    regc(cu);
                                regc(' ');
                                break;
                            case CLASS_UPPER:
                                for (cu = 1; cu <= 255; cu++)
                                    if (MB_ISUPPER(cu))
                                        regc(cu);
                                break;
                            case CLASS_XDIGIT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (vim_isxdigit(cu))
                                        regc(cu);
                                break;
                            case CLASS_TAB:
                                regc('\t');
                                break;
                            case CLASS_RETURN:
                                regc('\r');
                                break;
                            case CLASS_BACKSPACE:
                                regc('\b');
                                break;
                            case CLASS_ESCAPE:
                                regc('\033');
                                break;
                        }
                    }
                    else
                    {
#ifdef FEAT_MBYTE
                        if (has_mbyte)
                        {
                            int len;

                            /* produce a multibyte character, including any
                             * following composing characters */
                            startc = mb_ptr2char(regparse);
                            len = (*mb_ptr2len)(regparse);
                            if (enc_utf8 && utf_char2len(startc) != len)
                                startc = -1;    /* composing chars */
                            while (--len >= 0)
                                regc(*regparse++);
                        }
                        else
#endif
                        {
                            startc = *regparse++;
                            regc(startc);
                        }
                    }
                }
                regc(NUL);
                prevchr_len = 1;        /* last char was the ']' */
                if (*regparse != ']')
                    EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
                skipchr();          /* let's be friends with the lexer again */
                *flagp |= HASWIDTH | SIMPLE;
                break;
            }
            else if (reg_strict)
                EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
        }
        /* FALLTHROUGH */

      default:
        {
            int         len;

#ifdef FEAT_MBYTE
            /* A multi-byte character is handled as a separate atom if it's
             * before a multi and when it's a composing char. */
            if (use_multibytecode(c))
            {
do_multibyte:
                ret = regnode(MULTIBYTECODE);
                regmbc(c);
                *flagp |= HASWIDTH | SIMPLE;
                break;
            }
#endif

            ret = regnode(EXACTLY);

            /*
             * Append characters as long as:
             * - there is no following multi, we then need the character in
             *   front of it as a single character operand
             * - not running into a Magic character
             * - "one_exactly" is not set
             * But always emit at least one character.  Might be a Multi,
             * e.g., a "[" without matching "]".
             */
            for (len = 0; c != NUL && (len == 0
                        || (re_multi_type(peekchr()) == NOT_MULTI
                            && !one_exactly
                            && !is_Magic(c))); ++len)
            {
                c = no_Magic(c);
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    regmbc(c);
                    if (enc_utf8)
                    {
                        int     l;

                        /* Need to get composing character too. */
                        for (;;)
                        {
                            l = utf_ptr2len(regparse);
                            if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
                                break;
                            regmbc(utf_ptr2char(regparse));
                            skipchr();
                        }
                    }
                }
                else
#endif
                    regc(c);
                c = getchr();
            }
            /* The character that ended the loop was not used. */
            ungetchr();

            regc(NUL);
            *flagp |= HASWIDTH;
            /* "len" counts loop iterations: one means a single character. */
            if (len == 1)
                *flagp |= SIMPLE;
        }
        break;
    }

    return ret;
}
2680
#ifdef FEAT_MBYTE
/*
 * Decide between a MULTIBYTECODE and an EXACTLY node for character "c".
 * Returns TRUE when "c" takes more than one byte in the current encoding and
 * either a multi follows in the pattern or, with utf-8, "c" is a composing
 * character.  Returns FALSE otherwise.
 */
    static int
use_multibytecode(c)
    int c;
{
    /* Single-byte characters never need MULTIBYTECODE. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
        return FALSE;

    /* Followed by a multi. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
        return TRUE;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2695
Bram Moolenaar071d4272004-06-13 20:20:40 +00002696/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002697 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002698 * Return pointer to generated code.
2699 */
2700 static char_u *
2701regnode(op)
2702 int op;
2703{
2704 char_u *ret;
2705
2706 ret = regcode;
2707 if (ret == JUST_CALC_SIZE)
2708 regsize += 3;
2709 else
2710 {
2711 *regcode++ = op;
2712 *regcode++ = NUL; /* Null "next" pointer. */
2713 *regcode++ = NUL;
2714 }
2715 return ret;
2716}
2717
2718/*
2719 * Emit (if appropriate) a byte of code
2720 */
2721 static void
2722regc(b)
2723 int b;
2724{
2725 if (regcode == JUST_CALC_SIZE)
2726 regsize++;
2727 else
2728 *regcode++ = b;
2729}
2730
#ifdef FEAT_MBYTE
/*
 * Emit character "c" as its (possibly multi-byte) byte sequence, or only
 * count its bytes when just computing the size.
 * A character above 0xff is dropped when multi-byte is not active.
 */
    static void
regmbc(c)
    int         c;
{
    if (c > 0xff && !has_mbyte)
        return;

    if (regcode != JUST_CALC_SIZE)
    {
        regcode += (*mb_char2bytes)(c, regcode);
        return;
    }
    regsize += (*mb_char2len)(c);
}
#endif
2747
2748/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002749 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002750 *
2751 * Means relocating the operand.
2752 */
2753 static void
2754reginsert(op, opnd)
2755 int op;
2756 char_u *opnd;
2757{
2758 char_u *src;
2759 char_u *dst;
2760 char_u *place;
2761
2762 if (regcode == JUST_CALC_SIZE)
2763 {
2764 regsize += 3;
2765 return;
2766 }
2767 src = regcode;
2768 regcode += 3;
2769 dst = regcode;
2770 while (src > opnd)
2771 *--dst = *--src;
2772
2773 place = opnd; /* Op node, where operand used to be. */
2774 *place++ = op;
2775 *place++ = NUL;
2776 *place = NUL;
2777}
2778
2779/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002780 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002781 * The operator has the given limit values as operands. Also set next pointer.
2782 *
2783 * Means relocating the operand.
2784 */
2785 static void
2786reginsert_limits(op, minval, maxval, opnd)
2787 int op;
2788 long minval;
2789 long maxval;
2790 char_u *opnd;
2791{
2792 char_u *src;
2793 char_u *dst;
2794 char_u *place;
2795
2796 if (regcode == JUST_CALC_SIZE)
2797 {
2798 regsize += 11;
2799 return;
2800 }
2801 src = regcode;
2802 regcode += 11;
2803 dst = regcode;
2804 while (src > opnd)
2805 *--dst = *--src;
2806
2807 place = opnd; /* Op node, where operand used to be. */
2808 *place++ = op;
2809 *place++ = NUL;
2810 *place++ = NUL;
2811 place = re_put_long(place, (long_u)minval);
2812 place = re_put_long(place, (long_u)maxval);
2813 regtail(opnd, place);
2814}
2815
2816/*
2817 * Write a long as four bytes at "p" and return pointer to the next char.
2818 */
2819 static char_u *
2820re_put_long(p, val)
2821 char_u *p;
2822 long_u val;
2823{
2824 *p++ = (char_u) ((val >> 24) & 0377);
2825 *p++ = (char_u) ((val >> 16) & 0377);
2826 *p++ = (char_u) ((val >> 8) & 0377);
2827 *p++ = (char_u) (val & 0377);
2828 return p;
2829}
2830
2831/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002832 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002833 */
2834 static void
2835regtail(p, val)
2836 char_u *p;
2837 char_u *val;
2838{
2839 char_u *scan;
2840 char_u *temp;
2841 int offset;
2842
2843 if (p == JUST_CALC_SIZE)
2844 return;
2845
2846 /* Find last node. */
2847 scan = p;
2848 for (;;)
2849 {
2850 temp = regnext(scan);
2851 if (temp == NULL)
2852 break;
2853 scan = temp;
2854 }
2855
Bram Moolenaar582fd852005-03-28 20:58:01 +00002856 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002857 offset = (int)(scan - val);
2858 else
2859 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002860 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002861 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002862 * values in too many places. */
2863 if (offset > 0xffff)
2864 reg_toolong = TRUE;
2865 else
2866 {
2867 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2868 *(scan + 2) = (char_u) (offset & 0377);
2869 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002870}
2871
2872/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002873 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002874 */
2875 static void
2876regoptail(p, val)
2877 char_u *p;
2878 char_u *val;
2879{
2880 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2881 if (p == NULL || p == JUST_CALC_SIZE
2882 || (OP(p) != BRANCH
2883 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2884 return;
2885 regtail(OPERAND(p), val);
2886}
2887
/*
 * Functions for getting characters from the regexp input.
 */

/* TRUE when on the first character: set by initchr() and when peekchr()
 * recognizes a magic '^', cleared by skipchr(). */
static int      at_start;

/* TRUE when on the second character: skipchr() copies the old value of
 * "at_start" into it. */
static int      prev_at_start;
2894
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002895/*
2896 * Start parsing at "str".
2897 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002898 static void
2899initchr(str)
2900 char_u *str;
2901{
2902 regparse = str;
2903 prevchr_len = 0;
2904 curchr = prevprevchr = prevchr = nextchr = -1;
2905 at_start = TRUE;
2906 prev_at_start = FALSE;
2907}
2908
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002909/*
2910 * Get the next character without advancing.
2911 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002912 static int
2913peekchr()
2914{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002915 static int after_slash = FALSE;
2916
Bram Moolenaar071d4272004-06-13 20:20:40 +00002917 if (curchr == -1)
2918 {
2919 switch (curchr = regparse[0])
2920 {
2921 case '.':
2922 case '[':
2923 case '~':
2924 /* magic when 'magic' is on */
2925 if (reg_magic >= MAGIC_ON)
2926 curchr = Magic(curchr);
2927 break;
2928 case '(':
2929 case ')':
2930 case '{':
2931 case '%':
2932 case '+':
2933 case '=':
2934 case '?':
2935 case '@':
2936 case '!':
2937 case '&':
2938 case '|':
2939 case '<':
2940 case '>':
2941 case '#': /* future ext. */
2942 case '"': /* future ext. */
2943 case '\'': /* future ext. */
2944 case ',': /* future ext. */
2945 case '-': /* future ext. */
2946 case ':': /* future ext. */
2947 case ';': /* future ext. */
2948 case '`': /* future ext. */
2949 case '/': /* Can't be used in / command */
2950 /* magic only after "\v" */
2951 if (reg_magic == MAGIC_ALL)
2952 curchr = Magic(curchr);
2953 break;
2954 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002955 /* * is not magic as the very first character, eg "?*ptr", when
2956 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2957 * "\(\*" is not magic, thus must be magic if "after_slash" */
2958 if (reg_magic >= MAGIC_ON
2959 && !at_start
2960 && !(prev_at_start && prevchr == Magic('^'))
2961 && (after_slash
2962 || (prevchr != Magic('(')
2963 && prevchr != Magic('&')
2964 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002965 curchr = Magic('*');
2966 break;
2967 case '^':
2968 /* '^' is only magic as the very first character and if it's after
2969 * "\(", "\|", "\&' or "\n" */
2970 if (reg_magic >= MAGIC_OFF
2971 && (at_start
2972 || reg_magic == MAGIC_ALL
2973 || prevchr == Magic('(')
2974 || prevchr == Magic('|')
2975 || prevchr == Magic('&')
2976 || prevchr == Magic('n')
2977 || (no_Magic(prevchr) == '('
2978 && prevprevchr == Magic('%'))))
2979 {
2980 curchr = Magic('^');
2981 at_start = TRUE;
2982 prev_at_start = FALSE;
2983 }
2984 break;
2985 case '$':
2986 /* '$' is only magic as the very last char and if it's in front of
2987 * either "\|", "\)", "\&", or "\n" */
2988 if (reg_magic >= MAGIC_OFF)
2989 {
2990 char_u *p = regparse + 1;
2991
2992 /* ignore \c \C \m and \M after '$' */
2993 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
2994 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
2995 p += 2;
2996 if (p[0] == NUL
2997 || (p[0] == '\\'
2998 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
2999 || p[1] == 'n'))
3000 || reg_magic == MAGIC_ALL)
3001 curchr = Magic('$');
3002 }
3003 break;
3004 case '\\':
3005 {
3006 int c = regparse[1];
3007
3008 if (c == NUL)
3009 curchr = '\\'; /* trailing '\' */
3010 else if (
3011#ifdef EBCDIC
3012 vim_strchr(META, c)
3013#else
3014 c <= '~' && META_flags[c]
3015#endif
3016 )
3017 {
3018 /*
3019 * META contains everything that may be magic sometimes,
3020 * except ^ and $ ("\^" and "\$" are only magic after
3021 * "\v"). We now fetch the next character and toggle its
3022 * magicness. Therefore, \ is so meta-magic that it is
3023 * not in META.
3024 */
3025 curchr = -1;
3026 prev_at_start = at_start;
3027 at_start = FALSE; /* be able to say "/\*ptr" */
3028 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003029 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003030 peekchr();
3031 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003032 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003033 curchr = toggle_Magic(curchr);
3034 }
3035 else if (vim_strchr(REGEXP_ABBR, c))
3036 {
3037 /*
3038 * Handle abbreviations, like "\t" for TAB -- webb
3039 */
3040 curchr = backslash_trans(c);
3041 }
3042 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3043 curchr = toggle_Magic(c);
3044 else
3045 {
3046 /*
3047 * Next character can never be (made) magic?
3048 * Then backslashing it won't do anything.
3049 */
3050#ifdef FEAT_MBYTE
3051 if (has_mbyte)
3052 curchr = (*mb_ptr2char)(regparse + 1);
3053 else
3054#endif
3055 curchr = c;
3056 }
3057 break;
3058 }
3059
3060#ifdef FEAT_MBYTE
3061 default:
3062 if (has_mbyte)
3063 curchr = (*mb_ptr2char)(regparse);
3064#endif
3065 }
3066 }
3067
3068 return curchr;
3069}
3070
3071/*
3072 * Eat one lexed character. Do this in a way that we can undo it.
3073 */
3074 static void
3075skipchr()
3076{
3077 /* peekchr() eats a backslash, do the same here */
3078 if (*regparse == '\\')
3079 prevchr_len = 1;
3080 else
3081 prevchr_len = 0;
3082 if (regparse[prevchr_len] != NUL)
3083 {
3084#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003085 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003086 /* exclude composing chars that mb_ptr2len does include */
3087 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003088 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003089 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003090 else
3091#endif
3092 ++prevchr_len;
3093 }
3094 regparse += prevchr_len;
3095 prev_at_start = at_start;
3096 at_start = FALSE;
3097 prevprevchr = prevchr;
3098 prevchr = curchr;
3099 curchr = nextchr; /* use previously unget char, or -1 */
3100 nextchr = -1;
3101}
3102
3103/*
3104 * Skip a character while keeping the value of prev_at_start for at_start.
3105 * prevchr and prevprevchr are also kept.
3106 */
3107 static void
3108skipchr_keepstart()
3109{
3110 int as = prev_at_start;
3111 int pr = prevchr;
3112 int prpr = prevprevchr;
3113
3114 skipchr();
3115 at_start = as;
3116 prevchr = pr;
3117 prevprevchr = prpr;
3118}
3119
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer: the
 * character is obtained with peekchr() and then consumed with skipchr().
 */
    static int
getchr()
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3132
3133/*
3134 * put character back. Works only once!
3135 */
3136 static void
3137ungetchr()
3138{
3139 nextchr = curchr;
3140 curchr = prevchr;
3141 prevchr = prevprevchr;
3142 at_start = prev_at_start;
3143 prev_at_start = FALSE;
3144
3145 /* Backup regparse, so that it's at the same position as before the
3146 * getchr(). */
3147 regparse -= prevchr_len;
3148}
3149
3150/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003151 * Get and return the value of the hex string at the current position.
3152 * Return -1 if there is no valid hex number.
3153 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003154 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003155 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003156 * The parameter controls the maximum number of input characters. This will be
3157 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3158 */
3159 static int
3160gethexchrs(maxinputlen)
3161 int maxinputlen;
3162{
3163 int nr = 0;
3164 int c;
3165 int i;
3166
3167 for (i = 0; i < maxinputlen; ++i)
3168 {
3169 c = regparse[0];
3170 if (!vim_isxdigit(c))
3171 break;
3172 nr <<= 4;
3173 nr |= hex2nr(c);
3174 ++regparse;
3175 }
3176
3177 if (i == 0)
3178 return -1;
3179 return nr;
3180}
3181
3182/*
3183 * get and return the value of the decimal string immediately after the
3184 * current position. Return -1 for invalid. Consumes all digits.
3185 */
3186 static int
3187getdecchrs()
3188{
3189 int nr = 0;
3190 int c;
3191 int i;
3192
3193 for (i = 0; ; ++i)
3194 {
3195 c = regparse[0];
3196 if (c < '0' || c > '9')
3197 break;
3198 nr *= 10;
3199 nr += c - '0';
3200 ++regparse;
3201 }
3202
3203 if (i == 0)
3204 return -1;
3205 return nr;
3206}
3207
3208/*
3209 * get and return the value of the octal string immediately after the current
3210 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3211 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3212 * treat 8 or 9 as recognised characters. Position is updated:
3213 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003214 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003215 */
3216 static int
3217getoctchrs()
3218{
3219 int nr = 0;
3220 int c;
3221 int i;
3222
3223 for (i = 0; i < 3 && nr < 040; ++i)
3224 {
3225 c = regparse[0];
3226 if (c < '0' || c > '7')
3227 break;
3228 nr <<= 3;
3229 nr |= hex2nr(c);
3230 ++regparse;
3231 }
3232
3233 if (i == 0)
3234 return -1;
3235 return nr;
3236}
3237
3238/*
3239 * Get a number after a backslash that is inside [].
3240 * When nothing is recognized return a backslash.
3241 */
3242 static int
3243coll_get_char()
3244{
3245 int nr = -1;
3246
3247 switch (*regparse++)
3248 {
3249 case 'd': nr = getdecchrs(); break;
3250 case 'o': nr = getoctchrs(); break;
3251 case 'x': nr = gethexchrs(2); break;
3252 case 'u': nr = gethexchrs(4); break;
3253 case 'U': nr = gethexchrs(8); break;
3254 }
3255 if (nr < 0)
3256 {
3257 /* If getting the number fails be backwards compatible: the character
3258 * is a backslash. */
3259 --regparse;
3260 nr = '\\';
3261 }
3262 return nr;
3263}
3264
3265/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003266 * read_limits - Read two integers to be taken as a minimum and maximum.
3267 * If the first character is '-', then the range is reversed.
3268 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3269 * missing, a very big number is the default.
3270 */
3271 static int
3272read_limits(minval, maxval)
3273 long *minval;
3274 long *maxval;
3275{
3276 int reverse = FALSE;
3277 char_u *first_char;
3278 long tmp;
3279
3280 if (*regparse == '-')
3281 {
3282 /* Starts with '-', so reverse the range later */
3283 regparse++;
3284 reverse = TRUE;
3285 }
3286 first_char = regparse;
3287 *minval = getdigits(&regparse);
3288 if (*regparse == ',') /* There is a comma */
3289 {
3290 if (vim_isdigit(*++regparse))
3291 *maxval = getdigits(&regparse);
3292 else
3293 *maxval = MAX_LIMIT;
3294 }
3295 else if (VIM_ISDIGIT(*first_char))
3296 *maxval = *minval; /* It was \{n} or \{-n} */
3297 else
3298 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3299 if (*regparse == '\\')
3300 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003301 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003302 {
3303 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3304 reg_magic == MAGIC_ALL ? "" : "\\");
3305 EMSG_RET_FAIL(IObuff);
3306 }
3307
3308 /*
3309 * Reverse the range if there was a '-', or make sure it is in the right
3310 * order otherwise.
3311 */
3312 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3313 {
3314 tmp = *minval;
3315 *minval = *maxval;
3316 *maxval = tmp;
3317 }
3318 skipchr(); /* let's be friends with the lexer again */
3319 return OK;
3320}
3321
3322/*
3323 * vim_regexec and friends
3324 */
3325
3326/*
3327 * Global work variables for vim_regexec().
3328 */
3329
3330/* The current match-position is remembered with these variables: */
3331static linenr_T reglnum; /* line number, relative to first line */
3332static char_u *regline; /* start of current line */
3333static char_u *reginput; /* current input, points into "regline" */
3334
3335static int need_clear_subexpr; /* subexpressions still need to be
3336 * cleared */
3337#ifdef FEAT_SYN_HL
3338static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3339 * still need to be cleared */
3340#endif
3341
Bram Moolenaar071d4272004-06-13 20:20:40 +00003342/*
3343 * Structure used to save the current input state, when it needs to be
3344 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003345 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003346 */
3347typedef struct
3348{
3349 union
3350 {
3351 char_u *ptr; /* reginput pointer, for single-line regexp */
3352 lpos_T pos; /* reginput pos, for multi-line regexp */
3353 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003354 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003355} regsave_T;
3356
3357/* struct to save start/end pointer/position in for \(\) */
3358typedef struct
3359{
3360 union
3361 {
3362 char_u *ptr;
3363 lpos_T pos;
3364 } se_u;
3365} save_se_T;
3366
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003367/* used for BEHIND and NOBEHIND matching */
3368typedef struct regbehind_S
3369{
3370 regsave_T save_after;
3371 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003372 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003373 save_se_T save_start[NSUBEXP];
3374 save_se_T save_end[NSUBEXP];
3375} regbehind_T;
3376
Bram Moolenaar071d4272004-06-13 20:20:40 +00003377static char_u *reg_getline __ARGS((linenr_T lnum));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003378static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3379static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003380static void cleanup_subexpr __ARGS((void));
3381#ifdef FEAT_SYN_HL
3382static void cleanup_zsubexpr __ARGS((void));
3383#endif
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003384static void save_subexpr __ARGS((regbehind_T *bp));
3385static void restore_subexpr __ARGS((regbehind_T *bp));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003386static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003387static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3388static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003389static int reg_save_equal __ARGS((regsave_T *save));
3390static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3391static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3392
/*
 * Save the sub-expressions before attempting a match.
 * For a multi-line match the position "posp" is handled by save_se_multi(),
 * otherwise the pointer "pp" by save_se_one().
 * The expansion is parenthesized, so that it stays one expression wherever
 * the macro is used.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/*
 * After a failed match restore the sub-expressions: put what was saved in
 * "savep" back into the position "posp" (multi-line match) or the pointer
 * "pp" (single-line match).
 * This expands to a { } block, not an expression.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3403
3404static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003405static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003406static int regrepeat __ARGS((char_u *p, long maxcount));
3407
3408#ifdef DEBUG
3409int regnarrate = 0;
3410#endif
3411
3412/*
3413 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3414 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3415 * contains '\c' or '\C' the value is overruled.
3416 */
3417static int ireg_ic;
3418
3419#ifdef FEAT_MBYTE
3420/*
3421 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3422 * in the regexp. Defaults to false, always.
3423 */
3424static int ireg_icombine;
3425#endif
3426
3427/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003428 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3429 * there is no maximum.
3430 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003431static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003432
3433/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003434 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3435 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003436 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003437 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003438static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003439static unsigned reg_tofreelen;
3440
3441/*
3442 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003443 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003444 * done:
3445 * single-line multi-line
3446 * reg_match &regmatch_T NULL
3447 * reg_mmatch NULL &regmmatch_T
3448 * reg_startp reg_match->startp <invalid>
3449 * reg_endp reg_match->endp <invalid>
3450 * reg_startpos <invalid> reg_mmatch->startpos
3451 * reg_endpos <invalid> reg_mmatch->endpos
3452 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003453 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003454 * reg_firstlnum <invalid> first line in which to search
3455 * reg_maxline 0 last line nr
3456 * reg_line_lbr FALSE or TRUE FALSE
3457 */
3458static regmatch_T *reg_match;
3459static regmmatch_T *reg_mmatch;
3460static char_u **reg_startp = NULL;
3461static char_u **reg_endp = NULL;
3462static lpos_T *reg_startpos = NULL;
3463static lpos_T *reg_endpos = NULL;
3464static win_T *reg_win;
3465static buf_T *reg_buf;
3466static linenr_T reg_firstlnum;
3467static linenr_T reg_maxline;
3468static int reg_line_lbr; /* "\n" in string is line break */
3469
/*
 * Values for rs_state in regitem_T.
 * The order of the items must not be changed; RS_ZOPEN and RS_ZCLOSE only
 * exist when FEAT_SYN_HL is defined.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3490
3491/*
3492 * When there are alternatives a regstate_T is put on the regstack to remember
3493 * what we are doing.
3494 * Before it may be another type of item, depending on rs_state, to remember
3495 * more things.
3496 */
3497typedef struct regitem_S
3498{
3499 regstate_T rs_state; /* what we are doing, one of RS_ above */
3500 char_u *rs_scan; /* current node in program */
3501 union
3502 {
3503 save_se_T sesave;
3504 regsave_T regsave;
3505 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003506 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003507} regitem_T;
3508
3509static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3510static void regstack_pop __ARGS((char_u **scan));
3511
/*
 * Used for STAR, PLUS and BRACE_SIMPLE matching.
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3521
3522/* used to store input position when a BACK was encountered, so that we now if
3523 * we made any progress since the last time. */
3524typedef struct backpos_S
3525{
3526 char_u *bp_scan; /* "scan" where BACK was encountered */
3527 regsave_T bp_pos; /* last input position */
3528} backpos_T;
3529
3530/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003531 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3532 * to avoid invoking malloc() and free() often.
3533 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3534 * or regbehind_T.
3535 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003536 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003537static garray_T regstack = {0, 0, 0, 0, NULL};
3538static garray_T backpos = {0, 0, 0, 0, NULL};
3539
3540/*
3541 * Both for regstack and backpos tables we use the following strategy of
3542 * allocation (to reduce malloc/free calls):
3543 * - Initial size is fairly small.
3544 * - When needed, the tables are grown bigger (8 times at first, double after
3545 * that).
3546 * - After executing the match we free the memory only if the array has grown.
3547 * Thus the memory is kept allocated when it's at the initial size.
3548 * This makes it fast while not keeping a lot of memory allocated.
3549 * A three times speed increase was observed when using many simple patterns.
3550 */
3551#define REGSTACK_INITIAL 2048
3552#define BACKPOS_INITIAL 64
3553
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory this file keeps allocated between calls: the saved line
 * copy, "reg_prev_sub" and the "regstack" and "backpos" arrays.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif

Bram Moolenaara7fc0102005-05-18 22:17:12 +00003564
Bram Moolenaar071d4272004-06-13 20:20:40 +00003565/*
3566 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3567 */
3568 static char_u *
3569reg_getline(lnum)
3570 linenr_T lnum;
3571{
3572 /* when looking behind for a match/no-match lnum is negative. But we
3573 * can't go before line 1 */
3574 if (reg_firstlnum + lnum < 1)
3575 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003576 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003577 /* Must have matched the "\n" in the last line. */
3578 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003579 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3580}
3581
3582static regsave_T behind_pos;
3583
3584#ifdef FEAT_SYN_HL
3585static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3586static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3587static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3588static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3589#endif
3590
3591/* TRUE if using multi-line regexp. */
3592#define REG_MULTI (reg_match == NULL)
3593
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003594static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
3595
Bram Moolenaar071d4272004-06-13 20:20:40 +00003596/*
3597 * Match a regexp against a string.
3598 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3599 * Uses curbuf for line count and 'iskeyword'.
3600 *
3601 * Return TRUE if there is a match, FALSE if not.
3602 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003603 static int
3604bt_regexec(rmp, line, col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003605 regmatch_T *rmp;
3606 char_u *line; /* string to match against */
3607 colnr_T col; /* column to start looking for match */
3608{
3609 reg_match = rmp;
3610 reg_mmatch = NULL;
3611 reg_maxline = 0;
3612 reg_line_lbr = FALSE;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003613 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003614 reg_win = NULL;
3615 ireg_ic = rmp->rm_ic;
3616#ifdef FEAT_MBYTE
3617 ireg_icombine = FALSE;
3618#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003619 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003620 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003621}
3622
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)

static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));

/*
 * Like bt_regexec(), but consider a "\n" in "line" to be a line break:
 * the only difference is that "reg_line_lbr" is set to TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    static int
bt_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        nmatched;

    /* Single-line match: only "reg_match" is set. */
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_buf = curbuf;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;

    /* Start with the flags of the caller; there is no column limit. */
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    ireg_maxcol = 0;

    nmatched = bt_regexec_both(line, col, NULL);
    return (nmatched != 0);
}
#endif
3651
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003652static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3653
Bram Moolenaar071d4272004-06-13 20:20:40 +00003654/*
3655 * Match a regexp against multiple lines.
3656 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3657 * Uses curbuf for line count and 'iskeyword'.
3658 *
3659 * Return zero if there is no match. Return number of lines contained in the
3660 * match otherwise.
3661 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003662 static long
3663bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003664 regmmatch_T *rmp;
3665 win_T *win; /* window in which to search or NULL */
3666 buf_T *buf; /* buffer in which to search */
3667 linenr_T lnum; /* nr of line to start looking for match */
3668 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003669 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003670{
3671 long r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003672
3673 reg_match = NULL;
3674 reg_mmatch = rmp;
3675 reg_buf = buf;
3676 reg_win = win;
3677 reg_firstlnum = lnum;
3678 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3679 reg_line_lbr = FALSE;
3680 ireg_ic = rmp->rmm_ic;
3681#ifdef FEAT_MBYTE
3682 ireg_icombine = FALSE;
3683#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003684 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003685
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003686 r = bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003687
3688 return r;
3689}
3690
3691/*
3692 * Match a regexp against a string ("line" points to the string) or multiple
3693 * lines ("line" is NULL, use reg_getline()).
3694 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003695 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003696bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003697 char_u *line;
3698 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003699 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003700{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003701 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003702 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003703 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003704
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003705 /* Create "regstack" and "backpos" if they are not allocated yet.
3706 * We allocate *_INITIAL amount of bytes first and then set the grow size
3707 * to much bigger value to avoid many malloc calls in case of deep regular
3708 * expressions. */
3709 if (regstack.ga_data == NULL)
3710 {
3711 /* Use an item size of 1 byte, since we push different things
3712 * onto the regstack. */
3713 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3714 ga_grow(&regstack, REGSTACK_INITIAL);
3715 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3716 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003717
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003718 if (backpos.ga_data == NULL)
3719 {
3720 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3721 ga_grow(&backpos, BACKPOS_INITIAL);
3722 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3723 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003724
Bram Moolenaar071d4272004-06-13 20:20:40 +00003725 if (REG_MULTI)
3726 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003727 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003728 line = reg_getline((linenr_T)0);
3729 reg_startpos = reg_mmatch->startpos;
3730 reg_endpos = reg_mmatch->endpos;
3731 }
3732 else
3733 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003734 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003735 reg_startp = reg_match->startp;
3736 reg_endp = reg_match->endp;
3737 }
3738
3739 /* Be paranoid... */
3740 if (prog == NULL || line == NULL)
3741 {
3742 EMSG(_(e_null));
3743 goto theend;
3744 }
3745
3746 /* Check validity of program. */
3747 if (prog_magic_wrong())
3748 goto theend;
3749
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003750 /* If the start column is past the maximum column: no need to try. */
3751 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3752 goto theend;
3753
Bram Moolenaar071d4272004-06-13 20:20:40 +00003754 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3755 if (prog->regflags & RF_ICASE)
3756 ireg_ic = TRUE;
3757 else if (prog->regflags & RF_NOICASE)
3758 ireg_ic = FALSE;
3759
3760#ifdef FEAT_MBYTE
3761 /* If pattern contains "\Z" overrule value of ireg_icombine */
3762 if (prog->regflags & RF_ICOMBINE)
3763 ireg_icombine = TRUE;
3764#endif
3765
3766 /* If there is a "must appear" string, look for it. */
3767 if (prog->regmust != NULL)
3768 {
3769 int c;
3770
3771#ifdef FEAT_MBYTE
3772 if (has_mbyte)
3773 c = (*mb_ptr2char)(prog->regmust);
3774 else
3775#endif
3776 c = *prog->regmust;
3777 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003778
3779 /*
3780 * This is used very often, esp. for ":global". Use three versions of
3781 * the loop to avoid overhead of conditions.
3782 */
3783 if (!ireg_ic
3784#ifdef FEAT_MBYTE
3785 && !has_mbyte
3786#endif
3787 )
3788 while ((s = vim_strbyte(s, c)) != NULL)
3789 {
3790 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3791 break; /* Found it. */
3792 ++s;
3793 }
3794#ifdef FEAT_MBYTE
3795 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3796 while ((s = vim_strchr(s, c)) != NULL)
3797 {
3798 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3799 break; /* Found it. */
3800 mb_ptr_adv(s);
3801 }
3802#endif
3803 else
3804 while ((s = cstrchr(s, c)) != NULL)
3805 {
3806 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3807 break; /* Found it. */
3808 mb_ptr_adv(s);
3809 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003810 if (s == NULL) /* Not present. */
3811 goto theend;
3812 }
3813
3814 regline = line;
3815 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003816 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003817
3818 /* Simplest case: Anchored match need be tried only once. */
3819 if (prog->reganch)
3820 {
3821 int c;
3822
3823#ifdef FEAT_MBYTE
3824 if (has_mbyte)
3825 c = (*mb_ptr2char)(regline + col);
3826 else
3827#endif
3828 c = regline[col];
3829 if (prog->regstart == NUL
3830 || prog->regstart == c
3831 || (ireg_ic && ((
3832#ifdef FEAT_MBYTE
3833 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3834 || (c < 255 && prog->regstart < 255 &&
3835#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003836 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003837 retval = regtry(prog, col);
3838 else
3839 retval = 0;
3840 }
3841 else
3842 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003843#ifdef FEAT_RELTIME
3844 int tm_count = 0;
3845#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003846 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003847 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003848 {
3849 if (prog->regstart != NUL)
3850 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003851 /* Skip until the char we know it must start with.
3852 * Used often, do some work to avoid call overhead. */
3853 if (!ireg_ic
3854#ifdef FEAT_MBYTE
3855 && !has_mbyte
3856#endif
3857 )
3858 s = vim_strbyte(regline + col, prog->regstart);
3859 else
3860 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003861 if (s == NULL)
3862 {
3863 retval = 0;
3864 break;
3865 }
3866 col = (int)(s - regline);
3867 }
3868
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003869 /* Check for maximum column to try. */
3870 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3871 {
3872 retval = 0;
3873 break;
3874 }
3875
Bram Moolenaar071d4272004-06-13 20:20:40 +00003876 retval = regtry(prog, col);
3877 if (retval > 0)
3878 break;
3879
3880 /* if not currently on the first line, get it again */
3881 if (reglnum != 0)
3882 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003883 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003884 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003885 }
3886 if (regline[col] == NUL)
3887 break;
3888#ifdef FEAT_MBYTE
3889 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003890 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003891 else
3892#endif
3893 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003894#ifdef FEAT_RELTIME
3895 /* Check for timeout once in a twenty times to avoid overhead. */
3896 if (tm != NULL && ++tm_count == 20)
3897 {
3898 tm_count = 0;
3899 if (profile_passed_limit(tm))
3900 break;
3901 }
3902#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003903 }
3904 }
3905
Bram Moolenaar071d4272004-06-13 20:20:40 +00003906theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003907 /* Free "reg_tofree" when it's a bit big.
3908 * Free regstack and backpos if they are bigger than their initial size. */
3909 if (reg_tofreelen > 400)
3910 {
3911 vim_free(reg_tofree);
3912 reg_tofree = NULL;
3913 }
3914 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3915 ga_clear(&regstack);
3916 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3917 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003918
Bram Moolenaar071d4272004-06-13 20:20:40 +00003919 return retval;
3920}
3921
3922#ifdef FEAT_SYN_HL
3923static reg_extmatch_T *make_extmatch __ARGS((void));
3924
3925/*
3926 * Create a new extmatch and mark it as referenced once.
3927 */
3928 static reg_extmatch_T *
3929make_extmatch()
3930{
3931 reg_extmatch_T *em;
3932
3933 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3934 if (em != NULL)
3935 em->refcnt = 1;
3936 return em;
3937}
3938
3939/*
3940 * Add a reference to an extmatch.
3941 */
3942 reg_extmatch_T *
3943ref_extmatch(em)
3944 reg_extmatch_T *em;
3945{
3946 if (em != NULL)
3947 em->refcnt++;
3948 return em;
3949}
3950
3951/*
3952 * Remove a reference to an extmatch. If there are no references left, free
3953 * the info.
3954 */
3955 void
3956unref_extmatch(em)
3957 reg_extmatch_T *em;
3958{
3959 int i;
3960
3961 if (em != NULL && --em->refcnt <= 0)
3962 {
3963 for (i = 0; i < NSUBEXP; ++i)
3964 vim_free(em->matches[i]);
3965 vim_free(em);
3966 }
3967}
3968#endif
3969
3970/*
3971 * regtry - try match of "prog" with at regline["col"].
3972 * Returns 0 for failure, number of lines contained in the match otherwise.
3973 */
3974 static long
3975regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003976 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003977 colnr_T col;
3978{
3979 reginput = regline + col;
3980 need_clear_subexpr = TRUE;
3981#ifdef FEAT_SYN_HL
3982 /* Clear the external match subpointers if necessary. */
3983 if (prog->reghasz == REX_SET)
3984 need_clear_zsubexpr = TRUE;
3985#endif
3986
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003987 if (regmatch(prog->program + 1) == 0)
3988 return 0;
3989
3990 cleanup_subexpr();
3991 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003992 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003993 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003994 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003995 reg_startpos[0].lnum = 0;
3996 reg_startpos[0].col = col;
3997 }
3998 if (reg_endpos[0].lnum < 0)
3999 {
4000 reg_endpos[0].lnum = reglnum;
4001 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004002 }
4003 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004004 /* Use line number of "\ze". */
4005 reglnum = reg_endpos[0].lnum;
4006 }
4007 else
4008 {
4009 if (reg_startp[0] == NULL)
4010 reg_startp[0] = regline + col;
4011 if (reg_endp[0] == NULL)
4012 reg_endp[0] = reginput;
4013 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004014#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004015 /* Package any found \z(...\) matches for export. Default is none. */
4016 unref_extmatch(re_extmatch_out);
4017 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004018
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004019 if (prog->reghasz == REX_SET)
4020 {
4021 int i;
4022
4023 cleanup_zsubexpr();
4024 re_extmatch_out = make_extmatch();
4025 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004026 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004027 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004028 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004029 /* Only accept single line matches. */
4030 if (reg_startzpos[i].lnum >= 0
4031 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
4032 re_extmatch_out->matches[i] =
4033 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004034 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004035 reg_endzpos[i].col - reg_startzpos[i].col);
4036 }
4037 else
4038 {
4039 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4040 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004041 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004042 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004043 }
4044 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004045 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004046#endif
4047 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004048}
4049
4050#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00004051static int reg_prev_class __ARGS((void));
4052
Bram Moolenaar071d4272004-06-13 20:20:40 +00004053/*
4054 * Get class of previous character.
4055 */
4056 static int
4057reg_prev_class()
4058{
4059 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004060 return mb_get_class_buf(reginput - 1
4061 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004062 return -1;
4063}
4064
Bram Moolenaar071d4272004-06-13 20:20:40 +00004065#endif
/* Advance "reginput" using mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  Logically these belong to
 * regmatch(), but they are kept at file scope to reduce the amount of stack
 * space used (it can be called recursively many times).
 */
static long bl_minval = 0;
static long bl_maxval = 0;
4075
4076/*
4077 * regmatch - main matching routine
4078 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004079 * Conceptually the strategy is simple: Check to see whether the current node
4080 * matches, push an item onto the regstack and loop to see whether the rest
4081 * matches, and then act accordingly. In practice we make some effort to
4082 * avoid using the regstack, in particular by going through "ordinary" nodes
4083 * (that don't need to know whether the rest of the match failed) by a nested
4084 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004085 *
4086 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4087 * the last matched character.
4088 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4089 * undefined state!
4090 */
4091 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004092regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004093 char_u *scan; /* Current node. */
4094{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004095 char_u *next; /* Next node. */
4096 int op;
4097 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004098 regitem_T *rp;
4099 int no;
4100 int status; /* one of the RA_ values: */
4101#define RA_FAIL 1 /* something failed, abort */
4102#define RA_CONT 2 /* continue in inner loop */
4103#define RA_BREAK 3 /* break inner loop */
4104#define RA_MATCH 4 /* successful match */
4105#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004106
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004107 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004108 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004109 regstack.ga_len = 0;
4110 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004111
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004112 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004113 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004114 */
4115 for (;;)
4116 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004117 /* Some patterns may cause a long time to match, even though they are not
Bram Moolenaar071d4272004-06-13 20:20:40 +00004118 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
4119 fast_breakcheck();
4120
4121#ifdef DEBUG
4122 if (scan != NULL && regnarrate)
4123 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004124 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004125 mch_errmsg("(\n");
4126 }
4127#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004128
4129 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004130 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004131 * regstack.
4132 */
4133 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004134 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004135 if (got_int || scan == NULL)
4136 {
4137 status = RA_FAIL;
4138 break;
4139 }
4140 status = RA_CONT;
4141
Bram Moolenaar071d4272004-06-13 20:20:40 +00004142#ifdef DEBUG
4143 if (regnarrate)
4144 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004145 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004146 mch_errmsg("...\n");
4147# ifdef FEAT_SYN_HL
4148 if (re_extmatch_in != NULL)
4149 {
4150 int i;
4151
4152 mch_errmsg(_("External submatches:\n"));
4153 for (i = 0; i < NSUBEXP; i++)
4154 {
4155 mch_errmsg(" \"");
4156 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004157 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004158 mch_errmsg("\"\n");
4159 }
4160 }
4161# endif
4162 }
4163#endif
4164 next = regnext(scan);
4165
4166 op = OP(scan);
4167 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004168 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4169 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004170 {
4171 reg_nextline();
4172 }
4173 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4174 {
4175 ADVANCE_REGINPUT();
4176 }
4177 else
4178 {
4179 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004180 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004181#ifdef FEAT_MBYTE
4182 if (has_mbyte)
4183 c = (*mb_ptr2char)(reginput);
4184 else
4185#endif
4186 c = *reginput;
4187 switch (op)
4188 {
4189 case BOL:
4190 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004191 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004192 break;
4193
4194 case EOL:
4195 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004196 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004197 break;
4198
4199 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004200 /* We're not at the beginning of the file when below the first
4201 * line where we started, not at the start of the line or we
4202 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004203 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004204 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004205 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004206 break;
4207
4208 case RE_EOF:
4209 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004210 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004211 break;
4212
4213 case CURSOR:
4214 /* Check if the buffer is in a window and compare the
4215 * reg_win->w_cursor position to the match position. */
4216 if (reg_win == NULL
4217 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4218 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004219 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004220 break;
4221
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004222 case RE_MARK:
4223 /* Compare the mark position to the match position. NOTE: Always
4224 * uses the current buffer. */
4225 {
4226 int mark = OPERAND(scan)[0];
4227 int cmp = OPERAND(scan)[1];
4228 pos_T *pos;
4229
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004230 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004231 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004232 || pos->lnum <= 0 /* mark isn't set (in curbuf) */
4233 || (pos->lnum == reglnum + reg_firstlnum
4234 ? (pos->col == (colnr_T)(reginput - regline)
4235 ? (cmp == '<' || cmp == '>')
4236 : (pos->col < (colnr_T)(reginput - regline)
4237 ? cmp != '>'
4238 : cmp != '<'))
4239 : (pos->lnum < reglnum + reg_firstlnum
4240 ? cmp != '>'
4241 : cmp != '<')))
4242 status = RA_NOMATCH;
4243 }
4244 break;
4245
4246 case RE_VISUAL:
4247#ifdef FEAT_VISUAL
4248 /* Check if the buffer is the current buffer. and whether the
4249 * position is inside the Visual area. */
4250 if (reg_buf != curbuf || VIsual.lnum == 0)
4251 status = RA_NOMATCH;
4252 else
4253 {
4254 pos_T top, bot;
4255 linenr_T lnum;
4256 colnr_T col;
4257 win_T *wp = reg_win == NULL ? curwin : reg_win;
4258 int mode;
4259
4260 if (VIsual_active)
4261 {
4262 if (lt(VIsual, wp->w_cursor))
4263 {
4264 top = VIsual;
4265 bot = wp->w_cursor;
4266 }
4267 else
4268 {
4269 top = wp->w_cursor;
4270 bot = VIsual;
4271 }
4272 mode = VIsual_mode;
4273 }
4274 else
4275 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004276 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004277 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004278 top = curbuf->b_visual.vi_start;
4279 bot = curbuf->b_visual.vi_end;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004280 }
4281 else
4282 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004283 top = curbuf->b_visual.vi_end;
4284 bot = curbuf->b_visual.vi_start;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004285 }
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004286 mode = curbuf->b_visual.vi_mode;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004287 }
4288 lnum = reglnum + reg_firstlnum;
4289 col = (colnr_T)(reginput - regline);
4290 if (lnum < top.lnum || lnum > bot.lnum)
4291 status = RA_NOMATCH;
4292 else if (mode == 'v')
4293 {
4294 if ((lnum == top.lnum && col < top.col)
4295 || (lnum == bot.lnum
4296 && col >= bot.col + (*p_sel != 'e')))
4297 status = RA_NOMATCH;
4298 }
4299 else if (mode == Ctrl_V)
4300 {
4301 colnr_T start, end;
4302 colnr_T start2, end2;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004303 colnr_T cols;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004304
4305 getvvcol(wp, &top, &start, NULL, &end);
4306 getvvcol(wp, &bot, &start2, NULL, &end2);
4307 if (start2 < start)
4308 start = start2;
4309 if (end2 > end)
4310 end = end2;
4311 if (top.col == MAXCOL || bot.col == MAXCOL)
4312 end = MAXCOL;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004313 cols = win_linetabsize(wp,
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004314 regline, (colnr_T)(reginput - regline));
Bram Moolenaar89d40322006-08-29 15:30:07 +00004315 if (cols < start || cols > end - (*p_sel == 'e'))
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004316 status = RA_NOMATCH;
4317 }
4318 }
4319#else
4320 status = RA_NOMATCH;
4321#endif
4322 break;
4323
Bram Moolenaar071d4272004-06-13 20:20:40 +00004324 case RE_LNUM:
4325 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4326 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004327 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004328 break;
4329
4330 case RE_COL:
4331 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004332 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004333 break;
4334
4335 case RE_VCOL:
4336 if (!re_num_cmp((long_u)win_linetabsize(
4337 reg_win == NULL ? curwin : reg_win,
4338 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004339 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004340 break;
4341
4342 case BOW: /* \<word; reginput points to w */
4343 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004344 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004345#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004346 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004347 {
4348 int this_class;
4349
4350 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004351 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004352 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004353 status = RA_NOMATCH; /* not on a word at all */
4354 else if (reg_prev_class() == this_class)
4355 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004356 }
4357#endif
4358 else
4359 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004360 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4361 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004362 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004363 }
4364 break;
4365
4366 case EOW: /* word\>; reginput points after d */
4367 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004368 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004369#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004370 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004371 {
4372 int this_class, prev_class;
4373
4374 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004375 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004376 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004377 if (this_class == prev_class
4378 || prev_class == 0 || prev_class == 1)
4379 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004380 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004381#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004382 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004383 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004384 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4385 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004386 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004387 }
4388 break; /* Matched with EOW */
4389
4390 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004391 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004392 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004393 status = RA_NOMATCH;
4394 else
4395 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 break;
4397
4398 case IDENT:
4399 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004400 status = RA_NOMATCH;
4401 else
4402 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004403 break;
4404
4405 case SIDENT:
4406 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004407 status = RA_NOMATCH;
4408 else
4409 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004410 break;
4411
4412 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004413 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004414 status = RA_NOMATCH;
4415 else
4416 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004417 break;
4418
4419 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004420 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004421 status = RA_NOMATCH;
4422 else
4423 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004424 break;
4425
4426 case FNAME:
4427 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004428 status = RA_NOMATCH;
4429 else
4430 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004431 break;
4432
4433 case SFNAME:
4434 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004435 status = RA_NOMATCH;
4436 else
4437 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004438 break;
4439
4440 case PRINT:
4441 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004442 status = RA_NOMATCH;
4443 else
4444 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004445 break;
4446
4447 case SPRINT:
4448 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004449 status = RA_NOMATCH;
4450 else
4451 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004452 break;
4453
4454 case WHITE:
4455 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004456 status = RA_NOMATCH;
4457 else
4458 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004459 break;
4460
4461 case NWHITE:
4462 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004463 status = RA_NOMATCH;
4464 else
4465 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004466 break;
4467
4468 case DIGIT:
4469 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004470 status = RA_NOMATCH;
4471 else
4472 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004473 break;
4474
4475 case NDIGIT:
4476 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004477 status = RA_NOMATCH;
4478 else
4479 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004480 break;
4481
4482 case HEX:
4483 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004484 status = RA_NOMATCH;
4485 else
4486 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004487 break;
4488
4489 case NHEX:
4490 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004491 status = RA_NOMATCH;
4492 else
4493 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004494 break;
4495
4496 case OCTAL:
4497 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004498 status = RA_NOMATCH;
4499 else
4500 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004501 break;
4502
4503 case NOCTAL:
4504 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004505 status = RA_NOMATCH;
4506 else
4507 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004508 break;
4509
4510 case WORD:
4511 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004512 status = RA_NOMATCH;
4513 else
4514 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004515 break;
4516
4517 case NWORD:
4518 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004519 status = RA_NOMATCH;
4520 else
4521 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004522 break;
4523
4524 case HEAD:
4525 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004526 status = RA_NOMATCH;
4527 else
4528 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004529 break;
4530
4531 case NHEAD:
4532 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004533 status = RA_NOMATCH;
4534 else
4535 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004536 break;
4537
4538 case ALPHA:
4539 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004540 status = RA_NOMATCH;
4541 else
4542 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004543 break;
4544
4545 case NALPHA:
4546 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004547 status = RA_NOMATCH;
4548 else
4549 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004550 break;
4551
4552 case LOWER:
4553 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004554 status = RA_NOMATCH;
4555 else
4556 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004557 break;
4558
4559 case NLOWER:
4560 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004561 status = RA_NOMATCH;
4562 else
4563 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004564 break;
4565
4566 case UPPER:
4567 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004568 status = RA_NOMATCH;
4569 else
4570 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004571 break;
4572
4573 case NUPPER:
4574 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004575 status = RA_NOMATCH;
4576 else
4577 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004578 break;
4579
4580 case EXACTLY:
4581 {
4582 int len;
4583 char_u *opnd;
4584
4585 opnd = OPERAND(scan);
4586 /* Inline the first byte, for speed. */
4587 if (*opnd != *reginput
4588 && (!ireg_ic || (
4589#ifdef FEAT_MBYTE
4590 !enc_utf8 &&
4591#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004592 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004593 status = RA_NOMATCH;
4594 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004595 {
4596 /* match empty string always works; happens when "~" is
4597 * empty. */
4598 }
4599 else if (opnd[1] == NUL
4600#ifdef FEAT_MBYTE
4601 && !(enc_utf8 && ireg_ic)
4602#endif
4603 )
4604 ++reginput; /* matched a single char */
4605 else
4606 {
4607 len = (int)STRLEN(opnd);
4608 /* Need to match first byte again for multi-byte. */
4609 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004610 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004611#ifdef FEAT_MBYTE
4612 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 else if (enc_utf8
4614 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004615 {
4616 /* raaron: This code makes a composing character get
4617 * ignored, which is the correct behavior (sometimes)
4618 * for voweled Hebrew texts. */
4619 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004620 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004621 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004622#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004623 else
4624 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004625 }
4626 }
4627 break;
4628
4629 case ANYOF:
4630 case ANYBUT:
4631 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004632 status = RA_NOMATCH;
4633 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4634 status = RA_NOMATCH;
4635 else
4636 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004637 break;
4638
4639#ifdef FEAT_MBYTE
4640 case MULTIBYTECODE:
4641 if (has_mbyte)
4642 {
4643 int i, len;
4644 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004645 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004646
4647 opnd = OPERAND(scan);
4648 /* Safety check (just in case 'encoding' was changed since
4649 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004650 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004651 {
4652 status = RA_NOMATCH;
4653 break;
4654 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004655 if (enc_utf8)
4656 opndc = mb_ptr2char(opnd);
4657 if (enc_utf8 && utf_iscomposing(opndc))
4658 {
4659 /* When only a composing char is given match at any
4660 * position where that composing char appears. */
4661 status = RA_NOMATCH;
4662 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004663 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004664 inpc = mb_ptr2char(reginput + i);
4665 if (!utf_iscomposing(inpc))
4666 {
4667 if (i > 0)
4668 break;
4669 }
4670 else if (opndc == inpc)
4671 {
4672 /* Include all following composing chars. */
4673 len = i + mb_ptr2len(reginput + i);
4674 status = RA_MATCH;
4675 break;
4676 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004677 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004678 }
4679 else
4680 for (i = 0; i < len; ++i)
4681 if (opnd[i] != reginput[i])
4682 {
4683 status = RA_NOMATCH;
4684 break;
4685 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004686 reginput += len;
4687 }
4688 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004689 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004690 break;
4691#endif
4692
4693 case NOTHING:
4694 break;
4695
4696 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004697 {
4698 int i;
4699 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004700
Bram Moolenaar582fd852005-03-28 20:58:01 +00004701 /*
4702 * When we run into BACK we need to check if we don't keep
4703 * looping without matching any input. The second and later
4704 * times a BACK is encountered it fails if the input is still
4705 * at the same position as the previous time.
4706 * The positions are stored in "backpos" and found by the
4707 * current value of "scan", the position in the RE program.
4708 */
4709 bp = (backpos_T *)backpos.ga_data;
4710 for (i = 0; i < backpos.ga_len; ++i)
4711 if (bp[i].bp_scan == scan)
4712 break;
4713 if (i == backpos.ga_len)
4714 {
4715 /* First time at this BACK, make room to store the pos. */
4716 if (ga_grow(&backpos, 1) == FAIL)
4717 status = RA_FAIL;
4718 else
4719 {
4720 /* get "ga_data" again, it may have changed */
4721 bp = (backpos_T *)backpos.ga_data;
4722 bp[i].bp_scan = scan;
4723 ++backpos.ga_len;
4724 }
4725 }
4726 else if (reg_save_equal(&bp[i].bp_pos))
4727 /* Still at same position as last time, fail. */
4728 status = RA_NOMATCH;
4729
4730 if (status != RA_FAIL && status != RA_NOMATCH)
4731 reg_save(&bp[i].bp_pos, &backpos);
4732 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004733 break;
4734
Bram Moolenaar071d4272004-06-13 20:20:40 +00004735 case MOPEN + 0: /* Match start: \zs */
4736 case MOPEN + 1: /* \( */
4737 case MOPEN + 2:
4738 case MOPEN + 3:
4739 case MOPEN + 4:
4740 case MOPEN + 5:
4741 case MOPEN + 6:
4742 case MOPEN + 7:
4743 case MOPEN + 8:
4744 case MOPEN + 9:
4745 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004746 no = op - MOPEN;
4747 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004748 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004749 if (rp == NULL)
4750 status = RA_FAIL;
4751 else
4752 {
4753 rp->rs_no = no;
4754 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4755 &reg_startp[no]);
4756 /* We simply continue and handle the result when done. */
4757 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004758 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004759 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004760
4761 case NOPEN: /* \%( */
4762 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004763 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004764 status = RA_FAIL;
4765 /* We simply continue and handle the result when done. */
4766 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004767
4768#ifdef FEAT_SYN_HL
4769 case ZOPEN + 1:
4770 case ZOPEN + 2:
4771 case ZOPEN + 3:
4772 case ZOPEN + 4:
4773 case ZOPEN + 5:
4774 case ZOPEN + 6:
4775 case ZOPEN + 7:
4776 case ZOPEN + 8:
4777 case ZOPEN + 9:
4778 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004779 no = op - ZOPEN;
4780 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004781 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004782 if (rp == NULL)
4783 status = RA_FAIL;
4784 else
4785 {
4786 rp->rs_no = no;
4787 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4788 &reg_startzp[no]);
4789 /* We simply continue and handle the result when done. */
4790 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004791 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004792 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004793#endif
4794
4795 case MCLOSE + 0: /* Match end: \ze */
4796 case MCLOSE + 1: /* \) */
4797 case MCLOSE + 2:
4798 case MCLOSE + 3:
4799 case MCLOSE + 4:
4800 case MCLOSE + 5:
4801 case MCLOSE + 6:
4802 case MCLOSE + 7:
4803 case MCLOSE + 8:
4804 case MCLOSE + 9:
4805 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004806 no = op - MCLOSE;
4807 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004808 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004809 if (rp == NULL)
4810 status = RA_FAIL;
4811 else
4812 {
4813 rp->rs_no = no;
4814 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4815 /* We simply continue and handle the result when done. */
4816 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004817 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004818 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004819
4820#ifdef FEAT_SYN_HL
4821 case ZCLOSE + 1: /* \) after \z( */
4822 case ZCLOSE + 2:
4823 case ZCLOSE + 3:
4824 case ZCLOSE + 4:
4825 case ZCLOSE + 5:
4826 case ZCLOSE + 6:
4827 case ZCLOSE + 7:
4828 case ZCLOSE + 8:
4829 case ZCLOSE + 9:
4830 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004831 no = op - ZCLOSE;
4832 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004833 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004834 if (rp == NULL)
4835 status = RA_FAIL;
4836 else
4837 {
4838 rp->rs_no = no;
4839 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4840 &reg_endzp[no]);
4841 /* We simply continue and handle the result when done. */
4842 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004843 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004844 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004845#endif
4846
4847 case BACKREF + 1:
4848 case BACKREF + 2:
4849 case BACKREF + 3:
4850 case BACKREF + 4:
4851 case BACKREF + 5:
4852 case BACKREF + 6:
4853 case BACKREF + 7:
4854 case BACKREF + 8:
4855 case BACKREF + 9:
4856 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004857 int len;
4858 linenr_T clnum;
4859 colnr_T ccol;
4860 char_u *p;
4861
4862 no = op - BACKREF;
4863 cleanup_subexpr();
4864 if (!REG_MULTI) /* Single-line regexp */
4865 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004866 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004867 {
4868 /* Backref was not set: Match an empty string. */
4869 len = 0;
4870 }
4871 else
4872 {
4873 /* Compare current input with back-ref in the same
4874 * line. */
4875 len = (int)(reg_endp[no] - reg_startp[no]);
4876 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004877 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004878 }
4879 }
4880 else /* Multi-line regexp */
4881 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004882 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004883 {
4884 /* Backref was not set: Match an empty string. */
4885 len = 0;
4886 }
4887 else
4888 {
4889 if (reg_startpos[no].lnum == reglnum
4890 && reg_endpos[no].lnum == reglnum)
4891 {
4892 /* Compare back-ref within the current line. */
4893 len = reg_endpos[no].col - reg_startpos[no].col;
4894 if (cstrncmp(regline + reg_startpos[no].col,
4895 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004896 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004897 }
4898 else
4899 {
4900 /* Messy situation: Need to compare between two
4901 * lines. */
4902 ccol = reg_startpos[no].col;
4903 clnum = reg_startpos[no].lnum;
4904 for (;;)
4905 {
4906 /* Since getting one line may invalidate
4907 * the other, need to make copy. Slow! */
4908 if (regline != reg_tofree)
4909 {
4910 len = (int)STRLEN(regline);
4911 if (reg_tofree == NULL
4912 || len >= (int)reg_tofreelen)
4913 {
4914 len += 50; /* get some extra */
4915 vim_free(reg_tofree);
4916 reg_tofree = alloc(len);
4917 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004918 {
4919 status = RA_FAIL; /* outof memory!*/
4920 break;
4921 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004922 reg_tofreelen = len;
4923 }
4924 STRCPY(reg_tofree, regline);
4925 reginput = reg_tofree
4926 + (reginput - regline);
4927 regline = reg_tofree;
4928 }
4929
4930 /* Get the line to compare with. */
4931 p = reg_getline(clnum);
4932 if (clnum == reg_endpos[no].lnum)
4933 len = reg_endpos[no].col - ccol;
4934 else
4935 len = (int)STRLEN(p + ccol);
4936
4937 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004938 {
4939 status = RA_NOMATCH; /* doesn't match */
4940 break;
4941 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004942 if (clnum == reg_endpos[no].lnum)
4943 break; /* match and at end! */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004944 if (reglnum >= reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004945 {
4946 status = RA_NOMATCH; /* text too short */
4947 break;
4948 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004949
4950 /* Advance to next line. */
4951 reg_nextline();
4952 ++clnum;
4953 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004954 if (got_int)
4955 {
4956 status = RA_FAIL;
4957 break;
4958 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004959 }
4960
4961 /* found a match! Note that regline may now point
4962 * to a copy of the line, that should not matter. */
4963 }
4964 }
4965 }
4966
4967 /* Matched the backref, skip over it. */
4968 reginput += len;
4969 }
4970 break;
4971
4972#ifdef FEAT_SYN_HL
4973 case ZREF + 1:
4974 case ZREF + 2:
4975 case ZREF + 3:
4976 case ZREF + 4:
4977 case ZREF + 5:
4978 case ZREF + 6:
4979 case ZREF + 7:
4980 case ZREF + 8:
4981 case ZREF + 9:
4982 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004983 int len;
4984
4985 cleanup_zsubexpr();
4986 no = op - ZREF;
4987 if (re_extmatch_in != NULL
4988 && re_extmatch_in->matches[no] != NULL)
4989 {
4990 len = (int)STRLEN(re_extmatch_in->matches[no]);
4991 if (cstrncmp(re_extmatch_in->matches[no],
4992 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004993 status = RA_NOMATCH;
4994 else
4995 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004996 }
4997 else
4998 {
4999 /* Backref was not set: Match an empty string. */
5000 }
5001 }
5002 break;
5003#endif
5004
5005 case BRANCH:
5006 {
5007 if (OP(next) != BRANCH) /* No choice. */
5008 next = OPERAND(scan); /* Avoid recursion. */
5009 else
5010 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005011 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005012 if (rp == NULL)
5013 status = RA_FAIL;
5014 else
5015 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005016 }
5017 }
5018 break;
5019
5020 case BRACE_LIMITS:
5021 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005022 if (OP(next) == BRACE_SIMPLE)
5023 {
5024 bl_minval = OPERAND_MIN(scan);
5025 bl_maxval = OPERAND_MAX(scan);
5026 }
5027 else if (OP(next) >= BRACE_COMPLEX
5028 && OP(next) < BRACE_COMPLEX + 10)
5029 {
5030 no = OP(next) - BRACE_COMPLEX;
5031 brace_min[no] = OPERAND_MIN(scan);
5032 brace_max[no] = OPERAND_MAX(scan);
5033 brace_count[no] = 0;
5034 }
5035 else
5036 {
5037 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005038 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005039 }
5040 }
5041 break;
5042
5043 case BRACE_COMPLEX + 0:
5044 case BRACE_COMPLEX + 1:
5045 case BRACE_COMPLEX + 2:
5046 case BRACE_COMPLEX + 3:
5047 case BRACE_COMPLEX + 4:
5048 case BRACE_COMPLEX + 5:
5049 case BRACE_COMPLEX + 6:
5050 case BRACE_COMPLEX + 7:
5051 case BRACE_COMPLEX + 8:
5052 case BRACE_COMPLEX + 9:
5053 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005054 no = op - BRACE_COMPLEX;
5055 ++brace_count[no];
5056
5057 /* If not matched enough times yet, try one more */
5058 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005059 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005060 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005061 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005062 if (rp == NULL)
5063 status = RA_FAIL;
5064 else
5065 {
5066 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005067 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005068 next = OPERAND(scan);
5069 /* We continue and handle the result when done. */
5070 }
5071 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005072 }
5073
5074 /* If matched enough times, may try matching some more */
5075 if (brace_min[no] <= brace_max[no])
5076 {
5077 /* Range is the normal way around, use longest match */
5078 if (brace_count[no] <= brace_max[no])
5079 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005080 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005081 if (rp == NULL)
5082 status = RA_FAIL;
5083 else
5084 {
5085 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005086 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005087 next = OPERAND(scan);
5088 /* We continue and handle the result when done. */
5089 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005090 }
5091 }
5092 else
5093 {
5094 /* Range is backwards, use shortest match first */
5095 if (brace_count[no] <= brace_min[no])
5096 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005097 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005098 if (rp == NULL)
5099 status = RA_FAIL;
5100 else
5101 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005102 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005103 /* We continue and handle the result when done. */
5104 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005105 }
5106 }
5107 }
5108 break;
5109
5110 case BRACE_SIMPLE:
5111 case STAR:
5112 case PLUS:
5113 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005114 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005115
5116 /*
5117 * Lookahead to avoid useless match attempts when we know
5118 * what character comes next.
5119 */
5120 if (OP(next) == EXACTLY)
5121 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005122 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005123 if (ireg_ic)
5124 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005125 if (MB_ISUPPER(rst.nextb))
5126 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005127 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005128 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005129 }
5130 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005131 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005132 }
5133 else
5134 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005135 rst.nextb = NUL;
5136 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005137 }
5138 if (op != BRACE_SIMPLE)
5139 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005140 rst.minval = (op == STAR) ? 0 : 1;
5141 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005142 }
5143 else
5144 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005145 rst.minval = bl_minval;
5146 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005147 }
5148
5149 /*
5150 * When maxval > minval, try matching as much as possible, up
5151 * to maxval. When maxval < minval, try matching at least the
5152 * minimal number (since the range is backwards, that's also
5153 * maxval!).
5154 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005155 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005156 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005157 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005158 status = RA_FAIL;
5159 break;
5160 }
5161 if (rst.minval <= rst.maxval
5162 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5163 {
5164 /* It could match. Prepare for trying to match what
5165 * follows. The code is below. Parameters are stored in
5166 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005167 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005168 {
5169 EMSG(_(e_maxmempat));
5170 status = RA_FAIL;
5171 }
5172 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005173 status = RA_FAIL;
5174 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005175 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005176 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005177 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005178 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005179 if (rp == NULL)
5180 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005181 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005182 {
5183 *(((regstar_T *)rp) - 1) = rst;
5184 status = RA_BREAK; /* skip the restore bits */
5185 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005186 }
5187 }
5188 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005189 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005190
Bram Moolenaar071d4272004-06-13 20:20:40 +00005191 }
5192 break;
5193
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005194 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195 case MATCH:
5196 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005197 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005198 if (rp == NULL)
5199 status = RA_FAIL;
5200 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005201 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005202 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005203 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005204 next = OPERAND(scan);
5205 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005206 }
5207 break;
5208
5209 case BEHIND:
5210 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005211 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005212 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005213 {
5214 EMSG(_(e_maxmempat));
5215 status = RA_FAIL;
5216 }
5217 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005218 status = RA_FAIL;
5219 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005220 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005221 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005222 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005223 if (rp == NULL)
5224 status = RA_FAIL;
5225 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005226 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005227 /* Need to save the subexpr to be able to restore them
5228 * when there is a match but we don't use it. */
5229 save_subexpr(((regbehind_T *)rp) - 1);
5230
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005231 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005232 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005233 /* First try if what follows matches. If it does then we
5234 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005235 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005236 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005237 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005238
5239 case BHPOS:
5240 if (REG_MULTI)
5241 {
5242 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5243 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005244 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005245 }
5246 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005247 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005248 break;
5249
5250 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005251 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5252 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 status = RA_NOMATCH;
5254 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005255 ADVANCE_REGINPUT();
5256 else
5257 reg_nextline();
5258 break;
5259
5260 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005261 status = RA_MATCH; /* Success! */
5262 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005263
5264 default:
5265 EMSG(_(e_re_corr));
5266#ifdef DEBUG
5267 printf("Illegal op code %d\n", op);
5268#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005269 status = RA_FAIL;
5270 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005271 }
5272 }
5273
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005274 /* If we can't continue sequentially, break the inner loop. */
5275 if (status != RA_CONT)
5276 break;
5277
5278 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005279 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005280
5281 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005282
5283 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005284 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005285 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005286 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005287 while (regstack.ga_len > 0 && status != RA_FAIL)
5288 {
5289 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5290 switch (rp->rs_state)
5291 {
5292 case RS_NOPEN:
5293 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005294 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005295 break;
5296
5297 case RS_MOPEN:
5298 /* Pop the state. Restore pointers when there is no match. */
5299 if (status == RA_NOMATCH)
5300 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5301 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005302 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005303 break;
5304
5305#ifdef FEAT_SYN_HL
5306 case RS_ZOPEN:
5307 /* Pop the state. Restore pointers when there is no match. */
5308 if (status == RA_NOMATCH)
5309 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5310 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005311 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005312 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005313#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005314
5315 case RS_MCLOSE:
5316 /* Pop the state. Restore pointers when there is no match. */
5317 if (status == RA_NOMATCH)
5318 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5319 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005320 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005321 break;
5322
5323#ifdef FEAT_SYN_HL
5324 case RS_ZCLOSE:
5325 /* Pop the state. Restore pointers when there is no match. */
5326 if (status == RA_NOMATCH)
5327 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5328 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005329 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005330 break;
5331#endif
5332
5333 case RS_BRANCH:
5334 if (status == RA_MATCH)
5335 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005336 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005337 else
5338 {
5339 if (status != RA_BREAK)
5340 {
5341 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005342 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005343 scan = rp->rs_scan;
5344 }
5345 if (scan == NULL || OP(scan) != BRANCH)
5346 {
5347 /* no more branches, didn't find a match */
5348 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005349 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005350 }
5351 else
5352 {
5353 /* Prepare to try a branch. */
5354 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005355 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005356 scan = OPERAND(scan);
5357 }
5358 }
5359 break;
5360
5361 case RS_BRCPLX_MORE:
5362 /* Pop the state. Restore pointers when there is no match. */
5363 if (status == RA_NOMATCH)
5364 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005365 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005366 --brace_count[rp->rs_no]; /* decrement match count */
5367 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005368 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005369 break;
5370
5371 case RS_BRCPLX_LONG:
5372 /* Pop the state. Restore pointers when there is no match. */
5373 if (status == RA_NOMATCH)
5374 {
5375 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005376 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005377 --brace_count[rp->rs_no];
5378 /* continue with the items after "\{}" */
5379 status = RA_CONT;
5380 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005381 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005382 if (status == RA_CONT)
5383 scan = regnext(scan);
5384 break;
5385
5386 case RS_BRCPLX_SHORT:
5387 /* Pop the state. Restore pointers when there is no match. */
5388 if (status == RA_NOMATCH)
5389 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005390 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005391 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005392 if (status == RA_NOMATCH)
5393 {
5394 scan = OPERAND(scan);
5395 status = RA_CONT;
5396 }
5397 break;
5398
5399 case RS_NOMATCH:
5400 /* Pop the state. If the operand matches for NOMATCH or
5401 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5402 * except for SUBPAT, and continue with the next item. */
5403 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5404 status = RA_NOMATCH;
5405 else
5406 {
5407 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005408 if (rp->rs_no != SUBPAT) /* zero-width */
5409 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005410 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005411 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005412 if (status == RA_CONT)
5413 scan = regnext(scan);
5414 break;
5415
5416 case RS_BEHIND1:
5417 if (status == RA_NOMATCH)
5418 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005419 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005420 regstack.ga_len -= sizeof(regbehind_T);
5421 }
5422 else
5423 {
5424 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5425 * the behind part does (not) match before the current
5426 * position in the input. This must be done at every
5427 * position in the input and checking if the match ends at
5428 * the current position. */
5429
5430 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005431 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005432
5433 /* start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005434 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005435 * result, hitting the start of the line or the previous
5436 * line (for multi-line matching).
5437 * Set behind_pos to where the match should end, BHPOS
5438 * will match it. Save the current value. */
5439 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5440 behind_pos = rp->rs_un.regsave;
5441
5442 rp->rs_state = RS_BEHIND2;
5443
Bram Moolenaar582fd852005-03-28 20:58:01 +00005444 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005445 scan = OPERAND(rp->rs_scan);
5446 }
5447 break;
5448
5449 case RS_BEHIND2:
5450 /*
5451 * Looping for BEHIND / NOBEHIND match.
5452 */
5453 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5454 {
5455 /* found a match that ends where "next" started */
5456 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5457 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005458 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5459 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005460 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005461 {
5462 /* But we didn't want a match. Need to restore the
5463 * subexpr, because what follows matched, so they have
5464 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005465 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005466 restore_subexpr(((regbehind_T *)rp) - 1);
5467 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005468 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005469 regstack.ga_len -= sizeof(regbehind_T);
5470 }
5471 else
5472 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005473 /* No match or a match that doesn't end where we want it: Go
5474 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005475 no = OK;
5476 if (REG_MULTI)
5477 {
5478 if (rp->rs_un.regsave.rs_u.pos.col == 0)
5479 {
5480 if (rp->rs_un.regsave.rs_u.pos.lnum
5481 < behind_pos.rs_u.pos.lnum
5482 || reg_getline(
5483 --rp->rs_un.regsave.rs_u.pos.lnum)
5484 == NULL)
5485 no = FAIL;
5486 else
5487 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005488 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005489 rp->rs_un.regsave.rs_u.pos.col =
5490 (colnr_T)STRLEN(regline);
5491 }
5492 }
5493 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005494#ifdef FEAT_MBYTE
5495 if (has_mbyte)
5496 rp->rs_un.regsave.rs_u.pos.col -=
5497 (*mb_head_off)(regline, regline
5498 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
5499 else
5500#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005501 --rp->rs_un.regsave.rs_u.pos.col;
5502 }
5503 else
5504 {
5505 if (rp->rs_un.regsave.rs_u.ptr == regline)
5506 no = FAIL;
5507 else
5508 --rp->rs_un.regsave.rs_u.ptr;
5509 }
5510 if (no == OK)
5511 {
5512 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005513 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005514 scan = OPERAND(rp->rs_scan);
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005515 if (status == RA_MATCH)
5516 {
5517 /* We did match, so subexpr may have been changed,
5518 * need to restore them for the next try. */
5519 status = RA_NOMATCH;
5520 restore_subexpr(((regbehind_T *)rp) - 1);
5521 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005522 }
5523 else
5524 {
5525 /* Can't advance. For NOBEHIND that's a match. */
5526 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5527 if (rp->rs_no == NOBEHIND)
5528 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005529 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5530 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005531 status = RA_MATCH;
5532 }
5533 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005534 {
5535 /* We do want a proper match. Need to restore the
5536 * subexpr if we had a match, because they may have
5537 * been set. */
5538 if (status == RA_MATCH)
5539 {
5540 status = RA_NOMATCH;
5541 restore_subexpr(((regbehind_T *)rp) - 1);
5542 }
5543 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005544 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005545 regstack.ga_len -= sizeof(regbehind_T);
5546 }
5547 }
5548 break;
5549
5550 case RS_STAR_LONG:
5551 case RS_STAR_SHORT:
5552 {
5553 regstar_T *rst = ((regstar_T *)rp) - 1;
5554
5555 if (status == RA_MATCH)
5556 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005557 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005558 regstack.ga_len -= sizeof(regstar_T);
5559 break;
5560 }
5561
5562 /* Tried once already, restore input pointers. */
5563 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005564 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005565
5566 /* Repeat until we found a position where it could match. */
5567 for (;;)
5568 {
5569 if (status != RA_BREAK)
5570 {
5571 /* Tried first position already, advance. */
5572 if (rp->rs_state == RS_STAR_LONG)
5573 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005574 /* Trying for longest match, but couldn't or
5575 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005576 if (--rst->count < rst->minval)
5577 break;
5578 if (reginput == regline)
5579 {
5580 /* backup to last char of previous line */
5581 --reglnum;
5582 regline = reg_getline(reglnum);
5583 /* Just in case regrepeat() didn't count
5584 * right. */
5585 if (regline == NULL)
5586 break;
5587 reginput = regline + STRLEN(regline);
5588 fast_breakcheck();
5589 }
5590 else
5591 mb_ptr_back(regline, reginput);
5592 }
5593 else
5594 {
5595 /* Range is backwards, use shortest match first.
5596 * Careful: maxval and minval are exchanged!
5597 * Couldn't or didn't match: try advancing one
5598 * char. */
5599 if (rst->count == rst->minval
5600 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5601 break;
5602 ++rst->count;
5603 }
5604 if (got_int)
5605 break;
5606 }
5607 else
5608 status = RA_NOMATCH;
5609
5610 /* If it could match, try it. */
5611 if (rst->nextb == NUL || *reginput == rst->nextb
5612 || *reginput == rst->nextb_ic)
5613 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005614 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005615 scan = regnext(rp->rs_scan);
5616 status = RA_CONT;
5617 break;
5618 }
5619 }
5620 if (status != RA_CONT)
5621 {
5622 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005623 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005624 regstack.ga_len -= sizeof(regstar_T);
5625 status = RA_NOMATCH;
5626 }
5627 }
5628 break;
5629 }
5630
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005631 /* If we want to continue the inner loop or didn't pop a state
5632 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005633 if (status == RA_CONT || rp == (regitem_T *)
5634 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5635 break;
5636 }
5637
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005638 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005639 if (status == RA_CONT)
5640 continue;
5641
5642 /*
5643 * If the regstack is empty or something failed we are done.
5644 */
5645 if (regstack.ga_len == 0 || status == RA_FAIL)
5646 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005647 if (scan == NULL)
5648 {
5649 /*
5650 * We get here only if there's trouble -- normally "case END" is
5651 * the terminating point.
5652 */
5653 EMSG(_(e_re_corr));
5654#ifdef DEBUG
5655 printf("Premature EOL\n");
5656#endif
5657 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005658 if (status == RA_FAIL)
5659 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005660 return (status == RA_MATCH);
5661 }
5662
5663 } /* End of loop until the regstack is empty. */
5664
5665 /* NOTREACHED */
5666}
5667
5668/*
5669 * Push an item onto the regstack.
5670 * Returns pointer to new item. Returns NULL when out of memory.
5671 */
5672 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005673regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005674 regstate_T state;
5675 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005676{
5677 regitem_T *rp;
5678
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005679 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005680 {
5681 EMSG(_(e_maxmempat));
5682 return NULL;
5683 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005684 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005685 return NULL;
5686
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005687 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005688 rp->rs_state = state;
5689 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005690
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005691 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005692 return rp;
5693}
5694
5695/*
5696 * Pop an item from the regstack.
5697 */
5698 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005699regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005700 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005701{
5702 regitem_T *rp;
5703
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005704 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005705 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005706
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005707 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005708}
5709
Bram Moolenaar071d4272004-06-13 20:20:40 +00005710/*
5711 * regrepeat - repeatedly match something simple, return how many.
5712 * Advances reginput (and reglnum) to just after the matched chars.
5713 */
5714 static int
5715regrepeat(p, maxcount)
5716 char_u *p;
5717 long maxcount; /* maximum number of matches allowed */
5718{
5719 long count = 0;
5720 char_u *scan;
5721 char_u *opnd;
5722 int mask;
5723 int testval = 0;
5724
5725 scan = reginput; /* Make local copy of reginput for speed. */
5726 opnd = OPERAND(p);
5727 switch (OP(p))
5728 {
5729 case ANY:
5730 case ANY + ADD_NL:
5731 while (count < maxcount)
5732 {
5733 /* Matching anything means we continue until end-of-line (or
5734 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5735 while (*scan != NUL && count < maxcount)
5736 {
5737 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005738 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005739 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005740 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5741 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005742 break;
5743 ++count; /* count the line-break */
5744 reg_nextline();
5745 scan = reginput;
5746 if (got_int)
5747 break;
5748 }
5749 break;
5750
5751 case IDENT:
5752 case IDENT + ADD_NL:
5753 testval = TRUE;
5754 /*FALLTHROUGH*/
5755 case SIDENT:
5756 case SIDENT + ADD_NL:
5757 while (count < maxcount)
5758 {
5759 if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5760 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005761 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005762 }
5763 else if (*scan == NUL)
5764 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005765 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5766 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005767 break;
5768 reg_nextline();
5769 scan = reginput;
5770 if (got_int)
5771 break;
5772 }
5773 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5774 ++scan;
5775 else
5776 break;
5777 ++count;
5778 }
5779 break;
5780
5781 case KWORD:
5782 case KWORD + ADD_NL:
5783 testval = TRUE;
5784 /*FALLTHROUGH*/
5785 case SKWORD:
5786 case SKWORD + ADD_NL:
5787 while (count < maxcount)
5788 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005789 if (vim_iswordp_buf(scan, reg_buf)
5790 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005791 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005792 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005793 }
5794 else if (*scan == NUL)
5795 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005796 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5797 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005798 break;
5799 reg_nextline();
5800 scan = reginput;
5801 if (got_int)
5802 break;
5803 }
5804 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5805 ++scan;
5806 else
5807 break;
5808 ++count;
5809 }
5810 break;
5811
5812 case FNAME:
5813 case FNAME + ADD_NL:
5814 testval = TRUE;
5815 /*FALLTHROUGH*/
5816 case SFNAME:
5817 case SFNAME + ADD_NL:
5818 while (count < maxcount)
5819 {
5820 if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5821 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005822 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005823 }
5824 else if (*scan == NUL)
5825 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005826 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5827 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005828 break;
5829 reg_nextline();
5830 scan = reginput;
5831 if (got_int)
5832 break;
5833 }
5834 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5835 ++scan;
5836 else
5837 break;
5838 ++count;
5839 }
5840 break;
5841
5842 case PRINT:
5843 case PRINT + ADD_NL:
5844 testval = TRUE;
5845 /*FALLTHROUGH*/
5846 case SPRINT:
5847 case SPRINT + ADD_NL:
5848 while (count < maxcount)
5849 {
5850 if (*scan == NUL)
5851 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005852 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5853 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005854 break;
5855 reg_nextline();
5856 scan = reginput;
5857 if (got_int)
5858 break;
5859 }
5860 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5861 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005862 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005863 }
5864 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5865 ++scan;
5866 else
5867 break;
5868 ++count;
5869 }
5870 break;
5871
5872 case WHITE:
5873 case WHITE + ADD_NL:
5874 testval = mask = RI_WHITE;
5875do_class:
5876 while (count < maxcount)
5877 {
5878#ifdef FEAT_MBYTE
5879 int l;
5880#endif
5881 if (*scan == NUL)
5882 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005883 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5884 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005885 break;
5886 reg_nextline();
5887 scan = reginput;
5888 if (got_int)
5889 break;
5890 }
5891#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005892 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005893 {
5894 if (testval != 0)
5895 break;
5896 scan += l;
5897 }
5898#endif
5899 else if ((class_tab[*scan] & mask) == testval)
5900 ++scan;
5901 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5902 ++scan;
5903 else
5904 break;
5905 ++count;
5906 }
5907 break;
5908
5909 case NWHITE:
5910 case NWHITE + ADD_NL:
5911 mask = RI_WHITE;
5912 goto do_class;
5913 case DIGIT:
5914 case DIGIT + ADD_NL:
5915 testval = mask = RI_DIGIT;
5916 goto do_class;
5917 case NDIGIT:
5918 case NDIGIT + ADD_NL:
5919 mask = RI_DIGIT;
5920 goto do_class;
5921 case HEX:
5922 case HEX + ADD_NL:
5923 testval = mask = RI_HEX;
5924 goto do_class;
5925 case NHEX:
5926 case NHEX + ADD_NL:
5927 mask = RI_HEX;
5928 goto do_class;
5929 case OCTAL:
5930 case OCTAL + ADD_NL:
5931 testval = mask = RI_OCTAL;
5932 goto do_class;
5933 case NOCTAL:
5934 case NOCTAL + ADD_NL:
5935 mask = RI_OCTAL;
5936 goto do_class;
5937 case WORD:
5938 case WORD + ADD_NL:
5939 testval = mask = RI_WORD;
5940 goto do_class;
5941 case NWORD:
5942 case NWORD + ADD_NL:
5943 mask = RI_WORD;
5944 goto do_class;
5945 case HEAD:
5946 case HEAD + ADD_NL:
5947 testval = mask = RI_HEAD;
5948 goto do_class;
5949 case NHEAD:
5950 case NHEAD + ADD_NL:
5951 mask = RI_HEAD;
5952 goto do_class;
5953 case ALPHA:
5954 case ALPHA + ADD_NL:
5955 testval = mask = RI_ALPHA;
5956 goto do_class;
5957 case NALPHA:
5958 case NALPHA + ADD_NL:
5959 mask = RI_ALPHA;
5960 goto do_class;
5961 case LOWER:
5962 case LOWER + ADD_NL:
5963 testval = mask = RI_LOWER;
5964 goto do_class;
5965 case NLOWER:
5966 case NLOWER + ADD_NL:
5967 mask = RI_LOWER;
5968 goto do_class;
5969 case UPPER:
5970 case UPPER + ADD_NL:
5971 testval = mask = RI_UPPER;
5972 goto do_class;
5973 case NUPPER:
5974 case NUPPER + ADD_NL:
5975 mask = RI_UPPER;
5976 goto do_class;
5977
5978 case EXACTLY:
5979 {
5980 int cu, cl;
5981
5982 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005983 * would have been used for it. It does handle single-byte
5984 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005985 if (ireg_ic)
5986 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005987 cu = MB_TOUPPER(*opnd);
5988 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005989 while (count < maxcount && (*scan == cu || *scan == cl))
5990 {
5991 count++;
5992 scan++;
5993 }
5994 }
5995 else
5996 {
5997 cu = *opnd;
5998 while (count < maxcount && *scan == cu)
5999 {
6000 count++;
6001 scan++;
6002 }
6003 }
6004 break;
6005 }
6006
6007#ifdef FEAT_MBYTE
6008 case MULTIBYTECODE:
6009 {
6010 int i, len, cf = 0;
6011
6012 /* Safety check (just in case 'encoding' was changed since
6013 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006014 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006015 {
6016 if (ireg_ic && enc_utf8)
6017 cf = utf_fold(utf_ptr2char(opnd));
6018 while (count < maxcount)
6019 {
6020 for (i = 0; i < len; ++i)
6021 if (opnd[i] != scan[i])
6022 break;
6023 if (i < len && (!ireg_ic || !enc_utf8
6024 || utf_fold(utf_ptr2char(scan)) != cf))
6025 break;
6026 scan += len;
6027 ++count;
6028 }
6029 }
6030 }
6031 break;
6032#endif
6033
6034 case ANYOF:
6035 case ANYOF + ADD_NL:
6036 testval = TRUE;
6037 /*FALLTHROUGH*/
6038
6039 case ANYBUT:
6040 case ANYBUT + ADD_NL:
6041 while (count < maxcount)
6042 {
6043#ifdef FEAT_MBYTE
6044 int len;
6045#endif
6046 if (*scan == NUL)
6047 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006048 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6049 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006050 break;
6051 reg_nextline();
6052 scan = reginput;
6053 if (got_int)
6054 break;
6055 }
6056 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6057 ++scan;
6058#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006059 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006060 {
6061 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6062 break;
6063 scan += len;
6064 }
6065#endif
6066 else
6067 {
6068 if ((cstrchr(opnd, *scan) == NULL) == testval)
6069 break;
6070 ++scan;
6071 }
6072 ++count;
6073 }
6074 break;
6075
6076 case NEWL:
6077 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006078 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6079 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006080 {
6081 count++;
6082 if (reg_line_lbr)
6083 ADVANCE_REGINPUT();
6084 else
6085 reg_nextline();
6086 scan = reginput;
6087 if (got_int)
6088 break;
6089 }
6090 break;
6091
6092 default: /* Oh dear. Called inappropriately. */
6093 EMSG(_(e_re_corr));
6094#ifdef DEBUG
6095 printf("Called regrepeat with op code %d\n", OP(p));
6096#endif
6097 break;
6098 }
6099
6100 reginput = scan;
6101
6102 return (int)count;
6103}
6104
6105/*
6106 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006107 * Returns NULL when calculating size, when there is no next item and when
6108 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006109 */
6110 static char_u *
6111regnext(p)
6112 char_u *p;
6113{
6114 int offset;
6115
Bram Moolenaard3005802009-11-25 17:21:32 +00006116 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006117 return NULL;
6118
6119 offset = NEXT(p);
6120 if (offset == 0)
6121 return NULL;
6122
Bram Moolenaar582fd852005-03-28 20:58:01 +00006123 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006124 return p - offset;
6125 else
6126 return p + offset;
6127}
6128
6129/*
6130 * Check the regexp program for its magic number.
6131 * Return TRUE if it's wrong.
6132 */
6133 static int
6134prog_magic_wrong()
6135{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006136 regprog_T *prog;
6137
6138 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6139 if (prog->engine == &nfa_regengine)
6140 /* For NFA matcher we don't check the magic */
6141 return FALSE;
6142
6143 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006144 {
6145 EMSG(_(e_re_corr));
6146 return TRUE;
6147 }
6148 return FALSE;
6149}
6150
6151/*
6152 * Cleanup the subexpressions, if this wasn't done yet.
6153 * This construction is used to clear the subexpressions only when they are
6154 * used (to increase speed).
6155 */
6156 static void
6157cleanup_subexpr()
6158{
6159 if (need_clear_subexpr)
6160 {
6161 if (REG_MULTI)
6162 {
6163 /* Use 0xff to set lnum to -1 */
6164 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6165 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6166 }
6167 else
6168 {
6169 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6170 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6171 }
6172 need_clear_subexpr = FALSE;
6173 }
6174}
6175
6176#ifdef FEAT_SYN_HL
6177 static void
6178cleanup_zsubexpr()
6179{
6180 if (need_clear_zsubexpr)
6181 {
6182 if (REG_MULTI)
6183 {
6184 /* Use 0xff to set lnum to -1 */
6185 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6186 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6187 }
6188 else
6189 {
6190 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
6191 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
6192 }
6193 need_clear_zsubexpr = FALSE;
6194 }
6195}
6196#endif
6197
6198/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006199 * Save the current subexpr to "bp", so that they can be restored
6200 * later by restore_subexpr().
6201 */
6202 static void
6203save_subexpr(bp)
6204 regbehind_T *bp;
6205{
6206 int i;
6207
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006208 /* When "need_clear_subexpr" is set we don't need to save the values, only
6209 * remember that this flag needs to be set again when restoring. */
6210 bp->save_need_clear_subexpr = need_clear_subexpr;
6211 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006212 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006213 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006214 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006215 if (REG_MULTI)
6216 {
6217 bp->save_start[i].se_u.pos = reg_startpos[i];
6218 bp->save_end[i].se_u.pos = reg_endpos[i];
6219 }
6220 else
6221 {
6222 bp->save_start[i].se_u.ptr = reg_startp[i];
6223 bp->save_end[i].se_u.ptr = reg_endp[i];
6224 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006225 }
6226 }
6227}
6228
6229/*
6230 * Restore the subexpr from "bp".
6231 */
6232 static void
6233restore_subexpr(bp)
6234 regbehind_T *bp;
6235{
6236 int i;
6237
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006238 /* Only need to restore saved values when they are not to be cleared. */
6239 need_clear_subexpr = bp->save_need_clear_subexpr;
6240 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006241 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006242 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006243 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006244 if (REG_MULTI)
6245 {
6246 reg_startpos[i] = bp->save_start[i].se_u.pos;
6247 reg_endpos[i] = bp->save_end[i].se_u.pos;
6248 }
6249 else
6250 {
6251 reg_startp[i] = bp->save_start[i].se_u.ptr;
6252 reg_endp[i] = bp->save_end[i].se_u.ptr;
6253 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006254 }
6255 }
6256}
6257
6258/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006259 * Advance reglnum, regline and reginput to the next line.
6260 */
6261 static void
6262reg_nextline()
6263{
6264 regline = reg_getline(++reglnum);
6265 reginput = regline;
6266 fast_breakcheck();
6267}
6268
6269/*
6270 * Save the input line and position in a regsave_T.
6271 */
6272 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006273reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006274 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006275 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006276{
6277 if (REG_MULTI)
6278 {
6279 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6280 save->rs_u.pos.lnum = reglnum;
6281 }
6282 else
6283 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006284 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006285}
6286
6287/*
6288 * Restore the input line and position from a regsave_T.
6289 */
6290 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006291reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006292 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006293 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006294{
6295 if (REG_MULTI)
6296 {
6297 if (reglnum != save->rs_u.pos.lnum)
6298 {
6299 /* only call reg_getline() when the line number changed to save
6300 * a bit of time */
6301 reglnum = save->rs_u.pos.lnum;
6302 regline = reg_getline(reglnum);
6303 }
6304 reginput = regline + save->rs_u.pos.col;
6305 }
6306 else
6307 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006308 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006309}
6310
6311/*
6312 * Return TRUE if current position is equal to saved position.
6313 */
6314 static int
6315reg_save_equal(save)
6316 regsave_T *save;
6317{
6318 if (REG_MULTI)
6319 return reglnum == save->rs_u.pos.lnum
6320 && reginput == regline + save->rs_u.pos.col;
6321 return reginput == save->rs_u.ptr;
6322}
6323
6324/*
6325 * Tentatively set the sub-expression start to the current position (after
6326 * calling regmatch() they will have changed). Need to save the existing
6327 * values for when there is no match.
6328 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6329 * depending on REG_MULTI.
6330 */
6331 static void
6332save_se_multi(savep, posp)
6333 save_se_T *savep;
6334 lpos_T *posp;
6335{
6336 savep->se_u.pos = *posp;
6337 posp->lnum = reglnum;
6338 posp->col = (colnr_T)(reginput - regline);
6339}
6340
6341 static void
6342save_se_one(savep, pp)
6343 save_se_T *savep;
6344 char_u **pp;
6345{
6346 savep->se_u.ptr = *pp;
6347 *pp = reginput;
6348}
6349
6350/*
6351 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6352 */
6353 static int
6354re_num_cmp(val, scan)
6355 long_u val;
6356 char_u *scan;
6357{
6358 long_u n = OPERAND_MIN(scan);
6359
6360 if (OPERAND_CMP(scan) == '>')
6361 return val > n;
6362 if (OPERAND_CMP(scan) == '<')
6363 return val < n;
6364 return val == n;
6365}
6366
6367
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006368#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006369
6370/*
6371 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6372 */
6373 static void
6374regdump(pattern, r)
6375 char_u *pattern;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006376 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006377{
6378 char_u *s;
6379 int op = EXACTLY; /* Arbitrary non-END op. */
6380 char_u *next;
6381 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006382 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006383
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006384#ifdef BT_REGEXP_LOG
6385 f = fopen("bt_regexp_log.log", "a");
6386#else
6387 f = stdout;
6388#endif
6389 if (f == NULL)
6390 return;
6391 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006392
6393 s = r->program + 1;
6394 /*
6395 * Loop until we find the END that isn't before a referred next (an END
6396 * can also appear in a NOMATCH operand).
6397 */
6398 while (op != END || s <= end)
6399 {
6400 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006401 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006402 next = regnext(s);
6403 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006404 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006405 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006406 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006407 if (end < next)
6408 end = next;
6409 if (op == BRACE_LIMITS)
6410 {
6411 /* Two short ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006412 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006413 s += 8;
6414 }
6415 s += 3;
6416 if (op == ANYOF || op == ANYOF + ADD_NL
6417 || op == ANYBUT || op == ANYBUT + ADD_NL
6418 || op == EXACTLY)
6419 {
6420 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006421 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006422 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006423 fprintf(f, "%c", *s++);
6424 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006425 s++;
6426 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006427 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006428 }
6429
6430 /* Header fields of interest. */
6431 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006432 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006433 ? (char *)transchar(r->regstart)
6434 : "multibyte", r->regstart);
6435 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006436 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006437 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006438 fprintf(f, "must have \"%s\"", r->regmust);
6439 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006440
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006441#ifdef BT_REGEXP_LOG
6442 fclose(f);
6443#endif
6444}
6445#endif /* BT_REGEXP_DUMP */
6446
6447#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006448/*
6449 * regprop - printable representation of opcode
6450 */
6451 static char_u *
6452regprop(op)
6453 char_u *op;
6454{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006455 char *p;
6456 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006457
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006458 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006459
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006460 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006461 {
6462 case BOL:
6463 p = "BOL";
6464 break;
6465 case EOL:
6466 p = "EOL";
6467 break;
6468 case RE_BOF:
6469 p = "BOF";
6470 break;
6471 case RE_EOF:
6472 p = "EOF";
6473 break;
6474 case CURSOR:
6475 p = "CURSOR";
6476 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006477 case RE_VISUAL:
6478 p = "RE_VISUAL";
6479 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006480 case RE_LNUM:
6481 p = "RE_LNUM";
6482 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006483 case RE_MARK:
6484 p = "RE_MARK";
6485 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006486 case RE_COL:
6487 p = "RE_COL";
6488 break;
6489 case RE_VCOL:
6490 p = "RE_VCOL";
6491 break;
6492 case BOW:
6493 p = "BOW";
6494 break;
6495 case EOW:
6496 p = "EOW";
6497 break;
6498 case ANY:
6499 p = "ANY";
6500 break;
6501 case ANY + ADD_NL:
6502 p = "ANY+NL";
6503 break;
6504 case ANYOF:
6505 p = "ANYOF";
6506 break;
6507 case ANYOF + ADD_NL:
6508 p = "ANYOF+NL";
6509 break;
6510 case ANYBUT:
6511 p = "ANYBUT";
6512 break;
6513 case ANYBUT + ADD_NL:
6514 p = "ANYBUT+NL";
6515 break;
6516 case IDENT:
6517 p = "IDENT";
6518 break;
6519 case IDENT + ADD_NL:
6520 p = "IDENT+NL";
6521 break;
6522 case SIDENT:
6523 p = "SIDENT";
6524 break;
6525 case SIDENT + ADD_NL:
6526 p = "SIDENT+NL";
6527 break;
6528 case KWORD:
6529 p = "KWORD";
6530 break;
6531 case KWORD + ADD_NL:
6532 p = "KWORD+NL";
6533 break;
6534 case SKWORD:
6535 p = "SKWORD";
6536 break;
6537 case SKWORD + ADD_NL:
6538 p = "SKWORD+NL";
6539 break;
6540 case FNAME:
6541 p = "FNAME";
6542 break;
6543 case FNAME + ADD_NL:
6544 p = "FNAME+NL";
6545 break;
6546 case SFNAME:
6547 p = "SFNAME";
6548 break;
6549 case SFNAME + ADD_NL:
6550 p = "SFNAME+NL";
6551 break;
6552 case PRINT:
6553 p = "PRINT";
6554 break;
6555 case PRINT + ADD_NL:
6556 p = "PRINT+NL";
6557 break;
6558 case SPRINT:
6559 p = "SPRINT";
6560 break;
6561 case SPRINT + ADD_NL:
6562 p = "SPRINT+NL";
6563 break;
6564 case WHITE:
6565 p = "WHITE";
6566 break;
6567 case WHITE + ADD_NL:
6568 p = "WHITE+NL";
6569 break;
6570 case NWHITE:
6571 p = "NWHITE";
6572 break;
6573 case NWHITE + ADD_NL:
6574 p = "NWHITE+NL";
6575 break;
6576 case DIGIT:
6577 p = "DIGIT";
6578 break;
6579 case DIGIT + ADD_NL:
6580 p = "DIGIT+NL";
6581 break;
6582 case NDIGIT:
6583 p = "NDIGIT";
6584 break;
6585 case NDIGIT + ADD_NL:
6586 p = "NDIGIT+NL";
6587 break;
6588 case HEX:
6589 p = "HEX";
6590 break;
6591 case HEX + ADD_NL:
6592 p = "HEX+NL";
6593 break;
6594 case NHEX:
6595 p = "NHEX";
6596 break;
6597 case NHEX + ADD_NL:
6598 p = "NHEX+NL";
6599 break;
6600 case OCTAL:
6601 p = "OCTAL";
6602 break;
6603 case OCTAL + ADD_NL:
6604 p = "OCTAL+NL";
6605 break;
6606 case NOCTAL:
6607 p = "NOCTAL";
6608 break;
6609 case NOCTAL + ADD_NL:
6610 p = "NOCTAL+NL";
6611 break;
6612 case WORD:
6613 p = "WORD";
6614 break;
6615 case WORD + ADD_NL:
6616 p = "WORD+NL";
6617 break;
6618 case NWORD:
6619 p = "NWORD";
6620 break;
6621 case NWORD + ADD_NL:
6622 p = "NWORD+NL";
6623 break;
6624 case HEAD:
6625 p = "HEAD";
6626 break;
6627 case HEAD + ADD_NL:
6628 p = "HEAD+NL";
6629 break;
6630 case NHEAD:
6631 p = "NHEAD";
6632 break;
6633 case NHEAD + ADD_NL:
6634 p = "NHEAD+NL";
6635 break;
6636 case ALPHA:
6637 p = "ALPHA";
6638 break;
6639 case ALPHA + ADD_NL:
6640 p = "ALPHA+NL";
6641 break;
6642 case NALPHA:
6643 p = "NALPHA";
6644 break;
6645 case NALPHA + ADD_NL:
6646 p = "NALPHA+NL";
6647 break;
6648 case LOWER:
6649 p = "LOWER";
6650 break;
6651 case LOWER + ADD_NL:
6652 p = "LOWER+NL";
6653 break;
6654 case NLOWER:
6655 p = "NLOWER";
6656 break;
6657 case NLOWER + ADD_NL:
6658 p = "NLOWER+NL";
6659 break;
6660 case UPPER:
6661 p = "UPPER";
6662 break;
6663 case UPPER + ADD_NL:
6664 p = "UPPER+NL";
6665 break;
6666 case NUPPER:
6667 p = "NUPPER";
6668 break;
6669 case NUPPER + ADD_NL:
6670 p = "NUPPER+NL";
6671 break;
6672 case BRANCH:
6673 p = "BRANCH";
6674 break;
6675 case EXACTLY:
6676 p = "EXACTLY";
6677 break;
6678 case NOTHING:
6679 p = "NOTHING";
6680 break;
6681 case BACK:
6682 p = "BACK";
6683 break;
6684 case END:
6685 p = "END";
6686 break;
6687 case MOPEN + 0:
6688 p = "MATCH START";
6689 break;
6690 case MOPEN + 1:
6691 case MOPEN + 2:
6692 case MOPEN + 3:
6693 case MOPEN + 4:
6694 case MOPEN + 5:
6695 case MOPEN + 6:
6696 case MOPEN + 7:
6697 case MOPEN + 8:
6698 case MOPEN + 9:
6699 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6700 p = NULL;
6701 break;
6702 case MCLOSE + 0:
6703 p = "MATCH END";
6704 break;
6705 case MCLOSE + 1:
6706 case MCLOSE + 2:
6707 case MCLOSE + 3:
6708 case MCLOSE + 4:
6709 case MCLOSE + 5:
6710 case MCLOSE + 6:
6711 case MCLOSE + 7:
6712 case MCLOSE + 8:
6713 case MCLOSE + 9:
6714 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6715 p = NULL;
6716 break;
6717 case BACKREF + 1:
6718 case BACKREF + 2:
6719 case BACKREF + 3:
6720 case BACKREF + 4:
6721 case BACKREF + 5:
6722 case BACKREF + 6:
6723 case BACKREF + 7:
6724 case BACKREF + 8:
6725 case BACKREF + 9:
6726 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6727 p = NULL;
6728 break;
6729 case NOPEN:
6730 p = "NOPEN";
6731 break;
6732 case NCLOSE:
6733 p = "NCLOSE";
6734 break;
6735#ifdef FEAT_SYN_HL
6736 case ZOPEN + 1:
6737 case ZOPEN + 2:
6738 case ZOPEN + 3:
6739 case ZOPEN + 4:
6740 case ZOPEN + 5:
6741 case ZOPEN + 6:
6742 case ZOPEN + 7:
6743 case ZOPEN + 8:
6744 case ZOPEN + 9:
6745 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6746 p = NULL;
6747 break;
6748 case ZCLOSE + 1:
6749 case ZCLOSE + 2:
6750 case ZCLOSE + 3:
6751 case ZCLOSE + 4:
6752 case ZCLOSE + 5:
6753 case ZCLOSE + 6:
6754 case ZCLOSE + 7:
6755 case ZCLOSE + 8:
6756 case ZCLOSE + 9:
6757 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6758 p = NULL;
6759 break;
6760 case ZREF + 1:
6761 case ZREF + 2:
6762 case ZREF + 3:
6763 case ZREF + 4:
6764 case ZREF + 5:
6765 case ZREF + 6:
6766 case ZREF + 7:
6767 case ZREF + 8:
6768 case ZREF + 9:
6769 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6770 p = NULL;
6771 break;
6772#endif
6773 case STAR:
6774 p = "STAR";
6775 break;
6776 case PLUS:
6777 p = "PLUS";
6778 break;
6779 case NOMATCH:
6780 p = "NOMATCH";
6781 break;
6782 case MATCH:
6783 p = "MATCH";
6784 break;
6785 case BEHIND:
6786 p = "BEHIND";
6787 break;
6788 case NOBEHIND:
6789 p = "NOBEHIND";
6790 break;
6791 case SUBPAT:
6792 p = "SUBPAT";
6793 break;
6794 case BRACE_LIMITS:
6795 p = "BRACE_LIMITS";
6796 break;
6797 case BRACE_SIMPLE:
6798 p = "BRACE_SIMPLE";
6799 break;
6800 case BRACE_COMPLEX + 0:
6801 case BRACE_COMPLEX + 1:
6802 case BRACE_COMPLEX + 2:
6803 case BRACE_COMPLEX + 3:
6804 case BRACE_COMPLEX + 4:
6805 case BRACE_COMPLEX + 5:
6806 case BRACE_COMPLEX + 6:
6807 case BRACE_COMPLEX + 7:
6808 case BRACE_COMPLEX + 8:
6809 case BRACE_COMPLEX + 9:
6810 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6811 p = NULL;
6812 break;
6813#ifdef FEAT_MBYTE
6814 case MULTIBYTECODE:
6815 p = "MULTIBYTECODE";
6816 break;
6817#endif
6818 case NEWL:
6819 p = "NEWL";
6820 break;
6821 default:
6822 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6823 p = NULL;
6824 break;
6825 }
6826 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006827 STRCAT(buf, p);
6828 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006829}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006830#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006831
6832#ifdef FEAT_MBYTE
6833static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6834
/*
 * One entry of decomp_table[]: up to three code points that replace a single
 * precomposed character.  Unused trailing slots are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f, indexed by
 * (character - 0xfb20).  Entries marked UNUSED map the character to itself.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};

/*
 * Decompose character "c" into up to three characters "*c1", "*c2", "*c3".
 * Characters in the range 0xfb20 - 0xfb4f are looked up in decomp_table[].
 * Any other character is returned unchanged in "*c1" with "*c2" and "*c3"
 * set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* Both bounds must match the table: with a lower bound below 0xfb20 the
     * index "c - 0xfb20" would be negative and read outside the table. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
	d = decomp_table[c - 0xfb20];
	*c1 = d.a;
	*c2 = d.b;
	*c3 = d.c;
    }
    else
    {
	*c1 = c;
	*c2 = *c3 = 0;
    }
}
6913#endif
6914
6915/*
6916 * Compare two strings, ignore case if ireg_ic set.
6917 * Return 0 if strings match, non-zero otherwise.
6918 * Correct the length "*n" when composing characters are ignored.
6919 */
6920 static int
6921cstrncmp(s1, s2, n)
6922 char_u *s1, *s2;
6923 int *n;
6924{
6925 int result;
6926
6927 if (!ireg_ic)
6928 result = STRNCMP(s1, s2, *n);
6929 else
6930 result = MB_STRNICMP(s1, s2, *n);
6931
6932#ifdef FEAT_MBYTE
6933 /* if it failed and it's utf8 and we want to combineignore: */
6934 if (result != 0 && enc_utf8 && ireg_icombine)
6935 {
6936 char_u *str1, *str2;
6937 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006938 int junk;
6939
6940 /* we have to handle the strcmp ourselves, since it is necessary to
6941 * deal with the composing characters by ignoring them: */
6942 str1 = s1;
6943 str2 = s2;
6944 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00006945 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006946 {
6947 c1 = mb_ptr2char_adv(&str1);
6948 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006949
6950 /* decompose the character if necessary, into 'base' characters
6951 * because I don't care about Arabic, I will hard-code the Hebrew
6952 * which I *do* care about! So sue me... */
6953 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6954 {
6955 /* decomposition necessary? */
6956 mb_decompose(c1, &c11, &junk, &junk);
6957 mb_decompose(c2, &c12, &junk, &junk);
6958 c1 = c11;
6959 c2 = c12;
6960 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6961 break;
6962 }
6963 }
6964 result = c2 - c1;
6965 if (result == 0)
6966 *n = (int)(str2 - s2);
6967 }
6968#endif
6969
6970 return result;
6971}
6972
6973/*
6974 * cstrchr: This function is used a lot for simple searches, keep it fast!
6975 */
6976 static char_u *
6977cstrchr(s, c)
6978 char_u *s;
6979 int c;
6980{
6981 char_u *p;
6982 int cc;
6983
6984 if (!ireg_ic
6985#ifdef FEAT_MBYTE
6986 || (!enc_utf8 && mb_char2len(c) > 1)
6987#endif
6988 )
6989 return vim_strchr(s, c);
6990
6991 /* tolower() and toupper() can be slow, comparing twice should be a lot
6992 * faster (esp. when using MS Visual C++!).
6993 * For UTF-8 need to use folded case. */
6994#ifdef FEAT_MBYTE
6995 if (enc_utf8 && c > 0x80)
6996 cc = utf_fold(c);
6997 else
6998#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006999 if (MB_ISUPPER(c))
7000 cc = MB_TOLOWER(c);
7001 else if (MB_ISLOWER(c))
7002 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007003 else
7004 return vim_strchr(s, c);
7005
7006#ifdef FEAT_MBYTE
7007 if (has_mbyte)
7008 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007009 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007010 {
7011 if (enc_utf8 && c > 0x80)
7012 {
7013 if (utf_fold(utf_ptr2char(p)) == cc)
7014 return p;
7015 }
7016 else if (*p == c || *p == cc)
7017 return p;
7018 }
7019 }
7020 else
7021#endif
7022 /* Faster version for when there are no multi-byte characters. */
7023 for (p = s; *p != NUL; ++p)
7024 if (*p == c || *p == cc)
7025 return p;
7026
7027 return NULL;
7028}
7029
7030/***************************************************************
7031 * regsub stuff *
7032 ***************************************************************/
7033
7034/* This stuff below really confuses cc on an SGI -- webb */
7035#ifdef __sgi
7036# undef __ARGS
7037# define __ARGS(x) ()
7038#endif
7039
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007046typedef void (*(*fptr_T) __ARGS((int *, int)))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007047
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007048static fptr_T do_upper __ARGS((int *, int));
7049static fptr_T do_Upper __ARGS((int *, int));
7050static fptr_T do_lower __ARGS((int *, int));
7051static fptr_T do_Lower __ARGS((int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007052
7053static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7054
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007055 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007056do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007057 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007058 int c;
7059{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007060 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007061
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007062 return (fptr_T)NULL;
7063}
7064
7065 static fptr_T
7066do_Upper(d, c)
7067 int *d;
7068 int c;
7069{
7070 *d = MB_TOUPPER(c);
7071
7072 return (fptr_T)do_Upper;
7073}
7074
7075 static fptr_T
7076do_lower(d, c)
7077 int *d;
7078 int c;
7079{
7080 *d = MB_TOLOWER(c);
7081
7082 return (fptr_T)NULL;
7083}
7084
7085 static fptr_T
7086do_Lower(d, c)
7087 int *d;
7088 int c;
7089{
7090 *d = MB_TOLOWER(c);
7091
7092 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007093}
7094
7095/*
7096 * regtilde(): Replace tildes in the pattern by the old pattern.
7097 *
7098 * Short explanation of the tilde: It stands for the previous replacement
7099 * pattern. If that previous pattern also contains a ~ we should go back a
7100 * step further... But we insert the previous pattern into the current one
7101 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007102 * This still does not handle the case where "magic" changes. So require the
7103 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007104 *
7105 * The tildes are parsed once before the first call to vim_regsub().
7106 */
7107 char_u *
7108regtilde(source, magic)
7109 char_u *source;
7110 int magic;
7111{
7112 char_u *newsub = source;
7113 char_u *tmpsub;
7114 char_u *p;
7115 int len;
7116 int prevlen;
7117
7118 for (p = newsub; *p; ++p)
7119 {
7120 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7121 {
7122 if (reg_prev_sub != NULL)
7123 {
7124 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7125 prevlen = (int)STRLEN(reg_prev_sub);
7126 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7127 if (tmpsub != NULL)
7128 {
7129 /* copy prefix */
7130 len = (int)(p - newsub); /* not including ~ */
7131 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007132 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007133 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7134 /* copy postfix */
7135 if (!magic)
7136 ++p; /* back off \ */
7137 STRCPY(tmpsub + len + prevlen, p + 1);
7138
7139 if (newsub != source) /* already allocated newsub */
7140 vim_free(newsub);
7141 newsub = tmpsub;
7142 p = newsub + len + prevlen;
7143 }
7144 }
7145 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007146 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007147 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007148 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007149 --p;
7150 }
7151 else
7152 {
7153 if (*p == '\\' && p[1]) /* skip escaped characters */
7154 ++p;
7155#ifdef FEAT_MBYTE
7156 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007157 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007158#endif
7159 }
7160 }
7161
7162 vim_free(reg_prev_sub);
7163 if (newsub != source) /* newsub was allocated, just keep it */
7164 reg_prev_sub = newsub;
7165 else /* no ~ found, need to save newsub */
7166 reg_prev_sub = vim_strsave(newsub);
7167 return newsub;
7168}
7169
7170#ifdef FEAT_EVAL
7171static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7172
7173/* These pointers are used instead of reg_match and reg_mmatch for
7174 * reg_submatch(). Needed for when the substitution string is an expression
7175 * that contains a call to substitute() and submatch(). */
7176static regmatch_T *submatch_match;
7177static regmmatch_T *submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007178static linenr_T submatch_firstlnum;
7179static linenr_T submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007180static int submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007181#endif
7182
7183#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7184/*
7185 * vim_regsub() - perform substitutions after a vim_regexec() or
7186 * vim_regexec_multi() match.
7187 *
7188 * If "copy" is TRUE really copy into "dest".
7189 * If "copy" is FALSE nothing is copied, this is just to find out the length
7190 * of the result.
7191 *
7192 * If "backslash" is TRUE, a backslash will be removed later, need to double
7193 * them to keep them, and insert a backslash before a CR to avoid it being
7194 * replaced with a line break later.
7195 *
7196 * Note: The matched text must not change between the call of
7197 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7198 * references invalid!
7199 *
7200 * Returns the size of the replacement, including terminating NUL.
7201 */
7202 int
7203vim_regsub(rmp, source, dest, copy, magic, backslash)
7204 regmatch_T *rmp;
7205 char_u *source;
7206 char_u *dest;
7207 int copy;
7208 int magic;
7209 int backslash;
7210{
7211 reg_match = rmp;
7212 reg_mmatch = NULL;
7213 reg_maxline = 0;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01007214 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007215 return vim_regsub_both(source, dest, copy, magic, backslash);
7216}
7217#endif
7218
7219 int
7220vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7221 regmmatch_T *rmp;
7222 linenr_T lnum;
7223 char_u *source;
7224 char_u *dest;
7225 int copy;
7226 int magic;
7227 int backslash;
7228{
7229 reg_match = NULL;
7230 reg_mmatch = rmp;
7231 reg_buf = curbuf; /* always works on the current buffer! */
7232 reg_firstlnum = lnum;
7233 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7234 return vim_regsub_both(source, dest, copy, magic, backslash);
7235}
7236
7237 static int
7238vim_regsub_both(source, dest, copy, magic, backslash)
7239 char_u *source;
7240 char_u *dest;
7241 int copy;
7242 int magic;
7243 int backslash;
7244{
7245 char_u *src;
7246 char_u *dst;
7247 char_u *s;
7248 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007249 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007250 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007251 fptr_T func_all = (fptr_T)NULL;
7252 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007253 linenr_T clnum = 0; /* init for GCC */
7254 int len = 0; /* init for GCC */
7255#ifdef FEAT_EVAL
7256 static char_u *eval_result = NULL;
7257#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007258
7259 /* Be paranoid... */
7260 if (source == NULL || dest == NULL)
7261 {
7262 EMSG(_(e_null));
7263 return 0;
7264 }
7265 if (prog_magic_wrong())
7266 return 0;
7267 src = source;
7268 dst = dest;
7269
7270 /*
7271 * When the substitute part starts with "\=" evaluate it as an expression.
7272 */
7273 if (source[0] == '\\' && source[1] == '='
7274#ifdef FEAT_EVAL
7275 && !can_f_submatch /* can't do this recursively */
7276#endif
7277 )
7278 {
7279#ifdef FEAT_EVAL
7280 /* To make sure that the length doesn't change between checking the
7281 * length and copying the string, and to speed up things, the
7282 * resulting string is saved from the call with "copy" == FALSE to the
7283 * call with "copy" == TRUE. */
7284 if (copy)
7285 {
7286 if (eval_result != NULL)
7287 {
7288 STRCPY(dest, eval_result);
7289 dst += STRLEN(eval_result);
7290 vim_free(eval_result);
7291 eval_result = NULL;
7292 }
7293 }
7294 else
7295 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007296 win_T *save_reg_win;
7297 int save_ireg_ic;
7298
7299 vim_free(eval_result);
7300
7301 /* The expression may contain substitute(), which calls us
7302 * recursively. Make sure submatch() gets the text from the first
7303 * level. Don't need to save "reg_buf", because
7304 * vim_regexec_multi() can't be called recursively. */
7305 submatch_match = reg_match;
7306 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007307 submatch_firstlnum = reg_firstlnum;
7308 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007309 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007310 save_reg_win = reg_win;
7311 save_ireg_ic = ireg_ic;
7312 can_f_submatch = TRUE;
7313
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007314 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007315 if (eval_result != NULL)
7316 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007317 int had_backslash = FALSE;
7318
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007319 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007320 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007321 /* Change NL to CR, so that it becomes a line break,
7322 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007323 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007324 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007325 *s = CAR;
7326 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007327 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007328 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007329 /* Change NL to CR here too, so that this works:
7330 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7331 * abc\
7332 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007333 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007334 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007335 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007336 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007337 had_backslash = TRUE;
7338 }
7339 }
7340 if (had_backslash && backslash)
7341 {
7342 /* Backslashes will be consumed, need to double them. */
7343 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7344 if (s != NULL)
7345 {
7346 vim_free(eval_result);
7347 eval_result = s;
7348 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007349 }
7350
7351 dst += STRLEN(eval_result);
7352 }
7353
7354 reg_match = submatch_match;
7355 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007356 reg_firstlnum = submatch_firstlnum;
7357 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007358 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007359 reg_win = save_reg_win;
7360 ireg_ic = save_ireg_ic;
7361 can_f_submatch = FALSE;
7362 }
7363#endif
7364 }
7365 else
7366 while ((c = *src++) != NUL)
7367 {
7368 if (c == '&' && magic)
7369 no = 0;
7370 else if (c == '\\' && *src != NUL)
7371 {
7372 if (*src == '&' && !magic)
7373 {
7374 ++src;
7375 no = 0;
7376 }
7377 else if ('0' <= *src && *src <= '9')
7378 {
7379 no = *src++ - '0';
7380 }
7381 else if (vim_strchr((char_u *)"uUlLeE", *src))
7382 {
7383 switch (*src++)
7384 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007385 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007386 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007387 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007388 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007389 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007390 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007391 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007392 continue;
7393 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007394 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007395 continue;
7396 }
7397 }
7398 }
7399 if (no < 0) /* Ordinary character. */
7400 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007401 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7402 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007403 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007404 if (copy)
7405 {
7406 *dst++ = c;
7407 *dst++ = *src++;
7408 *dst++ = *src++;
7409 }
7410 else
7411 {
7412 dst += 3;
7413 src += 2;
7414 }
7415 continue;
7416 }
7417
Bram Moolenaar071d4272004-06-13 20:20:40 +00007418 if (c == '\\' && *src != NUL)
7419 {
7420 /* Check for abbreviations -- webb */
7421 switch (*src)
7422 {
7423 case 'r': c = CAR; ++src; break;
7424 case 'n': c = NL; ++src; break;
7425 case 't': c = TAB; ++src; break;
7426 /* Oh no! \e already has meaning in subst pat :-( */
7427 /* case 'e': c = ESC; ++src; break; */
7428 case 'b': c = Ctrl_H; ++src; break;
7429
7430 /* If "backslash" is TRUE the backslash will be removed
7431 * later. Used to insert a literal CR. */
7432 default: if (backslash)
7433 {
7434 if (copy)
7435 *dst = '\\';
7436 ++dst;
7437 }
7438 c = *src++;
7439 }
7440 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007441#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007442 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007443 c = mb_ptr2char(src - 1);
7444#endif
7445
Bram Moolenaardb552d602006-03-23 22:59:57 +00007446 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007447 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007448 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007449 func_one = (fptr_T)(func_one(&cc, c));
7450 else if (func_all != (fptr_T)NULL)
7451 /* Turbo C complains without the typecast */
7452 func_all = (fptr_T)(func_all(&cc, c));
7453 else /* just copy */
7454 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007455
7456#ifdef FEAT_MBYTE
7457 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007458 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007459 int totlen = mb_ptr2len(src - 1);
7460
Bram Moolenaar071d4272004-06-13 20:20:40 +00007461 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007462 mb_char2bytes(cc, dst);
7463 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007464 if (enc_utf8)
7465 {
7466 int clen = utf_ptr2len(src - 1);
7467
7468 /* If the character length is shorter than "totlen", there
7469 * are composing characters; copy them as-is. */
7470 if (clen < totlen)
7471 {
7472 if (copy)
7473 mch_memmove(dst + 1, src - 1 + clen,
7474 (size_t)(totlen - clen));
7475 dst += totlen - clen;
7476 }
7477 }
7478 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007479 }
7480 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007481#endif
7482 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007483 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007484 dst++;
7485 }
7486 else
7487 {
7488 if (REG_MULTI)
7489 {
7490 clnum = reg_mmatch->startpos[no].lnum;
7491 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7492 s = NULL;
7493 else
7494 {
7495 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7496 if (reg_mmatch->endpos[no].lnum == clnum)
7497 len = reg_mmatch->endpos[no].col
7498 - reg_mmatch->startpos[no].col;
7499 else
7500 len = (int)STRLEN(s);
7501 }
7502 }
7503 else
7504 {
7505 s = reg_match->startp[no];
7506 if (reg_match->endp[no] == NULL)
7507 s = NULL;
7508 else
7509 len = (int)(reg_match->endp[no] - s);
7510 }
7511 if (s != NULL)
7512 {
7513 for (;;)
7514 {
7515 if (len == 0)
7516 {
7517 if (REG_MULTI)
7518 {
7519 if (reg_mmatch->endpos[no].lnum == clnum)
7520 break;
7521 if (copy)
7522 *dst = CAR;
7523 ++dst;
7524 s = reg_getline(++clnum);
7525 if (reg_mmatch->endpos[no].lnum == clnum)
7526 len = reg_mmatch->endpos[no].col;
7527 else
7528 len = (int)STRLEN(s);
7529 }
7530 else
7531 break;
7532 }
7533 else if (*s == NUL) /* we hit NUL. */
7534 {
7535 if (copy)
7536 EMSG(_(e_re_damg));
7537 goto exit;
7538 }
7539 else
7540 {
7541 if (backslash && (*s == CAR || *s == '\\'))
7542 {
7543 /*
7544 * Insert a backslash in front of a CR, otherwise
7545 * it will be replaced by a line break.
7546 * Number of backslashes will be halved later,
7547 * double them here.
7548 */
7549 if (copy)
7550 {
7551 dst[0] = '\\';
7552 dst[1] = *s;
7553 }
7554 dst += 2;
7555 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007556 else
7557 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007558#ifdef FEAT_MBYTE
7559 if (has_mbyte)
7560 c = mb_ptr2char(s);
7561 else
7562#endif
7563 c = *s;
7564
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007565 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007566 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007567 func_one = (fptr_T)(func_one(&cc, c));
7568 else if (func_all != (fptr_T)NULL)
7569 /* Turbo C complains without the typecast */
7570 func_all = (fptr_T)(func_all(&cc, c));
7571 else /* just copy */
7572 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007573
7574#ifdef FEAT_MBYTE
7575 if (has_mbyte)
7576 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007577 int l;
7578
7579 /* Copy composing characters separately, one
7580 * at a time. */
7581 if (enc_utf8)
7582 l = utf_ptr2len(s) - 1;
7583 else
7584 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007585
7586 s += l;
7587 len -= l;
7588 if (copy)
7589 mb_char2bytes(cc, dst);
7590 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007591 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007592 else
7593#endif
7594 if (copy)
7595 *dst = cc;
7596 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007597 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007598
Bram Moolenaar071d4272004-06-13 20:20:40 +00007599 ++s;
7600 --len;
7601 }
7602 }
7603 }
7604 no = -1;
7605 }
7606 }
7607 if (copy)
7608 *dst = NUL;
7609
7610exit:
7611 return (int)((dst - dest) + 1);
7612}
7613
7614#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007615static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7616
Bram Moolenaar071d4272004-06-13 20:20:40 +00007617/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007618 * Call reg_getline() with the line numbers from the submatch. If a
7619 * substitute() was used the reg_maxline and other values have been
7620 * overwritten.
7621 */
7622 static char_u *
7623reg_getline_submatch(lnum)
7624 linenr_T lnum;
7625{
7626 char_u *s;
7627 linenr_T save_first = reg_firstlnum;
7628 linenr_T save_max = reg_maxline;
7629
7630 reg_firstlnum = submatch_firstlnum;
7631 reg_maxline = submatch_maxline;
7632
7633 s = reg_getline(lnum);
7634
7635 reg_firstlnum = save_first;
7636 reg_maxline = save_max;
7637 return s;
7638}
7639
7640/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007641 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007642 * allocated memory.
7643 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7644 */
7645 char_u *
7646reg_submatch(no)
7647 int no;
7648{
7649 char_u *retval = NULL;
7650 char_u *s;
7651 int len;
7652 int round;
7653 linenr_T lnum;
7654
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007655 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007656 return NULL;
7657
7658 if (submatch_match == NULL)
7659 {
7660 /*
7661 * First round: compute the length and allocate memory.
7662 * Second round: copy the text.
7663 */
7664 for (round = 1; round <= 2; ++round)
7665 {
7666 lnum = submatch_mmatch->startpos[no].lnum;
7667 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7668 return NULL;
7669
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007670 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007671 if (s == NULL) /* anti-crash check, cannot happen? */
7672 break;
7673 if (submatch_mmatch->endpos[no].lnum == lnum)
7674 {
7675 /* Within one line: take form start to end col. */
7676 len = submatch_mmatch->endpos[no].col
7677 - submatch_mmatch->startpos[no].col;
7678 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007679 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007680 ++len;
7681 }
7682 else
7683 {
7684 /* Multiple lines: take start line from start col, middle
7685 * lines completely and end line up to end col. */
7686 len = (int)STRLEN(s);
7687 if (round == 2)
7688 {
7689 STRCPY(retval, s);
7690 retval[len] = '\n';
7691 }
7692 ++len;
7693 ++lnum;
7694 while (lnum < submatch_mmatch->endpos[no].lnum)
7695 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007696 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007697 if (round == 2)
7698 STRCPY(retval + len, s);
7699 len += (int)STRLEN(s);
7700 if (round == 2)
7701 retval[len] = '\n';
7702 ++len;
7703 }
7704 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007705 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007706 submatch_mmatch->endpos[no].col);
7707 len += submatch_mmatch->endpos[no].col;
7708 if (round == 2)
7709 retval[len] = NUL;
7710 ++len;
7711 }
7712
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007713 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007714 {
7715 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007716 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007717 return NULL;
7718 }
7719 }
7720 }
7721 else
7722 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007723 s = submatch_match->startp[no];
7724 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007725 retval = NULL;
7726 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007727 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007728 }
7729
7730 return retval;
7731}
7732#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007733
7734static regengine_T bt_regengine =
7735{
7736 bt_regcomp,
7737 bt_regexec,
7738#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7739 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7740 bt_regexec_nl,
7741#endif
7742 bt_regexec_multi
7743#ifdef DEBUG
7744 ,(char_u *)""
7745#endif
7746};
7747
7748
7749#include "regexp_nfa.c"
7750
7751static regengine_T nfa_regengine =
7752{
7753 nfa_regcomp,
7754 nfa_regexec,
7755#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7756 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7757 nfa_regexec_nl,
7758#endif
7759 nfa_regexec_multi
7760#ifdef DEBUG
7761 ,(char_u *)""
7762#endif
7763};
7764
/*
 * Which regexp engine to use?  Needed for vim_regcomp(), which sets it from
 * the 'regexpengine' option value and from a "\%#=N" pattern prefix.
 * The numbers below must match the values accepted by 'regexpengine'.
 */
static int regexp_engine = 0;
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2
#ifdef DEBUG
/*
 * Engine names for debug messages, indexed by the engine number above.
 * Each name must fit in 30 bytes including the terminating NUL.
 */
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			    };
#endif
7778
7779/*
7780 * Compile a regular expression into internal code.
7781 * Returns the program in allocated memory. Returns NULL for an error.
7782 */
7783 regprog_T *
7784vim_regcomp(expr_arg, re_flags)
7785 char_u *expr_arg;
7786 int re_flags;
7787{
7788 regprog_T *prog = NULL;
7789 char_u *expr = expr_arg;
7790
7791 syntax_error = FALSE;
7792 regexp_engine = p_re;
7793
7794 /* Check for prefix "\%#=", that sets the regexp engine */
7795 if (STRNCMP(expr, "\\%#=", 4) == 0)
7796 {
7797 int newengine = expr[4] - '0';
7798
7799 if (newengine == AUTOMATIC_ENGINE
7800 || newengine == BACKTRACKING_ENGINE
7801 || newengine == NFA_ENGINE)
7802 {
7803 regexp_engine = expr[4] - '0';
7804 expr += 5;
7805#ifdef DEBUG
7806 EMSG3("New regexp mode selected (%d): %s", regexp_engine,
7807 regname[newengine]);
7808#endif
7809 }
7810 else
7811 {
7812 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
7813 regexp_engine = AUTOMATIC_ENGINE;
7814 }
7815 }
7816#ifdef DEBUG
7817 bt_regengine.expr = expr;
7818 nfa_regengine.expr = expr;
7819#endif
7820
7821 /*
7822 * First try the NFA engine, unless backtracking was requested.
7823 */
7824 if (regexp_engine != BACKTRACKING_ENGINE)
7825 prog = nfa_regengine.regcomp(expr, re_flags);
7826 else
7827 prog = bt_regengine.regcomp(expr, re_flags);
7828
7829 if (prog == NULL) /* error compiling regexp with initial engine */
7830 {
7831#ifdef DEBUG
7832 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
7833 {
7834 FILE *f;
7835 f = fopen("debug.log", "a");
7836 if (f)
7837 {
7838 if (!syntax_error)
7839 fprintf(f, "NFA engine could not handle \"%s\"\n", expr);
7840 else
7841 fprintf(f, "Syntax error in \"%s\"\n", expr);
7842 fclose(f);
7843 }
7844 else
7845 EMSG("(NFA) Could not open \"debug.log\" to write !!!");
7846 /*
7847 if (syntax_error)
7848 EMSG("NFA Regexp: Syntax Error !");
7849 */
7850 }
7851#endif
7852 /*
7853 * If NFA engine failed, then revert to the backtracking engine.
7854 * Except when there was a syntax error, which was properly handled by
7855 * NFA engine.
7856 */
7857 if (regexp_engine == AUTOMATIC_ENGINE)
7858 if (!syntax_error)
7859 prog = bt_regengine.regcomp(expr, re_flags);
7860
7861 } /* endif prog==NULL */
7862
7863
7864 return prog;
7865}
7866
7867/*
7868 * Match a regexp against a string.
7869 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7870 * Uses curbuf for line count and 'iskeyword'.
7871 *
7872 * Return TRUE if there is a match, FALSE if not.
7873 */
7874 int
7875vim_regexec(rmp, line, col)
7876 regmatch_T *rmp;
7877 char_u *line; /* string to match against */
7878 colnr_T col; /* column to start looking for match */
7879{
7880 return rmp->regprog->engine->regexec(rmp, line, col);
7881}
7882
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Variant of vim_regexec() that treats a "\n" in "line" as a line break.
 * The work is done by the engine that compiled "rmp->regprog".
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;
    colnr_T	col;
{
    regengine_T	*engine = rmp->regprog->engine;

    return engine->regexec_nl(rmp, line, col);
}
#endif
7897
7898/*
7899 * Match a regexp against multiple lines.
7900 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7901 * Uses curbuf for line count and 'iskeyword'.
7902 *
7903 * Return zero if there is no match. Return number of lines contained in the
7904 * match otherwise.
7905 */
7906 long
7907vim_regexec_multi(rmp, win, buf, lnum, col, tm)
7908 regmmatch_T *rmp;
7909 win_T *win; /* window in which to search or NULL */
7910 buf_T *buf; /* buffer in which to search */
7911 linenr_T lnum; /* nr of line to start looking for match */
7912 colnr_T col; /* column to start looking for match */
7913 proftime_T *tm; /* timeout limit or NULL */
7914{
7915 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
7916}