blob: ae29ef53b9192f513f590591b50329b520432704 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
/*
 * Structure for regexp "program".  This is essentially a linear encoding
 * of a nondeterministic finite-state machine (aka syntax charts or
 * "railroad normal form" in parsing technology).  Each node is an opcode
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
 * The operand of some types of node is a literal string; for others, it is a
 * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
 * is the first node of the branch.
 * (NB this is *not* a tree structure: the tail of the branch connects to the
 * thing following the set of BRANCHes.)
 *
 * pattern      is coded like:
 *
 *                        +-----------------+
 *                        |                 V
 * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
 *                   |      ^    |          ^
 *                   +------+    +----------+
 *
 *
 *                     +------------------+
 *                     V                  |
 * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
 *                   |      |               ^                      ^
 *                   |      +---------------+                      |
 *                   +---------------------------------------------+
 *
 *
 *                     +----------------------+
 *                     V                      |
 * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 *                   |               |           ^                      ^
 *                   |               +-----------+                      |
 *                   +--------------------------------------------------+
 *
 *
 *                                      +-------------------------+
 *                                      V                         |
 * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 *                   |                              |                ^
 *                   |                              +----------------+
 *                   +-----------------------------------------------+
 *
 *
 * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 *                   |       |                ^       ^
 *                   |       +----------------+       |
 *                   +--------------------------------+
 *
 *                                                    +---------+
 *                                                    |         V
 * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 *                   |      |          |          |     ^                   ^
 *                   |      |          |          +-----+                   |
 *                   |      |          +----------------+                   |
 *                   |      +---------------------------+                   |
 *                   +------------------------------------------------------+
 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
 * one alternative.
 */
147
/*
 * The opcodes are:
 */

/* definition       number     opnd?    meaning */
#define END             0   /*          End of program or NOMATCH operand. */
#define BOL             1   /*          Match "" at beginning of line. */
#define EOL             2   /*          Match "" at end of line. */
#define BRANCH          3   /* node     Match this alternative, or the
                             *          next... */
#define BACK            4   /*          Match "", "next" ptr points backward. */
#define EXACTLY         5   /* str      Match this string. */
#define NOTHING         6   /*          Match empty string. */
#define STAR            7   /* node     Match this (simple) thing 0 or more
                             *          times. */
#define PLUS            8   /* node     Match this (simple) thing 1 or more
                             *          times. */
#define MATCH           9   /* node     match the operand zero-width */
#define NOMATCH         10  /* node     check for no match with operand */
#define BEHIND          11  /* node     look behind for a match with operand */
#define NOBEHIND        12  /* node     look behind for no match with operand */
#define SUBPAT          13  /* node     match the operand here */
#define BRACE_SIMPLE    14  /* node     Match this (simple) thing between m and
                             *          n times (\{m,n\}). */
#define BOW             15  /*          Match "" after [^a-zA-Z0-9_] */
#define EOW             16  /*          Match "" at [^a-zA-Z0-9_] */
#define BRACE_LIMITS    17  /* nr nr    define the min & max for BRACE_SIMPLE
                             *          and BRACE_COMPLEX. */
#define NEWL            18  /*          Match line-break */
#define BHPOS           19  /*          End position for BEHIND or NOBEHIND */


/* character classes: 20-48 normal, 50-78 include a line-break */
#define ADD_NL          30
/* FIRST_NL and LAST_NL are parenthesized so that they stay a single value
 * when used inside a larger expression. */
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20  /*          Match any one character. */
#define ANYOF           21  /* str      Match any character in this string. */
#define ANYBUT          22  /* str      Match any character not in this
                             *          string. */
#define IDENT           23  /*          Match identifier char */
#define SIDENT          24  /*          Match identifier char but no digit */
#define KWORD           25  /*          Match keyword char */
#define SKWORD          26  /*          Match word char but no digit */
#define FNAME           27  /*          Match file name char */
#define SFNAME          28  /*          Match file name char but no digit */
#define PRINT           29  /*          Match printable char */
#define SPRINT          30  /*          Match printable char but no digit */
#define WHITE           31  /*          Match whitespace char */
#define NWHITE          32  /*          Match non-whitespace char */
#define DIGIT           33  /*          Match digit char */
#define NDIGIT          34  /*          Match non-digit char */
#define HEX             35  /*          Match hex char */
#define NHEX            36  /*          Match non-hex char */
#define OCTAL           37  /*          Match octal char */
#define NOCTAL          38  /*          Match non-octal char */
#define WORD            39  /*          Match word char */
#define NWORD           40  /*          Match non-word char */
#define HEAD            41  /*          Match head char */
#define NHEAD           42  /*          Match non-head char */
#define ALPHA           43  /*          Match alpha char */
#define NALPHA          44  /*          Match non-alpha char */
#define LOWER           45  /*          Match lowercase char */
#define NLOWER          46  /*          Match non-lowercase char */
#define UPPER           47  /*          Match uppercase char */
#define NUPPER          48  /*          Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* TRUE when "op" is one of the character classes that also match a
 * line-break, i.e. a normal class opcode with ADD_NL added. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)

#define MOPEN           80  /* -89      Mark this point in input as start of
                             *          \( subexpr.  MOPEN + 0 marks start of
                             *          match. */
#define MCLOSE          90  /* -99      Analogous to MOPEN.  MCLOSE + 0 marks
                             *          end of match. */
#define BACKREF         100 /* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN          110 /* -119     Mark this point in input as start of
                             *          \z( subexpr. */
# define ZCLOSE         120 /* -129     Analogous to ZOPEN. */
# define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */

#define NOPEN           150 /*          Mark this point in input as start of
                                        \%( subexpr. */
#define NCLOSE          151 /*          Analogous to NOPEN. */

#define MULTIBYTECODE   200 /* mbc      Match one multi-byte character */
#define RE_BOF          201 /*          Match "" at beginning of file. */
#define RE_EOF          202 /*          Match "" at end of file. */
#define CURSOR          203 /*          Match location of cursor. */

#define RE_LNUM         204 /* nr cmp   Match line number */
#define RE_COL          205 /* nr cmp   Match column number */
#define RE_VCOL         206 /* nr cmp   Match virtual column number */

#define RE_MARK         207 /* mark cmp Match mark position */
#define RE_VISUAL       208 /*          Match Visual area */

247
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248/*
249 * Magic characters have a special meaning, they don't match literally.
250 * Magic characters are negative. This separates them from literal characters
251 * (possibly multi-byte). Only ASCII characters can be Magic.
252 */
253#define Magic(x) ((int)(x) - 256)
254#define un_Magic(x) ((x) + 256)
255#define is_Magic(x) ((x) < 0)
256
257static int no_Magic __ARGS((int x));
258static int toggle_Magic __ARGS((int x));
259
260 static int
261no_Magic(x)
262 int x;
263{
264 if (is_Magic(x))
265 return un_Magic(x);
266 return x;
267}
268
269 static int
270toggle_Magic(x)
271 int x;
272{
273 if (is_Magic(x))
274 return un_Magic(x);
275 return Magic(x);
276}
277
278/*
279 * The first byte of the regexp internal "program" is actually this magic
280 * number; the start node begins in the second byte. It's used to catch the
281 * most severe mutilation of the program by the caller.
282 */
283
284#define REGMAGIC 0234
285
286/*
287 * Opcode notes:
288 *
289 * BRANCH The set of branches constituting a single choice are hooked
290 * together with their "next" pointers, since precedence prevents
291 * anything being concatenated to any individual branch. The
292 * "next" pointer of the last BRANCH in a choice points to the
293 * thing following the whole choice. This is also where the
294 * final "next" pointer of each individual branch points; each
295 * branch starts with the operand node of a BRANCH node.
296 *
297 * BACK Normal "next" pointers all implicitly point forward; BACK
298 * exists to make loop structures possible.
299 *
300 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
301 * BRANCH structures using BACK. Simple cases (one character
302 * per match) are implemented with STAR and PLUS for speed
303 * and to minimize recursive plunges.
304 *
305 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
306 * node, and defines the min and max limits to be used for that
307 * node.
308 *
309 * MOPEN,MCLOSE ...are numbered at compile time.
310 * ZOPEN,ZCLOSE ...ditto
311 */
312
/*
 * A node is one char of opcode followed by two chars of "next" pointer.
 * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 * value is a positive offset from the opcode of the node containing it.
 * An operand, if any, simply follows the node.  (Note that much of the
 * code generation knows about this implicit relationship.)
 *
 * Using two bytes for the "next" pointer is vast overkill for most things,
 * but allows patterns to get big without disasters.
 *
 * All of these macros evaluate "p" more than once or index it directly, so
 * "p" must be an expression without side effects.
 */
/* The opcode stored in the first byte of the node at "p". */
#define OP(p)           ((int)*(p))
/* The "next" offset stored in the two bytes after the opcode. */
#define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* Pointer to the operand, which starts right after the three header bytes. */
#define OPERAND(p)      ((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
                        + ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)  ((p)[7])
333
334/*
335 * Utility definitions.
336 */
337#define UCHARAT(p) ((int)*(char_u *)(p))
338
339/* Used for an error (down from) vim_regcomp(): give the error message, set
340 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000341#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000342#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200343#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
344#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
345#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346
347#define MAX_LIMIT (32767L << 16L)
348
349static int re_multi_type __ARGS((int));
350static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
351static char_u *cstrchr __ARGS((char_u *, int));
352
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#ifdef BT_REGEXP_DUMP
354static void regdump __ARGS((char_u *, bt_regprog_T *));
355#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357static char_u *regprop __ARGS((char_u *));
358#endif
359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
361static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
362static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
363static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200364#ifdef FEAT_SYN_HL
Bram Moolenaar5de820b2013-06-02 15:01:57 +0200365static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
366static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
Bram Moolenaar01d89dd2013-06-03 19:41:06 +0200367#endif
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +0200368static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
Bram Moolenaar2976c022013-06-05 21:30:37 +0200369static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
Bram Moolenaar071d4272004-06-13 20:20:40 +0000370#define NOT_MULTI 0
371#define MULTI_ONE 1
372#define MULTI_MULT 2
373/*
374 * Return NOT_MULTI if c is not a "multi" operator.
375 * Return MULTI_ONE if c is a single "multi" operator.
376 * Return MULTI_MULT if c is a multi "multi" operator.
377 */
378 static int
379re_multi_type(c)
380 int c;
381{
382 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
383 return MULTI_ONE;
384 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
385 return MULTI_MULT;
386 return NOT_MULTI;
387}
388
389/*
390 * Flags to be passed up and down.
391 */
392#define HASWIDTH 0x1 /* Known never to match null string. */
393#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
394#define SPSTART 0x4 /* Starts with * or +. */
395#define HASNL 0x8 /* Contains some \n. */
396#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
397#define WORST 0 /* Worst case. */
398
399/*
400 * When regcode is set to this value, code is not emitted and size is computed
401 * instead.
402 */
403#define JUST_CALC_SIZE ((char_u *) -1)
404
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000405static char_u *reg_prev_sub = NULL;
406
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n  - New line (NL).
 *  \r  - Carriage Return (CR).
 *  \t  - Tab (TAB).
 *  \e  - Escape (ESC).
 *  \b  - Backspace (Ctrl_H).
 *  \d  - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
 *  \x  - Character code in hex, eg \x4a
 *  \u  - Multibyte character code, eg \u20ac
 *  \U  - Long multibyte character code, eg \U12345678
 */
423static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000424static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000425
426static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000427static int get_char_class __ARGS((char_u **pp));
428static int get_equi_class __ARGS((char_u **pp));
429static void reg_equi_class __ARGS((int c));
430static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000431static char_u *skip_anyof __ARGS((char_u *p));
432static void init_class_tab __ARGS((void));
433
434/*
435 * Translate '\x' to its control character, except "\n", which is Magic.
436 */
437 static int
438backslash_trans(c)
439 int c;
440{
441 switch (c)
442 {
443 case 'r': return CAR;
444 case 't': return TAB;
445 case 'e': return ESC;
446 case 'b': return BS;
447 }
448 return c;
449}
450
451/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000452 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
454 * recognized. Otherwise "pp" is advanced to after the item.
455 */
456 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000457get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000458 char_u **pp;
459{
460 static const char *(class_names[]) =
461 {
462 "alnum:]",
463#define CLASS_ALNUM 0
464 "alpha:]",
465#define CLASS_ALPHA 1
466 "blank:]",
467#define CLASS_BLANK 2
468 "cntrl:]",
469#define CLASS_CNTRL 3
470 "digit:]",
471#define CLASS_DIGIT 4
472 "graph:]",
473#define CLASS_GRAPH 5
474 "lower:]",
475#define CLASS_LOWER 6
476 "print:]",
477#define CLASS_PRINT 7
478 "punct:]",
479#define CLASS_PUNCT 8
480 "space:]",
481#define CLASS_SPACE 9
482 "upper:]",
483#define CLASS_UPPER 10
484 "xdigit:]",
485#define CLASS_XDIGIT 11
486 "tab:]",
487#define CLASS_TAB 12
488 "return:]",
489#define CLASS_RETURN 13
490 "backspace:]",
491#define CLASS_BACKSPACE 14
492 "escape:]",
493#define CLASS_ESCAPE 15
494 };
495#define CLASS_NONE 99
496 int i;
497
498 if ((*pp)[1] == ':')
499 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000500 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000501 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
502 {
503 *pp += STRLEN(class_names[i]) + 2;
504 return i;
505 }
506 }
507 return CLASS_NONE;
508}
509
510/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000511 * Specific version of character class functions.
512 * Using a table to keep this fast.
513 */
514static short class_tab[256];
515
516#define RI_DIGIT 0x01
517#define RI_HEX 0x02
518#define RI_OCTAL 0x04
519#define RI_WORD 0x08
520#define RI_HEAD 0x10
521#define RI_ALPHA 0x20
522#define RI_LOWER 0x40
523#define RI_UPPER 0x80
524#define RI_WHITE 0x100
525
526 static void
527init_class_tab()
528{
529 int i;
530 static int done = FALSE;
531
532 if (done)
533 return;
534
535 for (i = 0; i < 256; ++i)
536 {
537 if (i >= '0' && i <= '7')
538 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
539 else if (i >= '8' && i <= '9')
540 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
541 else if (i >= 'a' && i <= 'f')
542 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
543#ifdef EBCDIC
544 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
545 || (i >= 's' && i <= 'z'))
546#else
547 else if (i >= 'g' && i <= 'z')
548#endif
549 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
550 else if (i >= 'A' && i <= 'F')
551 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
552#ifdef EBCDIC
553 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
554 || (i >= 'S' && i <= 'Z'))
555#else
556 else if (i >= 'G' && i <= 'Z')
557#endif
558 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
559 else if (i == '_')
560 class_tab[i] = RI_WORD + RI_HEAD;
561 else
562 class_tab[i] = 0;
563 }
564 class_tab[' '] |= RI_WHITE;
565 class_tab['\t'] |= RI_WHITE;
566 done = TRUE;
567}
568
/*
 * Test character "c" against the RI_ flags in class_tab[].  The result is
 * non-zero when the flag is set.
 * With FEAT_MBYTE a value of 0x100 or higher never matches, it is rejected
 * before class_tab[] is indexed.  In that case "c" is evaluated twice, thus it
 * must not have side effects.
 * "c" is parenthesized in the comparison so that an argument containing an
 * operator with lower precedence than '<' is still compared as a whole.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
590
591/* flags for regflags */
592#define RF_ICASE 1 /* ignore case */
593#define RF_NOICASE 2 /* don't ignore case */
594#define RF_HASNL 4 /* can match a NL */
595#define RF_ICOMBINE 8 /* ignore combining characters */
596#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
597
598/*
599 * Global work variables for vim_regcomp().
600 */
601
602static char_u *regparse; /* Input-scan pointer. */
603static int prevchr_len; /* byte length of previous char */
604static int num_complex_braces; /* Complex \{...} count */
605static int regnpar; /* () count. */
606#ifdef FEAT_SYN_HL
607static int regnzpar; /* \z() count. */
608static int re_has_z; /* \z item detected */
609#endif
610static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
611static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000612static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000613static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
614static unsigned regflags; /* RF_ flags for prog */
615static long brace_min[10]; /* Minimums for complex brace repeats */
616static long brace_max[10]; /* Maximums for complex brace repeats */
617static int brace_count[10]; /* Current counts for complex brace repeats */
618#if defined(FEAT_SYN_HL) || defined(PROTO)
619static int had_eol; /* TRUE when EOL found by vim_regcomp() */
620#endif
621static int one_exactly = FALSE; /* only do one char for EXACTLY */
622
623static int reg_magic; /* magicness of the pattern: */
624#define MAGIC_NONE 1 /* "\V" very unmagic */
625#define MAGIC_OFF 2 /* "\M" or 'magic' off */
626#define MAGIC_ON 3 /* "\m" or 'magic' */
627#define MAGIC_ALL 4 /* "\v" very magic */
628
629static int reg_string; /* matching with a string instead of a buffer
630 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000631static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000632
633/*
634 * META contains all characters that may be magic, except '^' and '$'.
635 */
636
637#ifdef EBCDIC
638static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
639#else
640/* META[] is used often enough to justify turning it into a table. */
641static char_u META_flags[] = {
642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644/* % & ( ) * + . */
645 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
646/* 1 2 3 4 5 6 7 8 9 < = > ? */
647 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
648/* @ A C D F H I K L M O */
649 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
650/* P S U V W X Z [ _ */
651 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
652/* a c d f h i k l m n o */
653 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
654/* p s u v w x z { | ~ */
655 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
656};
657#endif
658
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200659static int curchr; /* currently parsed character */
660/* Previous character. Note: prevchr is sometimes -1 when we are not at the
661 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
662 * because ^ was taken to be magic -- webb */
663static int prevchr;
664static int prevprevchr; /* previous-previous character */
665static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000666
667/* arguments for reg() */
668#define REG_NOPAREN 0 /* toplevel reg() */
669#define REG_PAREN 1 /* \(\) */
670#define REG_ZPAREN 2 /* \z(\) */
671#define REG_NPAREN 3 /* \%(\) */
672
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200673typedef struct
674{
675 char_u *regparse;
676 int prevchr_len;
677 int curchr;
678 int prevchr;
679 int prevprevchr;
680 int nextchr;
681 int at_start;
682 int prev_at_start;
683 int regnpar;
684} parse_state_T;
685
Bram Moolenaar071d4272004-06-13 20:20:40 +0000686/*
687 * Forward declarations for vim_regcomp()'s friends.
688 */
689static void initchr __ARGS((char_u *));
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200690static void save_parse_state __ARGS((parse_state_T *ps));
691static void restore_parse_state __ARGS((parse_state_T *ps));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000692static int getchr __ARGS((void));
693static void skipchr_keepstart __ARGS((void));
694static int peekchr __ARGS((void));
695static void skipchr __ARGS((void));
696static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000697static int gethexchrs __ARGS((int maxinputlen));
698static int getoctchrs __ARGS((void));
699static int getdecchrs __ARGS((void));
700static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000701static void regcomp_start __ARGS((char_u *expr, int flags));
702static char_u *reg __ARGS((int, int *));
703static char_u *regbranch __ARGS((int *flagp));
704static char_u *regconcat __ARGS((int *flagp));
705static char_u *regpiece __ARGS((int *));
706static char_u *regatom __ARGS((int *));
707static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000708#ifdef FEAT_MBYTE
709static int use_multibytecode __ARGS((int c));
710#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000711static int prog_magic_wrong __ARGS((void));
712static char_u *regnext __ARGS((char_u *));
713static void regc __ARGS((int b));
714#ifdef FEAT_MBYTE
715static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200716# define REGMBC(x) regmbc(x);
717# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000718#else
719# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200720# define REGMBC(x)
721# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000722#endif
723static void reginsert __ARGS((int, char_u *));
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200724static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000725static void reginsert_limits __ARGS((int, long, long, char_u *));
726static char_u *re_put_long __ARGS((char_u *pr, long_u val));
727static int read_limits __ARGS((long *, long *));
728static void regtail __ARGS((char_u *, char_u *));
729static void regoptail __ARGS((char_u *, char_u *));
730
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200731static regengine_T bt_regengine;
732static regengine_T nfa_regengine;
733
Bram Moolenaar071d4272004-06-13 20:20:40 +0000734/*
735 * Return TRUE if compiled regular expression "prog" can match a line break.
736 */
737 int
738re_multiline(prog)
739 regprog_T *prog;
740{
741 return (prog->regflags & RF_HASNL);
742}
743
744/*
745 * Return TRUE if compiled regular expression "prog" looks before the start
746 * position (pattern contains "\@<=" or "\@<!").
747 */
748 int
749re_lookbehind(prog)
750 regprog_T *prog;
751{
752 return (prog->regflags & RF_LOOKBH);
753}
754
755/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000756 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
757 * Returns a character representing the class. Zero means that no item was
758 * recognized. Otherwise "pp" is advanced to after the item.
759 */
760 static int
761get_equi_class(pp)
762 char_u **pp;
763{
764 int c;
765 int l = 1;
766 char_u *p = *pp;
767
768 if (p[1] == '=')
769 {
770#ifdef FEAT_MBYTE
771 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000772 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000773#endif
774 if (p[l + 2] == '=' && p[l + 3] == ']')
775 {
776#ifdef FEAT_MBYTE
777 if (has_mbyte)
778 c = mb_ptr2char(p + 2);
779 else
780#endif
781 c = p[2];
782 *pp += l + 4;
783 return c;
784 }
785 }
786 return 0;
787}
788
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200789#ifdef EBCDIC
790/*
791 * Table for equivalence class "c". (IBM-1047)
792 */
793char *EQUIVAL_CLASS_C[16] = {
794 "A\x62\x63\x64\x65\x66\x67",
795 "C\x68",
796 "E\x71\x72\x73\x74",
797 "I\x75\x76\x77\x78",
798 "N\x69",
799 "O\xEB\xEC\xED\xEE\xEF",
800 "U\xFB\xFC\xFD\xFE",
801 "Y\xBA",
802 "a\x42\x43\x44\x45\x46\x47",
803 "c\x48",
804 "e\x51\x52\x53\x54",
805 "i\x55\x56\x57\x58",
806 "n\x49",
807 "o\xCB\xCC\xCD\xCE\xCF",
808 "u\xDB\xDC\xDD\xDE",
809 "y\x8D\xDF",
810};
811#endif
812
Bram Moolenaardf177f62005-02-22 08:39:57 +0000813/*
814 * Produce the bytes for equivalence class "c".
815 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200816 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000817 */
818 static void
819reg_equi_class(c)
820 int c;
821{
822#ifdef FEAT_MBYTE
823 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000824 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000825#endif
826 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200827#ifdef EBCDIC
828 int i;
829
830 /* This might be slower than switch/case below. */
831 for (i = 0; i < 16; i++)
832 {
833 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
834 {
835 char *p = EQUIVAL_CLASS_C[i];
836
837 while (*p != 0)
838 regmbc(*p++);
839 return;
840 }
841 }
842#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000843 switch (c)
844 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000845 case 'A': case '\300': case '\301': case '\302':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200846 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
847 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000848 case '\303': case '\304': case '\305':
849 regmbc('A'); regmbc('\300'); regmbc('\301');
850 regmbc('\302'); regmbc('\303'); regmbc('\304');
851 regmbc('\305');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200852 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
853 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
854 REGMBC(0x1ea2)
855 return;
856 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
857 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000858 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000859 case 'C': case '\307':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200860 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000861 regmbc('C'); regmbc('\307');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200862 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
863 REGMBC(0x10c)
864 return;
865 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
866 CASEMBC(0x1e0e) CASEMBC(0x1e10)
867 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
868 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000869 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000870 case 'E': case '\310': case '\311': case '\312': case '\313':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200871 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
872 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000873 regmbc('E'); regmbc('\310'); regmbc('\311');
874 regmbc('\312'); regmbc('\313');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200875 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
876 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
877 REGMBC(0x1ebc)
878 return;
879 case 'F': CASEMBC(0x1e1e)
880 regmbc('F'); REGMBC(0x1e1e)
881 return;
882 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
883 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
884 CASEMBC(0x1e20)
885 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
886 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
887 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
888 return;
889 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
890 CASEMBC(0x1e26) CASEMBC(0x1e28)
891 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
892 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000893 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000894 case 'I': case '\314': case '\315': case '\316': case '\317':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200895 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
896 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000897 regmbc('I'); regmbc('\314'); regmbc('\315');
898 regmbc('\316'); regmbc('\317');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200899 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
900 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
901 REGMBC(0x1ec8)
902 return;
903 case 'J': CASEMBC(0x134)
904 regmbc('J'); REGMBC(0x134)
905 return;
906 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
907 CASEMBC(0x1e34)
908 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
909 REGMBC(0x1e30) REGMBC(0x1e34)
910 return;
911 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
912 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
913 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
914 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
915 REGMBC(0x1e3a)
916 return;
917 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
918 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000919 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000920 case 'N': case '\321':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200921 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
922 CASEMBC(0x1e48)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000923 regmbc('N'); regmbc('\321');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200924 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
925 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000926 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000927 case 'O': case '\322': case '\323': case '\324': case '\325':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200928 case '\326': case '\330':
929 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
930 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000931 regmbc('O'); regmbc('\322'); regmbc('\323');
932 regmbc('\324'); regmbc('\325'); regmbc('\326');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200933 regmbc('\330');
934 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
935 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
936 REGMBC(0x1ec) REGMBC(0x1ece)
937 return;
938 case 'P': case 0x1e54: case 0x1e56:
939 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
940 return;
941 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
942 CASEMBC(0x1e58) CASEMBC(0x1e5e)
943 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
944 REGMBC(0x1e58) REGMBC(0x1e5e)
945 return;
946 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
947 CASEMBC(0x160) CASEMBC(0x1e60)
948 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
949 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
950 return;
951 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
952 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
953 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
954 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000955 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000956 case 'U': case '\331': case '\332': case '\333': case '\334':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200957 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
958 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
959 CASEMBC(0x1ee6)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000960 regmbc('U'); regmbc('\331'); regmbc('\332');
961 regmbc('\333'); regmbc('\334');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200962 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
963 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
964 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
965 return;
966 case 'V': CASEMBC(0x1e7c)
967 regmbc('V'); REGMBC(0x1e7c)
968 return;
969 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
970 CASEMBC(0x1e84) CASEMBC(0x1e86)
971 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
972 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
973 return;
974 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
975 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000976 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000977 case 'Y': case '\335':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200978 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
979 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000980 regmbc('Y'); regmbc('\335');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200981 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
982 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
983 return;
984 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
985 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
986 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
987 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
988 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000989 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000990 case 'a': case '\340': case '\341': case '\342':
991 case '\343': case '\344': case '\345':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200992 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
993 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000994 regmbc('a'); regmbc('\340'); regmbc('\341');
995 regmbc('\342'); regmbc('\343'); regmbc('\344');
996 regmbc('\345');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200997 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
998 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
999 REGMBC(0x1ea3)
1000 return;
1001 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
1002 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001003 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001004 case 'c': case '\347':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001005 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001006 regmbc('c'); regmbc('\347');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001007 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1008 REGMBC(0x10d)
1009 return;
1010 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
1011 CASEMBC(0x1e11)
1012 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
1013 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001014 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001015 case 'e': case '\350': case '\351': case '\352': case '\353':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001016 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1017 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001018 regmbc('e'); regmbc('\350'); regmbc('\351');
1019 regmbc('\352'); regmbc('\353');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001020 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1021 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1022 REGMBC(0x1ebd)
1023 return;
1024 case 'f': CASEMBC(0x1e1f)
1025 regmbc('f'); REGMBC(0x1e1f)
1026 return;
1027 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1028 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1029 CASEMBC(0x1e21)
1030 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1031 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1032 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1033 return;
1034 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1035 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1036 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1037 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1038 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001039 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001040 case 'i': case '\354': case '\355': case '\356': case '\357':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001041 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1042 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001043 regmbc('i'); regmbc('\354'); regmbc('\355');
1044 regmbc('\356'); regmbc('\357');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001045 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1046 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1047 return;
1048 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1049 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1050 return;
1051 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1052 CASEMBC(0x1e35)
1053 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1054 REGMBC(0x1e31) REGMBC(0x1e35)
1055 return;
1056 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1057 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1058 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1059 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1060 REGMBC(0x1e3b)
1061 return;
1062 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1063 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001064 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001065 case 'n': case '\361':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001066 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1067 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001068 regmbc('n'); regmbc('\361');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001069 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1070 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001071 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001072 case 'o': case '\362': case '\363': case '\364': case '\365':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001073 case '\366': case '\370':
1074 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1075 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001076 regmbc('o'); regmbc('\362'); regmbc('\363');
1077 regmbc('\364'); regmbc('\365'); regmbc('\366');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001078 regmbc('\370');
1079 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1080 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1081 REGMBC(0x1ed) REGMBC(0x1ecf)
1082 return;
1083 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1084 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1085 return;
1086 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1087 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1088 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1089 REGMBC(0x1e59) REGMBC(0x1e5f)
1090 return;
1091 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1092 CASEMBC(0x161) CASEMBC(0x1e61)
1093 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1094 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1095 return;
1096 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1097 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1098 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1099 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001100 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001101 case 'u': case '\371': case '\372': case '\373': case '\374':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001102 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1103 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1104 CASEMBC(0x1ee7)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001105 regmbc('u'); regmbc('\371'); regmbc('\372');
1106 regmbc('\373'); regmbc('\374');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001107 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1108 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1109 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1110 return;
1111 case 'v': CASEMBC(0x1e7d)
1112 regmbc('v'); REGMBC(0x1e7d)
1113 return;
1114 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1115 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1116 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1117 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1118 REGMBC(0x1e98)
1119 return;
1120 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1121 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001122 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001123 case 'y': case '\375': case '\377':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001124 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1125 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001126 regmbc('y'); regmbc('\375'); regmbc('\377');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001127 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1128 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1129 return;
1130 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1131 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1132 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1133 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1134 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001135 return;
1136 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001137#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001138 }
1139 regmbc(c);
1140}
1141
1142/*
1143 * Check for a collating element "[.a.]". "pp" points to the '['.
1144 * Returns a character. Zero means that no item was recognized. Otherwise
1145 * "pp" is advanced to after the item.
1146 * Currently only single characters are recognized!
1147 */
1148 static int
1149get_coll_element(pp)
1150 char_u **pp;
1151{
1152 int c;
1153 int l = 1;
1154 char_u *p = *pp;
1155
1156 if (p[1] == '.')
1157 {
1158#ifdef FEAT_MBYTE
1159 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001160 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001161#endif
1162 if (p[l + 2] == '.' && p[l + 3] == ']')
1163 {
1164#ifdef FEAT_MBYTE
1165 if (has_mbyte)
1166 c = mb_ptr2char(p + 2);
1167 else
1168#endif
1169 c = p[2];
1170 *pp += l + 4;
1171 return c;
1172 }
1173 }
1174 return 0;
1175}
1176
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001177static void get_cpo_flags __ARGS((void));
1178static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1179static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1180
1181 static void
1182get_cpo_flags()
1183{
1184 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1185 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1186}
Bram Moolenaardf177f62005-02-22 08:39:57 +00001187
1188/*
1189 * Skip over a "[]" range.
1190 * "p" must point to the character after the '['.
1191 * The returned pointer is on the matching ']', or the terminating NUL.
1192 */
1193 static char_u *
1194skip_anyof(p)
1195 char_u *p;
1196{
Bram Moolenaardf177f62005-02-22 08:39:57 +00001197#ifdef FEAT_MBYTE
1198 int l;
1199#endif
1200
Bram Moolenaardf177f62005-02-22 08:39:57 +00001201 if (*p == '^') /* Complement of range. */
1202 ++p;
1203 if (*p == ']' || *p == '-')
1204 ++p;
1205 while (*p != NUL && *p != ']')
1206 {
1207#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001208 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001209 p += l;
1210 else
1211#endif
1212 if (*p == '-')
1213 {
1214 ++p;
1215 if (*p != ']' && *p != NUL)
1216 mb_ptr_adv(p);
1217 }
1218 else if (*p == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001219 && !reg_cpo_bsl
Bram Moolenaardf177f62005-02-22 08:39:57 +00001220 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001221 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
Bram Moolenaardf177f62005-02-22 08:39:57 +00001222 p += 2;
1223 else if (*p == '[')
1224 {
1225 if (get_char_class(&p) == CLASS_NONE
1226 && get_equi_class(&p) == 0
1227 && get_coll_element(&p) == 0)
1228 ++p; /* It was not a class name */
1229 }
1230 else
1231 ++p;
1232 }
1233
1234 return p;
1235}
1236
1237/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001238 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001239 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001240 * Take care of characters with a backslash in front of it.
1241 * Skip strings inside [ and ].
1242 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1243 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1244 * is changed in-place.
1245 */
1246 char_u *
1247skip_regexp(startp, dirc, magic, newp)
1248 char_u *startp;
1249 int dirc;
1250 int magic;
1251 char_u **newp;
1252{
1253 int mymagic;
1254 char_u *p = startp;
1255
1256 if (magic)
1257 mymagic = MAGIC_ON;
1258 else
1259 mymagic = MAGIC_OFF;
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001260 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001261
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001262 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001263 {
1264 if (p[0] == dirc) /* found end of regexp */
1265 break;
1266 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1267 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1268 {
1269 p = skip_anyof(p + 1);
1270 if (p[0] == NUL)
1271 break;
1272 }
1273 else if (p[0] == '\\' && p[1] != NUL)
1274 {
1275 if (dirc == '?' && newp != NULL && p[1] == '?')
1276 {
1277 /* change "\?" to "?", make a copy first. */
1278 if (*newp == NULL)
1279 {
1280 *newp = vim_strsave(startp);
1281 if (*newp != NULL)
1282 p = *newp + (p - startp);
1283 }
1284 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001285 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001286 else
1287 ++p;
1288 }
1289 else
1290 ++p; /* skip next character */
1291 if (*p == 'v')
1292 mymagic = MAGIC_ALL;
1293 else if (*p == 'V')
1294 mymagic = MAGIC_NONE;
1295 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001296 }
1297 return p;
1298}
1299
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001300static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
1301
Bram Moolenaar071d4272004-06-13 20:20:40 +00001302/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001303 * bt_regcomp() - compile a regular expression into internal code for the
1304 * traditional back track matcher.
Bram Moolenaar86b68352004-12-27 21:59:20 +00001305 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001306 *
1307 * We can't allocate space until we know how big the compiled form will be,
1308 * but we can't compile it (and thus know how big it is) until we've got a
1309 * place to put the code. So we cheat: we compile it twice, once with code
1310 * generation turned off and size counting turned on, and once "for real".
1311 * This also means that we don't allocate space until we are sure that the
1312 * thing really will compile successfully, and we never have to move the
1313 * code and thus invalidate pointers into it. (Note that it has to be in
1314 * one piece because vim_free() must be able to free it all.)
1315 *
1316 * Whether upper/lower case is to be ignored is decided when executing the
1317 * program, it does not matter here.
1318 *
1319 * Beware that the optimization-preparation code in here knows about some
1320 * of the structure of the compiled regexp.
1321 * "re_flags": RE_MAGIC and/or RE_STRING.
1322 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001323 static regprog_T *
1324bt_regcomp(expr, re_flags)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001325 char_u *expr;
1326 int re_flags;
1327{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001328 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001329 char_u *scan;
1330 char_u *longest;
1331 int len;
1332 int flags;
1333
1334 if (expr == NULL)
1335 EMSG_RET_NULL(_(e_null));
1336
1337 init_class_tab();
1338
1339 /*
1340 * First pass: determine size, legality.
1341 */
1342 regcomp_start(expr, re_flags);
1343 regcode = JUST_CALC_SIZE;
1344 regc(REGMAGIC);
1345 if (reg(REG_NOPAREN, &flags) == NULL)
1346 return NULL;
1347
1348 /* Small enough for pointer-storage convention? */
1349#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1350 if (regsize >= 65536L - 256L)
1351 EMSG_RET_NULL(_("E339: Pattern too long"));
1352#endif
1353
1354 /* Allocate space. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001355 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001356 if (r == NULL)
1357 return NULL;
1358
1359 /*
1360 * Second pass: emit code.
1361 */
1362 regcomp_start(expr, re_flags);
1363 regcode = r->program;
1364 regc(REGMAGIC);
Bram Moolenaard3005802009-11-25 17:21:32 +00001365 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001366 {
1367 vim_free(r);
Bram Moolenaard3005802009-11-25 17:21:32 +00001368 if (reg_toolong)
1369 EMSG_RET_NULL(_("E339: Pattern too long"));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001370 return NULL;
1371 }
1372
1373 /* Dig out information for optimizations. */
1374 r->regstart = NUL; /* Worst-case defaults. */
1375 r->reganch = 0;
1376 r->regmust = NULL;
1377 r->regmlen = 0;
1378 r->regflags = regflags;
1379 if (flags & HASNL)
1380 r->regflags |= RF_HASNL;
1381 if (flags & HASLOOKBH)
1382 r->regflags |= RF_LOOKBH;
1383#ifdef FEAT_SYN_HL
1384 /* Remember whether this pattern has any \z specials in it. */
1385 r->reghasz = re_has_z;
1386#endif
1387 scan = r->program + 1; /* First BRANCH. */
1388 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1389 {
1390 scan = OPERAND(scan);
1391
1392 /* Starting-point info. */
1393 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1394 {
1395 r->reganch++;
1396 scan = regnext(scan);
1397 }
1398
1399 if (OP(scan) == EXACTLY)
1400 {
1401#ifdef FEAT_MBYTE
1402 if (has_mbyte)
1403 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1404 else
1405#endif
1406 r->regstart = *OPERAND(scan);
1407 }
1408 else if ((OP(scan) == BOW
1409 || OP(scan) == EOW
1410 || OP(scan) == NOTHING
1411 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1412 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1413 && OP(regnext(scan)) == EXACTLY)
1414 {
1415#ifdef FEAT_MBYTE
1416 if (has_mbyte)
1417 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1418 else
1419#endif
1420 r->regstart = *OPERAND(regnext(scan));
1421 }
1422
1423 /*
1424 * If there's something expensive in the r.e., find the longest
1425 * literal string that must appear and make it the regmust. Resolve
1426 * ties in favor of later strings, since the regstart check works
1427 * with the beginning of the r.e. and avoiding duplication
1428 * strengthens checking. Not a strong reason, but sufficient in the
1429 * absence of others.
1430 */
1431 /*
1432 * When the r.e. starts with BOW, it is faster to look for a regmust
1433 * first. Used a lot for "#" and "*" commands. (Added by mool).
1434 */
1435 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1436 && !(flags & HASNL))
1437 {
1438 longest = NULL;
1439 len = 0;
1440 for (; scan != NULL; scan = regnext(scan))
1441 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1442 {
1443 longest = OPERAND(scan);
1444 len = (int)STRLEN(OPERAND(scan));
1445 }
1446 r->regmust = longest;
1447 r->regmlen = len;
1448 }
1449 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001450#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00001451 regdump(expr, r);
1452#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001453 r->engine = &bt_regengine;
1454 return (regprog_T *)r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001455}
1456
1457/*
1458 * Setup to parse the regexp. Used once to get the length and once to do it.
1459 */
1460 static void
1461regcomp_start(expr, re_flags)
1462 char_u *expr;
1463 int re_flags; /* see vim_regcomp() */
1464{
1465 initchr(expr);
1466 if (re_flags & RE_MAGIC)
1467 reg_magic = MAGIC_ON;
1468 else
1469 reg_magic = MAGIC_OFF;
1470 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001471 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02001472 get_cpo_flags();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001473
1474 num_complex_braces = 0;
1475 regnpar = 1;
1476 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1477#ifdef FEAT_SYN_HL
1478 regnzpar = 1;
1479 re_has_z = 0;
1480#endif
1481 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001482 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001483 regflags = 0;
1484#if defined(FEAT_SYN_HL) || defined(PROTO)
1485 had_eol = FALSE;
1486#endif
1487}
1488
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag, which tells whether the EOL item "$" was found
 * during the previous call to vim_regcomp().  This is messy, but it works
 * fine.
 */
    int
vim_regcomp_had_eol()
{
    int     seen_eol = had_eol;

    return seen_eol;
}
#endif
1500
1501/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001502 * Parse regular expression, i.e. main body or parenthesized thing.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001503 *
1504 * Caller must absorb opening parenthesis.
1505 *
1506 * Combining parenthesis handling with the base level of regular expression
1507 * is a trifle forced, but the need to tie the tails of the branches to what
1508 * follows makes it hard to avoid.
1509 */
1510 static char_u *
1511reg(paren, flagp)
1512 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1513 int *flagp;
1514{
1515 char_u *ret;
1516 char_u *br;
1517 char_u *ender;
1518 int parno = 0;
1519 int flags;
1520
1521 *flagp = HASWIDTH; /* Tentatively. */
1522
1523#ifdef FEAT_SYN_HL
1524 if (paren == REG_ZPAREN)
1525 {
1526 /* Make a ZOPEN node. */
1527 if (regnzpar >= NSUBEXP)
1528 EMSG_RET_NULL(_("E50: Too many \\z("));
1529 parno = regnzpar;
1530 regnzpar++;
1531 ret = regnode(ZOPEN + parno);
1532 }
1533 else
1534#endif
1535 if (paren == REG_PAREN)
1536 {
1537 /* Make a MOPEN node. */
1538 if (regnpar >= NSUBEXP)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001539 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001540 parno = regnpar;
1541 ++regnpar;
1542 ret = regnode(MOPEN + parno);
1543 }
1544 else if (paren == REG_NPAREN)
1545 {
1546 /* Make a NOPEN node. */
1547 ret = regnode(NOPEN);
1548 }
1549 else
1550 ret = NULL;
1551
1552 /* Pick up the branches, linking them together. */
1553 br = regbranch(&flags);
1554 if (br == NULL)
1555 return NULL;
1556 if (ret != NULL)
1557 regtail(ret, br); /* [MZ]OPEN -> first. */
1558 else
1559 ret = br;
1560 /* If one of the branches can be zero-width, the whole thing can.
1561 * If one of the branches has * at start or matches a line-break, the
1562 * whole thing can. */
1563 if (!(flags & HASWIDTH))
1564 *flagp &= ~HASWIDTH;
1565 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1566 while (peekchr() == Magic('|'))
1567 {
1568 skipchr();
1569 br = regbranch(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001570 if (br == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001571 return NULL;
1572 regtail(ret, br); /* BRANCH -> BRANCH. */
1573 if (!(flags & HASWIDTH))
1574 *flagp &= ~HASWIDTH;
1575 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1576 }
1577
1578 /* Make a closing node, and hook it on the end. */
1579 ender = regnode(
1580#ifdef FEAT_SYN_HL
1581 paren == REG_ZPAREN ? ZCLOSE + parno :
1582#endif
1583 paren == REG_PAREN ? MCLOSE + parno :
1584 paren == REG_NPAREN ? NCLOSE : END);
1585 regtail(ret, ender);
1586
1587 /* Hook the tails of the branches to the closing node. */
1588 for (br = ret; br != NULL; br = regnext(br))
1589 regoptail(br, ender);
1590
1591 /* Check for proper termination. */
1592 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1593 {
1594#ifdef FEAT_SYN_HL
1595 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001596 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001597 else
1598#endif
1599 if (paren == REG_NPAREN)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001600 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001601 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001602 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001603 }
1604 else if (paren == REG_NOPAREN && peekchr() != NUL)
1605 {
1606 if (curchr == Magic(')'))
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001607 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001608 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001609 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001610 /* NOTREACHED */
1611 }
1612 /*
1613 * Here we set the flag allowing back references to this set of
1614 * parentheses.
1615 */
1616 if (paren == REG_PAREN)
1617 had_endbrace[parno] = TRUE; /* have seen the close paren */
1618 return ret;
1619}
1620
1621/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001622 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001623 * Implements the & operator.
1624 */
1625 static char_u *
1626regbranch(flagp)
1627 int *flagp;
1628{
1629 char_u *ret;
1630 char_u *chain = NULL;
1631 char_u *latest;
1632 int flags;
1633
1634 *flagp = WORST | HASNL; /* Tentatively. */
1635
1636 ret = regnode(BRANCH);
1637 for (;;)
1638 {
1639 latest = regconcat(&flags);
1640 if (latest == NULL)
1641 return NULL;
1642 /* If one of the branches has width, the whole thing has. If one of
1643 * the branches anchors at start-of-line, the whole thing does.
1644 * If one of the branches uses look-behind, the whole thing does. */
1645 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1646 /* If one of the branches doesn't match a line-break, the whole thing
1647 * doesn't. */
1648 *flagp &= ~HASNL | (flags & HASNL);
1649 if (chain != NULL)
1650 regtail(chain, latest);
1651 if (peekchr() != Magic('&'))
1652 break;
1653 skipchr();
1654 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001655 if (reg_toolong)
1656 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001657 reginsert(MATCH, latest);
1658 chain = latest;
1659 }
1660
1661 return ret;
1662}
1663
1664/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001665 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001666 * Implements the concatenation operator.
1667 */
1668 static char_u *
1669regconcat(flagp)
1670 int *flagp;
1671{
1672 char_u *first = NULL;
1673 char_u *chain = NULL;
1674 char_u *latest;
1675 int flags;
1676 int cont = TRUE;
1677
1678 *flagp = WORST; /* Tentatively. */
1679
1680 while (cont)
1681 {
1682 switch (peekchr())
1683 {
1684 case NUL:
1685 case Magic('|'):
1686 case Magic('&'):
1687 case Magic(')'):
1688 cont = FALSE;
1689 break;
1690 case Magic('Z'):
1691#ifdef FEAT_MBYTE
1692 regflags |= RF_ICOMBINE;
1693#endif
1694 skipchr_keepstart();
1695 break;
1696 case Magic('c'):
1697 regflags |= RF_ICASE;
1698 skipchr_keepstart();
1699 break;
1700 case Magic('C'):
1701 regflags |= RF_NOICASE;
1702 skipchr_keepstart();
1703 break;
1704 case Magic('v'):
1705 reg_magic = MAGIC_ALL;
1706 skipchr_keepstart();
1707 curchr = -1;
1708 break;
1709 case Magic('m'):
1710 reg_magic = MAGIC_ON;
1711 skipchr_keepstart();
1712 curchr = -1;
1713 break;
1714 case Magic('M'):
1715 reg_magic = MAGIC_OFF;
1716 skipchr_keepstart();
1717 curchr = -1;
1718 break;
1719 case Magic('V'):
1720 reg_magic = MAGIC_NONE;
1721 skipchr_keepstart();
1722 curchr = -1;
1723 break;
1724 default:
1725 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001726 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001727 return NULL;
1728 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1729 if (chain == NULL) /* First piece. */
1730 *flagp |= flags & SPSTART;
1731 else
1732 regtail(chain, latest);
1733 chain = latest;
1734 if (first == NULL)
1735 first = latest;
1736 break;
1737 }
1738 }
1739 if (first == NULL) /* Loop ran zero times. */
1740 first = regnode(NOTHING);
1741 return first;
1742}
1743
1744/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001745 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001746 *
1747 * Note that the branching code sequences used for = and the general cases
1748 * of * and + are somewhat optimized: they use the same NOTHING node as
1749 * both the endmarker for their branch list and the body of the last branch.
1750 * It might seem that this node could be dispensed with entirely, but the
1751 * endmarker role is not redundant.
1752 */
1753 static char_u *
1754regpiece(flagp)
1755 int *flagp;
1756{
1757 char_u *ret;
1758 int op;
1759 char_u *next;
1760 int flags;
1761 long minval;
1762 long maxval;
1763
1764 ret = regatom(&flags);
1765 if (ret == NULL)
1766 return NULL;
1767
1768 op = peekchr();
1769 if (re_multi_type(op) == NOT_MULTI)
1770 {
1771 *flagp = flags;
1772 return ret;
1773 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001774 /* default flags */
1775 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1776
1777 skipchr();
1778 switch (op)
1779 {
1780 case Magic('*'):
1781 if (flags & SIMPLE)
1782 reginsert(STAR, ret);
1783 else
1784 {
1785 /* Emit x* as (x&|), where & means "self". */
1786 reginsert(BRANCH, ret); /* Either x */
1787 regoptail(ret, regnode(BACK)); /* and loop */
1788 regoptail(ret, ret); /* back */
1789 regtail(ret, regnode(BRANCH)); /* or */
1790 regtail(ret, regnode(NOTHING)); /* null. */
1791 }
1792 break;
1793
1794 case Magic('+'):
1795 if (flags & SIMPLE)
1796 reginsert(PLUS, ret);
1797 else
1798 {
1799 /* Emit x+ as x(&|), where & means "self". */
1800 next = regnode(BRANCH); /* Either */
1801 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001802 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001803 regtail(next, regnode(BRANCH)); /* or */
1804 regtail(ret, regnode(NOTHING)); /* null. */
1805 }
1806 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1807 break;
1808
1809 case Magic('@'):
1810 {
1811 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001812 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001813
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001814 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001815 switch (no_Magic(getchr()))
1816 {
1817 case '=': lop = MATCH; break; /* \@= */
1818 case '!': lop = NOMATCH; break; /* \@! */
1819 case '>': lop = SUBPAT; break; /* \@> */
1820 case '<': switch (no_Magic(getchr()))
1821 {
1822 case '=': lop = BEHIND; break; /* \@<= */
1823 case '!': lop = NOBEHIND; break; /* \@<! */
1824 }
1825 }
1826 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001827 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001828 reg_magic == MAGIC_ALL);
1829 /* Look behind must match with behind_pos. */
1830 if (lop == BEHIND || lop == NOBEHIND)
1831 {
1832 regtail(ret, regnode(BHPOS));
1833 *flagp |= HASLOOKBH;
1834 }
1835 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001836 if (lop == BEHIND || lop == NOBEHIND)
1837 {
1838 if (nr < 0)
1839 nr = 0; /* no limit is same as zero limit */
1840 reginsert_nr(lop, nr, ret);
1841 }
1842 else
1843 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001844 break;
1845 }
1846
1847 case Magic('?'):
1848 case Magic('='):
1849 /* Emit x= as (x|) */
1850 reginsert(BRANCH, ret); /* Either x */
1851 regtail(ret, regnode(BRANCH)); /* or */
1852 next = regnode(NOTHING); /* null. */
1853 regtail(ret, next);
1854 regoptail(ret, next);
1855 break;
1856
1857 case Magic('{'):
1858 if (!read_limits(&minval, &maxval))
1859 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001860 if (flags & SIMPLE)
1861 {
1862 reginsert(BRACE_SIMPLE, ret);
1863 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1864 }
1865 else
1866 {
1867 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001868 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001869 reg_magic == MAGIC_ALL);
1870 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1871 regoptail(ret, regnode(BACK));
1872 regoptail(ret, ret);
1873 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1874 ++num_complex_braces;
1875 }
1876 if (minval > 0 && maxval > 0)
1877 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1878 break;
1879 }
1880 if (re_multi_type(peekchr()) != NOT_MULTI)
1881 {
1882 /* Can't have a multi follow a multi. */
1883 if (peekchr() == Magic('*'))
1884 sprintf((char *)IObuff, _("E61: Nested %s*"),
1885 reg_magic >= MAGIC_ON ? "" : "\\");
1886 else
1887 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1888 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1889 EMSG_RET_NULL(IObuff);
1890 }
1891
1892 return ret;
1893}
1894
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001895/* When making changes to classchars also change nfa_classcodes. */
1896static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1897static int classcodes[] = {
1898 ANY, IDENT, SIDENT, KWORD, SKWORD,
1899 FNAME, SFNAME, PRINT, SPRINT,
1900 WHITE, NWHITE, DIGIT, NDIGIT,
1901 HEX, NHEX, OCTAL, NOCTAL,
1902 WORD, NWORD, HEAD, NHEAD,
1903 ALPHA, NALPHA, LOWER, NLOWER,
1904 UPPER, NUPPER
1905};
1906
Bram Moolenaar071d4272004-06-13 20:20:40 +00001907/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001908 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001909 *
1910 * Optimization: gobbles an entire sequence of ordinary characters so that
1911 * it can turn them into a single node, which is smaller to store and
1912 * faster to run. Don't do this when one_exactly is set.
1913 */
1914 static char_u *
1915regatom(flagp)
1916 int *flagp;
1917{
1918 char_u *ret;
1919 int flags;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001920 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001921 char_u *p;
1922 int extra = 0;
1923
1924 *flagp = WORST; /* Tentatively. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001925
1926 c = getchr();
1927 switch (c)
1928 {
1929 case Magic('^'):
1930 ret = regnode(BOL);
1931 break;
1932
1933 case Magic('$'):
1934 ret = regnode(EOL);
1935#if defined(FEAT_SYN_HL) || defined(PROTO)
1936 had_eol = TRUE;
1937#endif
1938 break;
1939
1940 case Magic('<'):
1941 ret = regnode(BOW);
1942 break;
1943
1944 case Magic('>'):
1945 ret = regnode(EOW);
1946 break;
1947
1948 case Magic('_'):
1949 c = no_Magic(getchr());
1950 if (c == '^') /* "\_^" is start-of-line */
1951 {
1952 ret = regnode(BOL);
1953 break;
1954 }
1955 if (c == '$') /* "\_$" is end-of-line */
1956 {
1957 ret = regnode(EOL);
1958#if defined(FEAT_SYN_HL) || defined(PROTO)
1959 had_eol = TRUE;
1960#endif
1961 break;
1962 }
1963
1964 extra = ADD_NL;
1965 *flagp |= HASNL;
1966
1967 /* "\_[" is character range plus newline */
1968 if (c == '[')
1969 goto collection;
1970
1971 /* "\_x" is character class plus newline */
1972 /*FALLTHROUGH*/
1973
1974 /*
1975 * Character classes.
1976 */
1977 case Magic('.'):
1978 case Magic('i'):
1979 case Magic('I'):
1980 case Magic('k'):
1981 case Magic('K'):
1982 case Magic('f'):
1983 case Magic('F'):
1984 case Magic('p'):
1985 case Magic('P'):
1986 case Magic('s'):
1987 case Magic('S'):
1988 case Magic('d'):
1989 case Magic('D'):
1990 case Magic('x'):
1991 case Magic('X'):
1992 case Magic('o'):
1993 case Magic('O'):
1994 case Magic('w'):
1995 case Magic('W'):
1996 case Magic('h'):
1997 case Magic('H'):
1998 case Magic('a'):
1999 case Magic('A'):
2000 case Magic('l'):
2001 case Magic('L'):
2002 case Magic('u'):
2003 case Magic('U'):
2004 p = vim_strchr(classchars, no_Magic(c));
2005 if (p == NULL)
2006 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002007#ifdef FEAT_MBYTE
2008 /* When '.' is followed by a composing char ignore the dot, so that
2009 * the composing char is matched here. */
2010 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2011 {
2012 c = getchr();
2013 goto do_multibyte;
2014 }
2015#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002016 ret = regnode(classcodes[p - classchars] + extra);
2017 *flagp |= HASWIDTH | SIMPLE;
2018 break;
2019
2020 case Magic('n'):
2021 if (reg_string)
2022 {
2023 /* In a string "\n" matches a newline character. */
2024 ret = regnode(EXACTLY);
2025 regc(NL);
2026 regc(NUL);
2027 *flagp |= HASWIDTH | SIMPLE;
2028 }
2029 else
2030 {
2031 /* In buffer text "\n" matches the end of a line. */
2032 ret = regnode(NEWL);
2033 *flagp |= HASWIDTH | HASNL;
2034 }
2035 break;
2036
2037 case Magic('('):
2038 if (one_exactly)
2039 EMSG_ONE_RET_NULL;
2040 ret = reg(REG_PAREN, &flags);
2041 if (ret == NULL)
2042 return NULL;
2043 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2044 break;
2045
2046 case NUL:
2047 case Magic('|'):
2048 case Magic('&'):
2049 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002050 if (one_exactly)
2051 EMSG_ONE_RET_NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002052 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
2053 /* NOTREACHED */
2054
2055 case Magic('='):
2056 case Magic('?'):
2057 case Magic('+'):
2058 case Magic('@'):
2059 case Magic('{'):
2060 case Magic('*'):
2061 c = no_Magic(c);
2062 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2063 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2064 ? "" : "\\", c);
2065 EMSG_RET_NULL(IObuff);
2066 /* NOTREACHED */
2067
2068 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002069 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002070 {
2071 char_u *lp;
2072
2073 ret = regnode(EXACTLY);
2074 lp = reg_prev_sub;
2075 while (*lp != NUL)
2076 regc(*lp++);
2077 regc(NUL);
2078 if (*reg_prev_sub != NUL)
2079 {
2080 *flagp |= HASWIDTH;
2081 if ((lp - reg_prev_sub) == 1)
2082 *flagp |= SIMPLE;
2083 }
2084 }
2085 else
2086 EMSG_RET_NULL(_(e_nopresub));
2087 break;
2088
2089 case Magic('1'):
2090 case Magic('2'):
2091 case Magic('3'):
2092 case Magic('4'):
2093 case Magic('5'):
2094 case Magic('6'):
2095 case Magic('7'):
2096 case Magic('8'):
2097 case Magic('9'):
2098 {
2099 int refnum;
2100
2101 refnum = c - Magic('0');
2102 /*
2103 * Check if the back reference is legal. We must have seen the
2104 * close brace.
2105 * TODO: Should also check that we don't refer to something
2106 * that is repeated (+*=): what instance of the repetition
2107 * should we match?
2108 */
2109 if (!had_endbrace[refnum])
2110 {
2111 /* Trick: check if "@<=" or "@<!" follows, in which case
2112 * the \1 can appear before the referenced match. */
2113 for (p = regparse; *p != NUL; ++p)
2114 if (p[0] == '@' && p[1] == '<'
2115 && (p[2] == '!' || p[2] == '='))
2116 break;
2117 if (*p == NUL)
2118 EMSG_RET_NULL(_("E65: Illegal back reference"));
2119 }
2120 ret = regnode(BACKREF + refnum);
2121 }
2122 break;
2123
Bram Moolenaar071d4272004-06-13 20:20:40 +00002124 case Magic('z'):
2125 {
2126 c = no_Magic(getchr());
2127 switch (c)
2128 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002129#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002130 case '(': if (reg_do_extmatch != REX_SET)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002131 EMSG_RET_NULL(_(e_z_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002132 if (one_exactly)
2133 EMSG_ONE_RET_NULL;
2134 ret = reg(REG_ZPAREN, &flags);
2135 if (ret == NULL)
2136 return NULL;
2137 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2138 re_has_z = REX_SET;
2139 break;
2140
2141 case '1':
2142 case '2':
2143 case '3':
2144 case '4':
2145 case '5':
2146 case '6':
2147 case '7':
2148 case '8':
2149 case '9': if (reg_do_extmatch != REX_USE)
Bram Moolenaar5de820b2013-06-02 15:01:57 +02002150 EMSG_RET_NULL(_(e_z1_not_allowed));
Bram Moolenaar071d4272004-06-13 20:20:40 +00002151 ret = regnode(ZREF + c - '0');
2152 re_has_z = REX_USE;
2153 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002154#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002155
2156 case 's': ret = regnode(MOPEN + 0);
2157 break;
2158
2159 case 'e': ret = regnode(MCLOSE + 0);
2160 break;
2161
2162 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2163 }
2164 }
2165 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002166
2167 case Magic('%'):
2168 {
2169 c = no_Magic(getchr());
2170 switch (c)
2171 {
2172 /* () without a back reference */
2173 case '(':
2174 if (one_exactly)
2175 EMSG_ONE_RET_NULL;
2176 ret = reg(REG_NPAREN, &flags);
2177 if (ret == NULL)
2178 return NULL;
2179 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2180 break;
2181
2182 /* Catch \%^ and \%$ regardless of where they appear in the
2183 * pattern -- regardless of whether or not it makes sense. */
2184 case '^':
2185 ret = regnode(RE_BOF);
2186 break;
2187
2188 case '$':
2189 ret = regnode(RE_EOF);
2190 break;
2191
2192 case '#':
2193 ret = regnode(CURSOR);
2194 break;
2195
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002196 case 'V':
2197 ret = regnode(RE_VISUAL);
2198 break;
2199
Bram Moolenaar071d4272004-06-13 20:20:40 +00002200 /* \%[abc]: Emit as a list of branches, all ending at the last
2201 * branch which matches nothing. */
2202 case '[':
2203 if (one_exactly) /* doesn't nest */
2204 EMSG_ONE_RET_NULL;
2205 {
2206 char_u *lastbranch;
2207 char_u *lastnode = NULL;
2208 char_u *br;
2209
2210 ret = NULL;
2211 while ((c = getchr()) != ']')
2212 {
2213 if (c == NUL)
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002214 EMSG2_RET_NULL(_(e_missing_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002215 reg_magic == MAGIC_ALL);
2216 br = regnode(BRANCH);
2217 if (ret == NULL)
2218 ret = br;
2219 else
2220 regtail(lastnode, br);
2221
2222 ungetchr();
2223 one_exactly = TRUE;
2224 lastnode = regatom(flagp);
2225 one_exactly = FALSE;
2226 if (lastnode == NULL)
2227 return NULL;
2228 }
2229 if (ret == NULL)
Bram Moolenaar2976c022013-06-05 21:30:37 +02002230 EMSG2_RET_NULL(_(e_empty_sb),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002231 reg_magic == MAGIC_ALL);
2232 lastbranch = regnode(BRANCH);
2233 br = regnode(NOTHING);
2234 if (ret != JUST_CALC_SIZE)
2235 {
2236 regtail(lastnode, br);
2237 regtail(lastbranch, br);
2238 /* connect all branches to the NOTHING
2239 * branch at the end */
2240 for (br = ret; br != lastnode; )
2241 {
2242 if (OP(br) == BRANCH)
2243 {
2244 regtail(br, lastbranch);
2245 br = OPERAND(br);
2246 }
2247 else
2248 br = regnext(br);
2249 }
2250 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002251 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002252 break;
2253 }
2254
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002255 case 'd': /* %d123 decimal */
2256 case 'o': /* %o123 octal */
2257 case 'x': /* %xab hex 2 */
2258 case 'u': /* %uabcd hex 4 */
2259 case 'U': /* %U1234abcd hex 8 */
2260 {
2261 int i;
2262
2263 switch (c)
2264 {
2265 case 'd': i = getdecchrs(); break;
2266 case 'o': i = getoctchrs(); break;
2267 case 'x': i = gethexchrs(2); break;
2268 case 'u': i = gethexchrs(4); break;
2269 case 'U': i = gethexchrs(8); break;
2270 default: i = -1; break;
2271 }
2272
2273 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002274 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002275 _("E678: Invalid character after %s%%[dxouU]"),
2276 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002277#ifdef FEAT_MBYTE
2278 if (use_multibytecode(i))
2279 ret = regnode(MULTIBYTECODE);
2280 else
2281#endif
2282 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002283 if (i == 0)
2284 regc(0x0a);
2285 else
2286#ifdef FEAT_MBYTE
2287 regmbc(i);
2288#else
2289 regc(i);
2290#endif
2291 regc(NUL);
2292 *flagp |= HASWIDTH;
2293 break;
2294 }
2295
Bram Moolenaar071d4272004-06-13 20:20:40 +00002296 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002297 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2298 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002299 {
2300 long_u n = 0;
2301 int cmp;
2302
2303 cmp = c;
2304 if (cmp == '<' || cmp == '>')
2305 c = getchr();
2306 while (VIM_ISDIGIT(c))
2307 {
2308 n = n * 10 + (c - '0');
2309 c = getchr();
2310 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002311 if (c == '\'' && n == 0)
2312 {
2313 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2314 c = getchr();
2315 ret = regnode(RE_MARK);
2316 if (ret == JUST_CALC_SIZE)
2317 regsize += 2;
2318 else
2319 {
2320 *regcode++ = c;
2321 *regcode++ = cmp;
2322 }
2323 break;
2324 }
2325 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002326 {
2327 if (c == 'l')
2328 ret = regnode(RE_LNUM);
2329 else if (c == 'c')
2330 ret = regnode(RE_COL);
2331 else
2332 ret = regnode(RE_VCOL);
2333 if (ret == JUST_CALC_SIZE)
2334 regsize += 5;
2335 else
2336 {
2337 /* put the number and the optional
2338 * comparator after the opcode */
2339 regcode = re_put_long(regcode, n);
2340 *regcode++ = cmp;
2341 }
2342 break;
2343 }
2344 }
2345
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002346 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002347 reg_magic == MAGIC_ALL);
2348 }
2349 }
2350 break;
2351
2352 case Magic('['):
2353collection:
2354 {
2355 char_u *lp;
2356
2357 /*
2358 * If there is no matching ']', we assume the '[' is a normal
2359 * character. This makes 'incsearch' and ":help [" work.
2360 */
2361 lp = skip_anyof(regparse);
2362 if (*lp == ']') /* there is a matching ']' */
2363 {
2364 int startc = -1; /* > 0 when next '-' is a range */
2365 int endc;
2366
2367 /*
2368 * In a character class, different parsing rules apply.
2369 * Not even \ is special anymore, nothing is.
2370 */
2371 if (*regparse == '^') /* Complement of range. */
2372 {
2373 ret = regnode(ANYBUT + extra);
2374 regparse++;
2375 }
2376 else
2377 ret = regnode(ANYOF + extra);
2378
2379 /* At the start ']' and '-' mean the literal character. */
2380 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002381 {
2382 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002383 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002384 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002385
2386 while (*regparse != NUL && *regparse != ']')
2387 {
2388 if (*regparse == '-')
2389 {
2390 ++regparse;
2391 /* The '-' is not used for a range at the end and
2392 * after or before a '\n'. */
2393 if (*regparse == ']' || *regparse == NUL
2394 || startc == -1
2395 || (regparse[0] == '\\' && regparse[1] == 'n'))
2396 {
2397 regc('-');
2398 startc = '-'; /* [--x] is a range */
2399 }
2400 else
2401 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002402 /* Also accept "a-[.z.]" */
2403 endc = 0;
2404 if (*regparse == '[')
2405 endc = get_coll_element(&regparse);
2406 if (endc == 0)
2407 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002408#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002409 if (has_mbyte)
2410 endc = mb_ptr2char_adv(&regparse);
2411 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002412#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002413 endc = *regparse++;
2414 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002415
2416 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002417 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002418 endc = coll_get_char();
2419
Bram Moolenaar071d4272004-06-13 20:20:40 +00002420 if (startc > endc)
2421 EMSG_RET_NULL(_(e_invrange));
2422#ifdef FEAT_MBYTE
2423 if (has_mbyte && ((*mb_char2len)(startc) > 1
2424 || (*mb_char2len)(endc) > 1))
2425 {
2426 /* Limit to a range of 256 chars */
2427 if (endc > startc + 256)
2428 EMSG_RET_NULL(_(e_invrange));
2429 while (++startc <= endc)
2430 regmbc(startc);
2431 }
2432 else
2433#endif
2434 {
2435#ifdef EBCDIC
2436 int alpha_only = FALSE;
2437
2438 /* for alphabetical range skip the gaps
2439 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2440 if (isalpha(startc) && isalpha(endc))
2441 alpha_only = TRUE;
2442#endif
2443 while (++startc <= endc)
2444#ifdef EBCDIC
2445 if (!alpha_only || isalpha(startc))
2446#endif
2447 regc(startc);
2448 }
2449 startc = -1;
2450 }
2451 }
2452 /*
2453 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2454 * accepts "\t", "\e", etc., but only when the 'l' flag in
2455 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002456 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002457 */
2458 else if (*regparse == '\\'
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002459 && !reg_cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002460 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
Bram Moolenaar1cd3f2c2013-06-05 12:43:09 +02002461 || (!reg_cpo_lit
Bram Moolenaar071d4272004-06-13 20:20:40 +00002462 && vim_strchr(REGEXP_ABBR,
2463 regparse[1]) != NULL)))
2464 {
2465 regparse++;
2466 if (*regparse == 'n')
2467 {
2468 /* '\n' in range: also match NL */
2469 if (ret != JUST_CALC_SIZE)
2470 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002471 /* Using \n inside [^] does not change what
2472 * matches. "[^\n]" is the same as ".". */
2473 if (*ret == ANYOF)
2474 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002475 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002476 *flagp |= HASNL;
2477 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002478 /* else: must have had a \n already */
2479 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002480 regparse++;
2481 startc = -1;
2482 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002483 else if (*regparse == 'd'
2484 || *regparse == 'o'
2485 || *regparse == 'x'
2486 || *regparse == 'u'
2487 || *regparse == 'U')
2488 {
2489 startc = coll_get_char();
2490 if (startc == 0)
2491 regc(0x0a);
2492 else
2493#ifdef FEAT_MBYTE
2494 regmbc(startc);
2495#else
2496 regc(startc);
2497#endif
2498 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 else
2500 {
2501 startc = backslash_trans(*regparse++);
2502 regc(startc);
2503 }
2504 }
2505 else if (*regparse == '[')
2506 {
2507 int c_class;
2508 int cu;
2509
Bram Moolenaardf177f62005-02-22 08:39:57 +00002510 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002511 startc = -1;
2512 /* Characters assumed to be 8 bits! */
2513 switch (c_class)
2514 {
2515 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002516 c_class = get_equi_class(&regparse);
2517 if (c_class != 0)
2518 {
2519 /* produce equivalence class */
2520 reg_equi_class(c_class);
2521 }
2522 else if ((c_class =
2523 get_coll_element(&regparse)) != 0)
2524 {
2525 /* produce a collating element */
2526 regmbc(c_class);
2527 }
2528 else
2529 {
2530 /* literal '[', allow [[-x] as a range */
2531 startc = *regparse++;
2532 regc(startc);
2533 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002534 break;
2535 case CLASS_ALNUM:
2536 for (cu = 1; cu <= 255; cu++)
2537 if (isalnum(cu))
2538 regc(cu);
2539 break;
2540 case CLASS_ALPHA:
2541 for (cu = 1; cu <= 255; cu++)
2542 if (isalpha(cu))
2543 regc(cu);
2544 break;
2545 case CLASS_BLANK:
2546 regc(' ');
2547 regc('\t');
2548 break;
2549 case CLASS_CNTRL:
2550 for (cu = 1; cu <= 255; cu++)
2551 if (iscntrl(cu))
2552 regc(cu);
2553 break;
2554 case CLASS_DIGIT:
2555 for (cu = 1; cu <= 255; cu++)
2556 if (VIM_ISDIGIT(cu))
2557 regc(cu);
2558 break;
2559 case CLASS_GRAPH:
2560 for (cu = 1; cu <= 255; cu++)
2561 if (isgraph(cu))
2562 regc(cu);
2563 break;
2564 case CLASS_LOWER:
2565 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002566 if (MB_ISLOWER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002567 regc(cu);
2568 break;
2569 case CLASS_PRINT:
2570 for (cu = 1; cu <= 255; cu++)
2571 if (vim_isprintc(cu))
2572 regc(cu);
2573 break;
2574 case CLASS_PUNCT:
2575 for (cu = 1; cu <= 255; cu++)
2576 if (ispunct(cu))
2577 regc(cu);
2578 break;
2579 case CLASS_SPACE:
2580 for (cu = 9; cu <= 13; cu++)
2581 regc(cu);
2582 regc(' ');
2583 break;
2584 case CLASS_UPPER:
2585 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002586 if (MB_ISUPPER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002587 regc(cu);
2588 break;
2589 case CLASS_XDIGIT:
2590 for (cu = 1; cu <= 255; cu++)
2591 if (vim_isxdigit(cu))
2592 regc(cu);
2593 break;
2594 case CLASS_TAB:
2595 regc('\t');
2596 break;
2597 case CLASS_RETURN:
2598 regc('\r');
2599 break;
2600 case CLASS_BACKSPACE:
2601 regc('\b');
2602 break;
2603 case CLASS_ESCAPE:
2604 regc('\033');
2605 break;
2606 }
2607 }
2608 else
2609 {
2610#ifdef FEAT_MBYTE
2611 if (has_mbyte)
2612 {
2613 int len;
2614
2615 /* produce a multibyte character, including any
2616 * following composing characters */
2617 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002618 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002619 if (enc_utf8 && utf_char2len(startc) != len)
2620 startc = -1; /* composing chars */
2621 while (--len >= 0)
2622 regc(*regparse++);
2623 }
2624 else
2625#endif
2626 {
2627 startc = *regparse++;
2628 regc(startc);
2629 }
2630 }
2631 }
2632 regc(NUL);
2633 prevchr_len = 1; /* last char was the ']' */
2634 if (*regparse != ']')
2635 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2636 skipchr(); /* let's be friends with the lexer again */
2637 *flagp |= HASWIDTH | SIMPLE;
2638 break;
2639 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002640 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002641 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002642 }
2643 /* FALLTHROUGH */
2644
2645 default:
2646 {
2647 int len;
2648
2649#ifdef FEAT_MBYTE
2650 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002651 * before a multi and when it's a composing char. */
2652 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002653 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002654do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002655 ret = regnode(MULTIBYTECODE);
2656 regmbc(c);
2657 *flagp |= HASWIDTH | SIMPLE;
2658 break;
2659 }
2660#endif
2661
2662 ret = regnode(EXACTLY);
2663
2664 /*
2665 * Append characters as long as:
2666 * - there is no following multi, we then need the character in
2667 * front of it as a single character operand
2668 * - not running into a Magic character
2669 * - "one_exactly" is not set
2670 * But always emit at least one character. Might be a Multi,
2671 * e.g., a "[" without matching "]".
2672 */
2673 for (len = 0; c != NUL && (len == 0
2674 || (re_multi_type(peekchr()) == NOT_MULTI
2675 && !one_exactly
2676 && !is_Magic(c))); ++len)
2677 {
2678 c = no_Magic(c);
2679#ifdef FEAT_MBYTE
2680 if (has_mbyte)
2681 {
2682 regmbc(c);
2683 if (enc_utf8)
2684 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002685 int l;
2686
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002687 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002688 for (;;)
2689 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002690 l = utf_ptr2len(regparse);
2691 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002692 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002693 regmbc(utf_ptr2char(regparse));
2694 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002695 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002696 }
2697 }
2698 else
2699#endif
2700 regc(c);
2701 c = getchr();
2702 }
2703 ungetchr();
2704
2705 regc(NUL);
2706 *flagp |= HASWIDTH;
2707 if (len == 1)
2708 *flagp |= SIMPLE;
2709 }
2710 break;
2711 }
2712
2713 return ret;
2714}
2715
#ifdef FEAT_MBYTE
/*
 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
 * character "c".
 *
 * That is the case when multi-byte is active, "c" takes more than one byte
 * and either a multi item follows in the pattern or, with utf-8 encoding,
 * "c" is a composing character.
 */
    static int
use_multibytecode(c)
    int c;
{
    return has_mbyte && (*mb_char2len)(c) > 1
                     && (re_multi_type(peekchr()) != NOT_MULTI
                             || (enc_utf8 && utf_iscomposing(c)));
}
#endif
2730
Bram Moolenaar071d4272004-06-13 20:20:40 +00002731/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002732 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002733 * Return pointer to generated code.
2734 */
2735 static char_u *
2736regnode(op)
2737 int op;
2738{
2739 char_u *ret;
2740
2741 ret = regcode;
2742 if (ret == JUST_CALC_SIZE)
2743 regsize += 3;
2744 else
2745 {
2746 *regcode++ = op;
2747 *regcode++ = NUL; /* Null "next" pointer. */
2748 *regcode++ = NUL;
2749 }
2750 return ret;
2751}
2752
2753/*
2754 * Emit (if appropriate) a byte of code
2755 */
2756 static void
2757regc(b)
2758 int b;
2759{
2760 if (regcode == JUST_CALC_SIZE)
2761 regsize++;
2762 else
2763 *regcode++ = b;
2764}
2765
#ifdef FEAT_MBYTE
/*
 * Emit (if appropriate) a multi-byte character of code
 *
 * When "regcode" is JUST_CALC_SIZE only the byte length of "c" is added to
 * "regsize", otherwise the bytes of "c" are stored at "regcode".
 * A character above 0xff is silently dropped when multi-byte is not active.
 */
    static void
regmbc(c)
    int         c;
{
    if (!has_mbyte && c > 0xff)
        return;
    if (regcode == JUST_CALC_SIZE)
        regsize += (*mb_char2len)(c);
    else
        regcode += (*mb_char2bytes)(c, regcode);
}
#endif
2782
2783/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002784 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002785 *
2786 * Means relocating the operand.
2787 */
2788 static void
2789reginsert(op, opnd)
2790 int op;
2791 char_u *opnd;
2792{
2793 char_u *src;
2794 char_u *dst;
2795 char_u *place;
2796
2797 if (regcode == JUST_CALC_SIZE)
2798 {
2799 regsize += 3;
2800 return;
2801 }
2802 src = regcode;
2803 regcode += 3;
2804 dst = regcode;
2805 while (src > opnd)
2806 *--dst = *--src;
2807
2808 place = opnd; /* Op node, where operand used to be. */
2809 *place++ = op;
2810 *place++ = NUL;
2811 *place = NUL;
2812}
2813
2814/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002815 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002816 * Add a number to the operator.
2817 */
2818 static void
2819reginsert_nr(op, val, opnd)
2820 int op;
2821 long val;
2822 char_u *opnd;
2823{
2824 char_u *src;
2825 char_u *dst;
2826 char_u *place;
2827
2828 if (regcode == JUST_CALC_SIZE)
2829 {
2830 regsize += 7;
2831 return;
2832 }
2833 src = regcode;
2834 regcode += 7;
2835 dst = regcode;
2836 while (src > opnd)
2837 *--dst = *--src;
2838
2839 place = opnd; /* Op node, where operand used to be. */
2840 *place++ = op;
2841 *place++ = NUL;
2842 *place++ = NUL;
2843 place = re_put_long(place, (long_u)val);
2844}
2845
2846/*
2847 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002848 * The operator has the given limit values as operands. Also set next pointer.
2849 *
2850 * Means relocating the operand.
2851 */
2852 static void
2853reginsert_limits(op, minval, maxval, opnd)
2854 int op;
2855 long minval;
2856 long maxval;
2857 char_u *opnd;
2858{
2859 char_u *src;
2860 char_u *dst;
2861 char_u *place;
2862
2863 if (regcode == JUST_CALC_SIZE)
2864 {
2865 regsize += 11;
2866 return;
2867 }
2868 src = regcode;
2869 regcode += 11;
2870 dst = regcode;
2871 while (src > opnd)
2872 *--dst = *--src;
2873
2874 place = opnd; /* Op node, where operand used to be. */
2875 *place++ = op;
2876 *place++ = NUL;
2877 *place++ = NUL;
2878 place = re_put_long(place, (long_u)minval);
2879 place = re_put_long(place, (long_u)maxval);
2880 regtail(opnd, place);
2881}
2882
2883/*
2884 * Write a long as four bytes at "p" and return pointer to the next char.
2885 */
2886 static char_u *
2887re_put_long(p, val)
2888 char_u *p;
2889 long_u val;
2890{
2891 *p++ = (char_u) ((val >> 24) & 0377);
2892 *p++ = (char_u) ((val >> 16) & 0377);
2893 *p++ = (char_u) ((val >> 8) & 0377);
2894 *p++ = (char_u) (val & 0377);
2895 return p;
2896}
2897
2898/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002899 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002900 */
2901 static void
2902regtail(p, val)
2903 char_u *p;
2904 char_u *val;
2905{
2906 char_u *scan;
2907 char_u *temp;
2908 int offset;
2909
2910 if (p == JUST_CALC_SIZE)
2911 return;
2912
2913 /* Find last node. */
2914 scan = p;
2915 for (;;)
2916 {
2917 temp = regnext(scan);
2918 if (temp == NULL)
2919 break;
2920 scan = temp;
2921 }
2922
Bram Moolenaar582fd852005-03-28 20:58:01 +00002923 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002924 offset = (int)(scan - val);
2925 else
2926 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002927 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002928 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002929 * values in too many places. */
2930 if (offset > 0xffff)
2931 reg_toolong = TRUE;
2932 else
2933 {
2934 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2935 *(scan + 2) = (char_u) (offset & 0377);
2936 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002937}
2938
2939/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002940 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002941 */
2942 static void
2943regoptail(p, val)
2944 char_u *p;
2945 char_u *val;
2946{
2947 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2948 if (p == NULL || p == JUST_CALC_SIZE
2949 || (OP(p) != BRANCH
2950 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2951 return;
2952 regtail(OPERAND(p), val);
2953}
2954
2955/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002956 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957 */
2958
Bram Moolenaar071d4272004-06-13 20:20:40 +00002959static int at_start; /* True when on the first character */
2960static int prev_at_start; /* True when on the second character */
2961
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002962/*
2963 * Start parsing at "str".
2964 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002965 static void
2966initchr(str)
2967 char_u *str;
2968{
2969 regparse = str;
2970 prevchr_len = 0;
2971 curchr = prevprevchr = prevchr = nextchr = -1;
2972 at_start = TRUE;
2973 prev_at_start = FALSE;
2974}
2975
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002976/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002977 * Save the current parse state, so that it can be restored and parsing
2978 * starts in the same state again.
2979 */
2980 static void
2981save_parse_state(ps)
2982 parse_state_T *ps;
2983{
2984 ps->regparse = regparse;
2985 ps->prevchr_len = prevchr_len;
2986 ps->curchr = curchr;
2987 ps->prevchr = prevchr;
2988 ps->prevprevchr = prevprevchr;
2989 ps->nextchr = nextchr;
2990 ps->at_start = at_start;
2991 ps->prev_at_start = prev_at_start;
2992 ps->regnpar = regnpar;
2993}
2994
2995/*
2996 * Restore a previously saved parse state.
2997 */
2998 static void
2999restore_parse_state(ps)
3000 parse_state_T *ps;
3001{
3002 regparse = ps->regparse;
3003 prevchr_len = ps->prevchr_len;
3004 curchr = ps->curchr;
3005 prevchr = ps->prevchr;
3006 prevprevchr = ps->prevprevchr;
3007 nextchr = ps->nextchr;
3008 at_start = ps->at_start;
3009 prev_at_start = ps->prev_at_start;
3010 regnpar = ps->regnpar;
3011}
3012
3013
3014/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003015 * Get the next character without advancing.
3016 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003017 static int
3018peekchr()
3019{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003020 static int after_slash = FALSE;
3021
Bram Moolenaar071d4272004-06-13 20:20:40 +00003022 if (curchr == -1)
3023 {
3024 switch (curchr = regparse[0])
3025 {
3026 case '.':
3027 case '[':
3028 case '~':
3029 /* magic when 'magic' is on */
3030 if (reg_magic >= MAGIC_ON)
3031 curchr = Magic(curchr);
3032 break;
3033 case '(':
3034 case ')':
3035 case '{':
3036 case '%':
3037 case '+':
3038 case '=':
3039 case '?':
3040 case '@':
3041 case '!':
3042 case '&':
3043 case '|':
3044 case '<':
3045 case '>':
3046 case '#': /* future ext. */
3047 case '"': /* future ext. */
3048 case '\'': /* future ext. */
3049 case ',': /* future ext. */
3050 case '-': /* future ext. */
3051 case ':': /* future ext. */
3052 case ';': /* future ext. */
3053 case '`': /* future ext. */
3054 case '/': /* Can't be used in / command */
3055 /* magic only after "\v" */
3056 if (reg_magic == MAGIC_ALL)
3057 curchr = Magic(curchr);
3058 break;
3059 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003060 /* * is not magic as the very first character, eg "?*ptr", when
3061 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3062 * "\(\*" is not magic, thus must be magic if "after_slash" */
3063 if (reg_magic >= MAGIC_ON
3064 && !at_start
3065 && !(prev_at_start && prevchr == Magic('^'))
3066 && (after_slash
3067 || (prevchr != Magic('(')
3068 && prevchr != Magic('&')
3069 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003070 curchr = Magic('*');
3071 break;
3072 case '^':
3073 /* '^' is only magic as the very first character and if it's after
3074 * "\(", "\|", "\&' or "\n" */
3075 if (reg_magic >= MAGIC_OFF
3076 && (at_start
3077 || reg_magic == MAGIC_ALL
3078 || prevchr == Magic('(')
3079 || prevchr == Magic('|')
3080 || prevchr == Magic('&')
3081 || prevchr == Magic('n')
3082 || (no_Magic(prevchr) == '('
3083 && prevprevchr == Magic('%'))))
3084 {
3085 curchr = Magic('^');
3086 at_start = TRUE;
3087 prev_at_start = FALSE;
3088 }
3089 break;
3090 case '$':
3091 /* '$' is only magic as the very last char and if it's in front of
3092 * either "\|", "\)", "\&", or "\n" */
3093 if (reg_magic >= MAGIC_OFF)
3094 {
3095 char_u *p = regparse + 1;
3096
3097 /* ignore \c \C \m and \M after '$' */
3098 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
3099 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
3100 p += 2;
3101 if (p[0] == NUL
3102 || (p[0] == '\\'
3103 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3104 || p[1] == 'n'))
3105 || reg_magic == MAGIC_ALL)
3106 curchr = Magic('$');
3107 }
3108 break;
3109 case '\\':
3110 {
3111 int c = regparse[1];
3112
3113 if (c == NUL)
3114 curchr = '\\'; /* trailing '\' */
3115 else if (
3116#ifdef EBCDIC
3117 vim_strchr(META, c)
3118#else
3119 c <= '~' && META_flags[c]
3120#endif
3121 )
3122 {
3123 /*
3124 * META contains everything that may be magic sometimes,
3125 * except ^ and $ ("\^" and "\$" are only magic after
3126 * "\v"). We now fetch the next character and toggle its
3127 * magicness. Therefore, \ is so meta-magic that it is
3128 * not in META.
3129 */
3130 curchr = -1;
3131 prev_at_start = at_start;
3132 at_start = FALSE; /* be able to say "/\*ptr" */
3133 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003134 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003135 peekchr();
3136 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003137 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003138 curchr = toggle_Magic(curchr);
3139 }
3140 else if (vim_strchr(REGEXP_ABBR, c))
3141 {
3142 /*
3143 * Handle abbreviations, like "\t" for TAB -- webb
3144 */
3145 curchr = backslash_trans(c);
3146 }
3147 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3148 curchr = toggle_Magic(c);
3149 else
3150 {
3151 /*
3152 * Next character can never be (made) magic?
3153 * Then backslashing it won't do anything.
3154 */
3155#ifdef FEAT_MBYTE
3156 if (has_mbyte)
3157 curchr = (*mb_ptr2char)(regparse + 1);
3158 else
3159#endif
3160 curchr = c;
3161 }
3162 break;
3163 }
3164
3165#ifdef FEAT_MBYTE
3166 default:
3167 if (has_mbyte)
3168 curchr = (*mb_ptr2char)(regparse);
3169#endif
3170 }
3171 }
3172
3173 return curchr;
3174}
3175
3176/*
3177 * Eat one lexed character. Do this in a way that we can undo it.
3178 */
3179 static void
3180skipchr()
3181{
3182 /* peekchr() eats a backslash, do the same here */
3183 if (*regparse == '\\')
3184 prevchr_len = 1;
3185 else
3186 prevchr_len = 0;
3187 if (regparse[prevchr_len] != NUL)
3188 {
3189#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003190 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003191 /* exclude composing chars that mb_ptr2len does include */
3192 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003193 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003194 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003195 else
3196#endif
3197 ++prevchr_len;
3198 }
3199 regparse += prevchr_len;
3200 prev_at_start = at_start;
3201 at_start = FALSE;
3202 prevprevchr = prevchr;
3203 prevchr = curchr;
3204 curchr = nextchr; /* use previously unget char, or -1 */
3205 nextchr = -1;
3206}
3207
3208/*
3209 * Skip a character while keeping the value of prev_at_start for at_start.
3210 * prevchr and prevprevchr are also kept.
3211 */
3212 static void
3213skipchr_keepstart()
3214{
3215 int as = prev_at_start;
3216 int pr = prevchr;
3217 int prpr = prevprevchr;
3218
3219 skipchr();
3220 at_start = as;
3221 prevchr = pr;
3222 prevprevchr = prpr;
3223}
3224
/*
 * Get the next character from the pattern and advance past it.
 * We know about magic and such, so therefore we need a lexical analyzer:
 * peekchr() does the lexing, skipchr() the advancing.
 */
    static int
getchr()
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3237
3238/*
3239 * put character back. Works only once!
3240 */
3241 static void
3242ungetchr()
3243{
3244 nextchr = curchr;
3245 curchr = prevchr;
3246 prevchr = prevprevchr;
3247 at_start = prev_at_start;
3248 prev_at_start = FALSE;
3249
3250 /* Backup regparse, so that it's at the same position as before the
3251 * getchr(). */
3252 regparse -= prevchr_len;
3253}
3254
3255/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003256 * Get and return the value of the hex string at the current position.
3257 * Return -1 if there is no valid hex number.
3258 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003259 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003260 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003261 * The parameter controls the maximum number of input characters. This will be
3262 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3263 */
3264 static int
3265gethexchrs(maxinputlen)
3266 int maxinputlen;
3267{
3268 int nr = 0;
3269 int c;
3270 int i;
3271
3272 for (i = 0; i < maxinputlen; ++i)
3273 {
3274 c = regparse[0];
3275 if (!vim_isxdigit(c))
3276 break;
3277 nr <<= 4;
3278 nr |= hex2nr(c);
3279 ++regparse;
3280 }
3281
3282 if (i == 0)
3283 return -1;
3284 return nr;
3285}
3286
3287/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003288 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003289 * current position. Return -1 for invalid. Consumes all digits.
3290 */
3291 static int
3292getdecchrs()
3293{
3294 int nr = 0;
3295 int c;
3296 int i;
3297
3298 for (i = 0; ; ++i)
3299 {
3300 c = regparse[0];
3301 if (c < '0' || c > '9')
3302 break;
3303 nr *= 10;
3304 nr += c - '0';
3305 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003306 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003307 }
3308
3309 if (i == 0)
3310 return -1;
3311 return nr;
3312}
3313
3314/*
3315 * get and return the value of the octal string immediately after the current
3316 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3317 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3318 * treat 8 or 9 as recognised characters. Position is updated:
3319 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003320 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003321 */
3322 static int
3323getoctchrs()
3324{
3325 int nr = 0;
3326 int c;
3327 int i;
3328
3329 for (i = 0; i < 3 && nr < 040; ++i)
3330 {
3331 c = regparse[0];
3332 if (c < '0' || c > '7')
3333 break;
3334 nr <<= 3;
3335 nr |= hex2nr(c);
3336 ++regparse;
3337 }
3338
3339 if (i == 0)
3340 return -1;
3341 return nr;
3342}
3343
3344/*
3345 * Get a number after a backslash that is inside [].
3346 * When nothing is recognized return a backslash.
3347 */
3348 static int
3349coll_get_char()
3350{
3351 int nr = -1;
3352
3353 switch (*regparse++)
3354 {
3355 case 'd': nr = getdecchrs(); break;
3356 case 'o': nr = getoctchrs(); break;
3357 case 'x': nr = gethexchrs(2); break;
3358 case 'u': nr = gethexchrs(4); break;
3359 case 'U': nr = gethexchrs(8); break;
3360 }
3361 if (nr < 0)
3362 {
3363 /* If getting the number fails be backwards compatible: the character
3364 * is a backslash. */
3365 --regparse;
3366 nr = '\\';
3367 }
3368 return nr;
3369}
3370
3371/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003372 * read_limits - Read two integers to be taken as a minimum and maximum.
3373 * If the first character is '-', then the range is reversed.
3374 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3375 * missing, a very big number is the default.
3376 */
3377 static int
3378read_limits(minval, maxval)
3379 long *minval;
3380 long *maxval;
3381{
3382 int reverse = FALSE;
3383 char_u *first_char;
3384 long tmp;
3385
3386 if (*regparse == '-')
3387 {
3388 /* Starts with '-', so reverse the range later */
3389 regparse++;
3390 reverse = TRUE;
3391 }
3392 first_char = regparse;
3393 *minval = getdigits(&regparse);
3394 if (*regparse == ',') /* There is a comma */
3395 {
3396 if (vim_isdigit(*++regparse))
3397 *maxval = getdigits(&regparse);
3398 else
3399 *maxval = MAX_LIMIT;
3400 }
3401 else if (VIM_ISDIGIT(*first_char))
3402 *maxval = *minval; /* It was \{n} or \{-n} */
3403 else
3404 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3405 if (*regparse == '\\')
3406 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003407 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003408 {
3409 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3410 reg_magic == MAGIC_ALL ? "" : "\\");
3411 EMSG_RET_FAIL(IObuff);
3412 }
3413
3414 /*
3415 * Reverse the range if there was a '-', or make sure it is in the right
3416 * order otherwise.
3417 */
3418 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3419 {
3420 tmp = *minval;
3421 *minval = *maxval;
3422 *maxval = tmp;
3423 }
3424 skipchr(); /* let's be friends with the lexer again */
3425 return OK;
3426}
3427
3428/*
3429 * vim_regexec and friends
3430 */
3431
3432/*
3433 * Global work variables for vim_regexec().
3434 */
3435
3436/* The current match-position is remembered with these variables: */
3437static linenr_T reglnum; /* line number, relative to first line */
3438static char_u *regline; /* start of current line */
3439static char_u *reginput; /* current input, points into "regline" */
3440
3441static int need_clear_subexpr; /* subexpressions still need to be
3442 * cleared */
3443#ifdef FEAT_SYN_HL
3444static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3445 * still need to be cleared */
3446#endif
3447
Bram Moolenaar071d4272004-06-13 20:20:40 +00003448/*
3449 * Structure used to save the current input state, when it needs to be
3450 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003451 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003452 */
3453typedef struct
3454{
3455 union
3456 {
3457 char_u *ptr; /* reginput pointer, for single-line regexp */
3458 lpos_T pos; /* reginput pos, for multi-line regexp */
3459 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003460 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003461} regsave_T;
3462
3463/* struct to save start/end pointer/position in for \(\) */
3464typedef struct
3465{
3466 union
3467 {
3468 char_u *ptr;
3469 lpos_T pos;
3470 } se_u;
3471} save_se_T;
3472
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003473/* used for BEHIND and NOBEHIND matching */
3474typedef struct regbehind_S
3475{
3476 regsave_T save_after;
3477 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003478 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003479 save_se_T save_start[NSUBEXP];
3480 save_se_T save_end[NSUBEXP];
3481} regbehind_T;
3482
Bram Moolenaar071d4272004-06-13 20:20:40 +00003483static char_u *reg_getline __ARGS((linenr_T lnum));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003484static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3485static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003486static void cleanup_subexpr __ARGS((void));
3487#ifdef FEAT_SYN_HL
3488static void cleanup_zsubexpr __ARGS((void));
3489#endif
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003490static void save_subexpr __ARGS((regbehind_T *bp));
3491static void restore_subexpr __ARGS((regbehind_T *bp));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003492static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003493static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3494static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003495static int reg_save_equal __ARGS((regsave_T *save));
3496static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3497static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3498
3499/* Save the sub-expressions before attempting a match. */
3500#define save_se(savep, posp, pp) \
3501 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3502
3503/* After a failed match restore the sub-expressions. */
3504#define restore_se(savep, posp, pp) { \
3505 if (REG_MULTI) \
3506 *(posp) = (savep)->se_u.pos; \
3507 else \
3508 *(pp) = (savep)->se_u.ptr; }
3509
3510static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003511static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003512static int regrepeat __ARGS((char_u *p, long maxcount));
3513
3514#ifdef DEBUG
3515int regnarrate = 0;
3516#endif
3517
3518/*
3519 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3520 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3521 * contains '\c' or '\C' the value is overruled.
3522 */
3523static int ireg_ic;
3524
3525#ifdef FEAT_MBYTE
3526/*
3527 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3528 * in the regexp. Defaults to false, always.
3529 */
3530static int ireg_icombine;
3531#endif
3532
3533/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003534 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3535 * there is no maximum.
3536 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003537static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003538
3539/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003540 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3541 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003542 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003543 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003544static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003545static unsigned reg_tofreelen;
3546
3547/*
3548 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003549 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003550 * done:
3551 * single-line multi-line
3552 * reg_match &regmatch_T NULL
3553 * reg_mmatch NULL &regmmatch_T
3554 * reg_startp reg_match->startp <invalid>
3555 * reg_endp reg_match->endp <invalid>
3556 * reg_startpos <invalid> reg_mmatch->startpos
3557 * reg_endpos <invalid> reg_mmatch->endpos
3558 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003559 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003560 * reg_firstlnum <invalid> first line in which to search
3561 * reg_maxline 0 last line nr
3562 * reg_line_lbr FALSE or TRUE FALSE
3563 */
3564static regmatch_T *reg_match;
3565static regmmatch_T *reg_mmatch;
3566static char_u **reg_startp = NULL;
3567static char_u **reg_endp = NULL;
3568static lpos_T *reg_startpos = NULL;
3569static lpos_T *reg_endpos = NULL;
3570static win_T *reg_win;
3571static buf_T *reg_buf;
3572static linenr_T reg_firstlnum;
3573static linenr_T reg_maxline;
3574static int reg_line_lbr; /* "\n" in string is line break */
3575
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003576/* Values for rs_state in regitem_T. */
3577typedef enum regstate_E
3578{
3579 RS_NOPEN = 0 /* NOPEN and NCLOSE */
3580 , RS_MOPEN /* MOPEN + [0-9] */
3581 , RS_MCLOSE /* MCLOSE + [0-9] */
3582#ifdef FEAT_SYN_HL
3583 , RS_ZOPEN /* ZOPEN + [0-9] */
3584 , RS_ZCLOSE /* ZCLOSE + [0-9] */
3585#endif
3586 , RS_BRANCH /* BRANCH */
3587 , RS_BRCPLX_MORE /* BRACE_COMPLEX and trying one more match */
3588 , RS_BRCPLX_LONG /* BRACE_COMPLEX and trying longest match */
3589 , RS_BRCPLX_SHORT /* BRACE_COMPLEX and trying shortest match */
3590 , RS_NOMATCH /* NOMATCH */
3591 , RS_BEHIND1 /* BEHIND / NOBEHIND matching rest */
3592 , RS_BEHIND2 /* BEHIND / NOBEHIND matching behind part */
3593 , RS_STAR_LONG /* STAR/PLUS/BRACE_SIMPLE longest match */
3594 , RS_STAR_SHORT /* STAR/PLUS/BRACE_SIMPLE shortest match */
3595} regstate_T;
3596
3597/*
3598 * When there are alternatives a regstate_T is put on the regstack to remember
3599 * what we are doing.
3600 * Before it may be another type of item, depending on rs_state, to remember
3601 * more things.
3602 */
3603typedef struct regitem_S
3604{
3605 regstate_T rs_state; /* what we are doing, one of RS_ above */
3606 char_u *rs_scan; /* current node in program */
3607 union
3608 {
3609 save_se_T sesave;
3610 regsave_T regsave;
3611 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003612 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003613} regitem_T;
3614
3615static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3616static void regstack_pop __ARGS((char_u **scan));
3617
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003618/* used for STAR, PLUS and BRACE_SIMPLE matching */
3619typedef struct regstar_S
3620{
3621 int nextb; /* next byte */
3622 int nextb_ic; /* next byte reverse case */
3623 long count;
3624 long minval;
3625 long maxval;
3626} regstar_T;
3627
3628/* used to store input position when a BACK was encountered, so that we know if
3629 * we made any progress since the last time. */
3630typedef struct backpos_S
3631{
3632 char_u *bp_scan; /* "scan" where BACK was encountered */
3633 regsave_T bp_pos; /* last input position */
3634} backpos_T;
3635
3636/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003637 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3638 * to avoid invoking malloc() and free() often.
3639 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3640 * or regbehind_T.
3641 * "backpos" is a table with backpos_T items for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003642 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003643static garray_T regstack = {0, 0, 0, 0, NULL};
3644static garray_T backpos = {0, 0, 0, 0, NULL};
3645
3646/*
3647 * Both for regstack and backpos tables we use the following strategy of
3648 * allocation (to reduce malloc/free calls):
3649 * - Initial size is fairly small.
3650 * - When needed, the tables are grown bigger (8 times at first, double after
3651 * that).
3652 * - After executing the match we free the memory only if the array has grown.
3653 * Thus the memory is kept allocated when it's at the initial size.
3654 * This makes it fast while not keeping a lot of memory allocated.
3655 * A three times speed increase was observed when using many simple patterns.
3656 */
3657#define REGSTACK_INITIAL 2048
3658#define BACKPOS_INITIAL 64
3659
3660#if defined(EXITFREE) || defined(PROTO)
3661 void
3662free_regexp_stuff()
3663{
3664 ga_clear(&regstack);
3665 ga_clear(&backpos);
3666 vim_free(reg_tofree);
3667 vim_free(reg_prev_sub);
3668}
3669#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003670
Bram Moolenaar071d4272004-06-13 20:20:40 +00003671/*
3672 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3673 */
3674 static char_u *
3675reg_getline(lnum)
3676 linenr_T lnum;
3677{
3678 /* when looking behind for a match/no-match lnum is negative. But we
3679 * can't go before line 1 */
3680 if (reg_firstlnum + lnum < 1)
3681 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003682 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003683 /* Must have matched the "\n" in the last line. */
3684 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003685 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3686}
3687
3688static regsave_T behind_pos;
3689
3690#ifdef FEAT_SYN_HL
3691static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3692static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3693static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3694static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3695#endif
3696
3697/* TRUE if using multi-line regexp. */
3698#define REG_MULTI (reg_match == NULL)
3699
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003700static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
3701
Bram Moolenaar071d4272004-06-13 20:20:40 +00003702/*
3703 * Match a regexp against a string.
3704 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3705 * Uses curbuf for line count and 'iskeyword'.
3706 *
3707 * Return TRUE if there is a match, FALSE if not.
3708 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003709 static int
3710bt_regexec(rmp, line, col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003711 regmatch_T *rmp;
3712 char_u *line; /* string to match against */
3713 colnr_T col; /* column to start looking for match */
3714{
3715 reg_match = rmp;
3716 reg_mmatch = NULL;
3717 reg_maxline = 0;
3718 reg_line_lbr = FALSE;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003719 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003720 reg_win = NULL;
3721 ireg_ic = rmp->rm_ic;
3722#ifdef FEAT_MBYTE
3723 ireg_icombine = FALSE;
3724#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003725 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003726 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003727}
3728
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00003729#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
3730 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003731
3732static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
3733
Bram Moolenaar071d4272004-06-13 20:20:40 +00003734/*
3735 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
3736 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003737 static int
3738bt_regexec_nl(rmp, line, col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003739 regmatch_T *rmp;
3740 char_u *line; /* string to match against */
3741 colnr_T col; /* column to start looking for match */
3742{
3743 reg_match = rmp;
3744 reg_mmatch = NULL;
3745 reg_maxline = 0;
3746 reg_line_lbr = TRUE;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003747 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003748 reg_win = NULL;
3749 ireg_ic = rmp->rm_ic;
3750#ifdef FEAT_MBYTE
3751 ireg_icombine = FALSE;
3752#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003753 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003754 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003755}
3756#endif
3757
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003758static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3759
Bram Moolenaar071d4272004-06-13 20:20:40 +00003760/*
3761 * Match a regexp against multiple lines.
3762 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3763 * Uses curbuf for line count and 'iskeyword'.
3764 *
3765 * Return zero if there is no match. Return number of lines contained in the
3766 * match otherwise.
3767 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003768 static long
3769bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003770 regmmatch_T *rmp;
3771 win_T *win; /* window in which to search or NULL */
3772 buf_T *buf; /* buffer in which to search */
3773 linenr_T lnum; /* nr of line to start looking for match */
3774 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003775 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003776{
3777 long r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003778
3779 reg_match = NULL;
3780 reg_mmatch = rmp;
3781 reg_buf = buf;
3782 reg_win = win;
3783 reg_firstlnum = lnum;
3784 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3785 reg_line_lbr = FALSE;
3786 ireg_ic = rmp->rmm_ic;
3787#ifdef FEAT_MBYTE
3788 ireg_icombine = FALSE;
3789#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003790 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003791
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003792 r = bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003793
3794 return r;
3795}
3796
3797/*
3798 * Match a regexp against a string ("line" points to the string) or multiple
3799 * lines ("line" is NULL, use reg_getline()).
3800 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003801 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003802bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003803 char_u *line;
3804 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003805 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003806{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003807 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003808 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003809 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003810
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003811 /* Create "regstack" and "backpos" if they are not allocated yet.
3812 * We allocate *_INITIAL amount of bytes first and then set the grow size
3813 * to much bigger value to avoid many malloc calls in case of deep regular
3814 * expressions. */
3815 if (regstack.ga_data == NULL)
3816 {
3817 /* Use an item size of 1 byte, since we push different things
3818 * onto the regstack. */
3819 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3820 ga_grow(&regstack, REGSTACK_INITIAL);
3821 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3822 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003823
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003824 if (backpos.ga_data == NULL)
3825 {
3826 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3827 ga_grow(&backpos, BACKPOS_INITIAL);
3828 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3829 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003830
Bram Moolenaar071d4272004-06-13 20:20:40 +00003831 if (REG_MULTI)
3832 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003833 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003834 line = reg_getline((linenr_T)0);
3835 reg_startpos = reg_mmatch->startpos;
3836 reg_endpos = reg_mmatch->endpos;
3837 }
3838 else
3839 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003840 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003841 reg_startp = reg_match->startp;
3842 reg_endp = reg_match->endp;
3843 }
3844
3845 /* Be paranoid... */
3846 if (prog == NULL || line == NULL)
3847 {
3848 EMSG(_(e_null));
3849 goto theend;
3850 }
3851
3852 /* Check validity of program. */
3853 if (prog_magic_wrong())
3854 goto theend;
3855
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003856 /* If the start column is past the maximum column: no need to try. */
3857 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3858 goto theend;
3859
Bram Moolenaar071d4272004-06-13 20:20:40 +00003860 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3861 if (prog->regflags & RF_ICASE)
3862 ireg_ic = TRUE;
3863 else if (prog->regflags & RF_NOICASE)
3864 ireg_ic = FALSE;
3865
3866#ifdef FEAT_MBYTE
3867 /* If pattern contains "\Z" overrule value of ireg_icombine */
3868 if (prog->regflags & RF_ICOMBINE)
3869 ireg_icombine = TRUE;
3870#endif
3871
3872 /* If there is a "must appear" string, look for it. */
3873 if (prog->regmust != NULL)
3874 {
3875 int c;
3876
3877#ifdef FEAT_MBYTE
3878 if (has_mbyte)
3879 c = (*mb_ptr2char)(prog->regmust);
3880 else
3881#endif
3882 c = *prog->regmust;
3883 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003884
3885 /*
3886 * This is used very often, esp. for ":global". Use three versions of
3887 * the loop to avoid overhead of conditions.
3888 */
3889 if (!ireg_ic
3890#ifdef FEAT_MBYTE
3891 && !has_mbyte
3892#endif
3893 )
3894 while ((s = vim_strbyte(s, c)) != NULL)
3895 {
3896 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3897 break; /* Found it. */
3898 ++s;
3899 }
3900#ifdef FEAT_MBYTE
3901 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3902 while ((s = vim_strchr(s, c)) != NULL)
3903 {
3904 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3905 break; /* Found it. */
3906 mb_ptr_adv(s);
3907 }
3908#endif
3909 else
3910 while ((s = cstrchr(s, c)) != NULL)
3911 {
3912 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3913 break; /* Found it. */
3914 mb_ptr_adv(s);
3915 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003916 if (s == NULL) /* Not present. */
3917 goto theend;
3918 }
3919
3920 regline = line;
3921 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003922 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003923
3924 /* Simplest case: Anchored match need be tried only once. */
3925 if (prog->reganch)
3926 {
3927 int c;
3928
3929#ifdef FEAT_MBYTE
3930 if (has_mbyte)
3931 c = (*mb_ptr2char)(regline + col);
3932 else
3933#endif
3934 c = regline[col];
3935 if (prog->regstart == NUL
3936 || prog->regstart == c
3937 || (ireg_ic && ((
3938#ifdef FEAT_MBYTE
3939 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3940 || (c < 255 && prog->regstart < 255 &&
3941#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003942 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003943 retval = regtry(prog, col);
3944 else
3945 retval = 0;
3946 }
3947 else
3948 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003949#ifdef FEAT_RELTIME
3950 int tm_count = 0;
3951#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003952 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003953 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003954 {
3955 if (prog->regstart != NUL)
3956 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003957 /* Skip until the char we know it must start with.
3958 * Used often, do some work to avoid call overhead. */
3959 if (!ireg_ic
3960#ifdef FEAT_MBYTE
3961 && !has_mbyte
3962#endif
3963 )
3964 s = vim_strbyte(regline + col, prog->regstart);
3965 else
3966 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003967 if (s == NULL)
3968 {
3969 retval = 0;
3970 break;
3971 }
3972 col = (int)(s - regline);
3973 }
3974
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003975 /* Check for maximum column to try. */
3976 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3977 {
3978 retval = 0;
3979 break;
3980 }
3981
Bram Moolenaar071d4272004-06-13 20:20:40 +00003982 retval = regtry(prog, col);
3983 if (retval > 0)
3984 break;
3985
3986 /* if not currently on the first line, get it again */
3987 if (reglnum != 0)
3988 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003989 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003990 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003991 }
3992 if (regline[col] == NUL)
3993 break;
3994#ifdef FEAT_MBYTE
3995 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003996 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997 else
3998#endif
3999 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00004000#ifdef FEAT_RELTIME
4001 /* Check for timeout once in a twenty times to avoid overhead. */
4002 if (tm != NULL && ++tm_count == 20)
4003 {
4004 tm_count = 0;
4005 if (profile_passed_limit(tm))
4006 break;
4007 }
4008#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004009 }
4010 }
4011
Bram Moolenaar071d4272004-06-13 20:20:40 +00004012theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004013 /* Free "reg_tofree" when it's a bit big.
4014 * Free regstack and backpos if they are bigger than their initial size. */
4015 if (reg_tofreelen > 400)
4016 {
4017 vim_free(reg_tofree);
4018 reg_tofree = NULL;
4019 }
4020 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4021 ga_clear(&regstack);
4022 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4023 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004024
Bram Moolenaar071d4272004-06-13 20:20:40 +00004025 return retval;
4026}
4027
4028#ifdef FEAT_SYN_HL
4029static reg_extmatch_T *make_extmatch __ARGS((void));
4030
4031/*
4032 * Create a new extmatch and mark it as referenced once.
4033 */
4034 static reg_extmatch_T *
4035make_extmatch()
4036{
4037 reg_extmatch_T *em;
4038
4039 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4040 if (em != NULL)
4041 em->refcnt = 1;
4042 return em;
4043}
4044
4045/*
4046 * Add a reference to an extmatch.
4047 */
4048 reg_extmatch_T *
4049ref_extmatch(em)
4050 reg_extmatch_T *em;
4051{
4052 if (em != NULL)
4053 em->refcnt++;
4054 return em;
4055}
4056
4057/*
4058 * Remove a reference to an extmatch. If there are no references left, free
4059 * the info.
4060 */
4061 void
4062unref_extmatch(em)
4063 reg_extmatch_T *em;
4064{
4065 int i;
4066
4067 if (em != NULL && --em->refcnt <= 0)
4068 {
4069 for (i = 0; i < NSUBEXP; ++i)
4070 vim_free(em->matches[i]);
4071 vim_free(em);
4072 }
4073}
4074#endif
4075
4076/*
4077 * regtry - try match of "prog" with at regline["col"].
4078 * Returns 0 for failure, number of lines contained in the match otherwise.
4079 */
4080 static long
4081regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004082 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004083 colnr_T col;
4084{
4085 reginput = regline + col;
4086 need_clear_subexpr = TRUE;
4087#ifdef FEAT_SYN_HL
4088 /* Clear the external match subpointers if necessary. */
4089 if (prog->reghasz == REX_SET)
4090 need_clear_zsubexpr = TRUE;
4091#endif
4092
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004093 if (regmatch(prog->program + 1) == 0)
4094 return 0;
4095
4096 cleanup_subexpr();
4097 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004098 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004099 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004100 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004101 reg_startpos[0].lnum = 0;
4102 reg_startpos[0].col = col;
4103 }
4104 if (reg_endpos[0].lnum < 0)
4105 {
4106 reg_endpos[0].lnum = reglnum;
4107 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004108 }
4109 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004110 /* Use line number of "\ze". */
4111 reglnum = reg_endpos[0].lnum;
4112 }
4113 else
4114 {
4115 if (reg_startp[0] == NULL)
4116 reg_startp[0] = regline + col;
4117 if (reg_endp[0] == NULL)
4118 reg_endp[0] = reginput;
4119 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004120#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004121 /* Package any found \z(...\) matches for export. Default is none. */
4122 unref_extmatch(re_extmatch_out);
4123 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004124
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004125 if (prog->reghasz == REX_SET)
4126 {
4127 int i;
4128
4129 cleanup_zsubexpr();
4130 re_extmatch_out = make_extmatch();
4131 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004132 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004133 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004134 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004135 /* Only accept single line matches. */
4136 if (reg_startzpos[i].lnum >= 0
4137 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
4138 re_extmatch_out->matches[i] =
4139 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004140 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004141 reg_endzpos[i].col - reg_startzpos[i].col);
4142 }
4143 else
4144 {
4145 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4146 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004147 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004148 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004149 }
4150 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004151 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004152#endif
4153 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004154}
4155
4156#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00004157static int reg_prev_class __ARGS((void));
4158
Bram Moolenaar071d4272004-06-13 20:20:40 +00004159/*
4160 * Get class of previous character.
4161 */
4162 static int
4163reg_prev_class()
4164{
4165 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004166 return mb_get_class_buf(reginput - 1
4167 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004168 return -1;
4169}
4170
Bram Moolenaar071d4272004-06-13 20:20:40 +00004171#endif
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004172#ifdef FEAT_VISUAL
4173static int reg_match_visual __ARGS((void));
4174
4175/*
4176 * Return TRUE if the current reginput position matches the Visual area.
4177 */
4178 static int
4179reg_match_visual()
4180{
4181 pos_T top, bot;
4182 linenr_T lnum;
4183 colnr_T col;
4184 win_T *wp = reg_win == NULL ? curwin : reg_win;
4185 int mode;
4186 colnr_T start, end;
4187 colnr_T start2, end2;
4188 colnr_T cols;
4189
4190 /* Check if the buffer is the current buffer. */
4191 if (reg_buf != curbuf || VIsual.lnum == 0)
4192 return FALSE;
4193
4194 if (VIsual_active)
4195 {
4196 if (lt(VIsual, wp->w_cursor))
4197 {
4198 top = VIsual;
4199 bot = wp->w_cursor;
4200 }
4201 else
4202 {
4203 top = wp->w_cursor;
4204 bot = VIsual;
4205 }
4206 mode = VIsual_mode;
4207 }
4208 else
4209 {
4210 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4211 {
4212 top = curbuf->b_visual.vi_start;
4213 bot = curbuf->b_visual.vi_end;
4214 }
4215 else
4216 {
4217 top = curbuf->b_visual.vi_end;
4218 bot = curbuf->b_visual.vi_start;
4219 }
4220 mode = curbuf->b_visual.vi_mode;
4221 }
4222 lnum = reglnum + reg_firstlnum;
4223 if (lnum < top.lnum || lnum > bot.lnum)
4224 return FALSE;
4225
4226 if (mode == 'v')
4227 {
4228 col = (colnr_T)(reginput - regline);
4229 if ((lnum == top.lnum && col < top.col)
4230 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4231 return FALSE;
4232 }
4233 else if (mode == Ctrl_V)
4234 {
4235 getvvcol(wp, &top, &start, NULL, &end);
4236 getvvcol(wp, &bot, &start2, NULL, &end2);
4237 if (start2 < start)
4238 start = start2;
4239 if (end2 > end)
4240 end = end2;
4241 if (top.col == MAXCOL || bot.col == MAXCOL)
4242 end = MAXCOL;
4243 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4244 if (cols < start || cols > end - (*p_sel == 'e'))
4245 return FALSE;
4246 }
4247 return TRUE;
4248}
4249#endif
4250
/* Advance reginput, using mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  These really belong to
 * regmatch(), but since that function can be called recursively many times
 * they are kept here to reduce the amount of stack space used.
 */
static long     bl_minval;
static long     bl_maxval;
4260
4261/*
4262 * regmatch - main matching routine
4263 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004264 * Conceptually the strategy is simple: Check to see whether the current node
4265 * matches, push an item onto the regstack and loop to see whether the rest
4266 * matches, and then act accordingly. In practice we make some effort to
4267 * avoid using the regstack, in particular by going through "ordinary" nodes
4268 * (that don't need to know whether the rest of the match failed) by a nested
4269 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004270 *
4271 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4272 * the last matched character.
4273 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4274 * undefined state!
4275 */
4276 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004277regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004278 char_u *scan; /* Current node. */
4279{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004280 char_u *next; /* Next node. */
4281 int op;
4282 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004283 regitem_T *rp;
4284 int no;
4285 int status; /* one of the RA_ values: */
4286#define RA_FAIL 1 /* something failed, abort */
4287#define RA_CONT 2 /* continue in inner loop */
4288#define RA_BREAK 3 /* break inner loop */
4289#define RA_MATCH 4 /* successful match */
4290#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004291
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004292 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004293 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004294 regstack.ga_len = 0;
4295 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004296
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004297 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004298 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004299 */
4300 for (;;)
4301 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004302 /* Some patterns may cause a long time to match, even though they are not
Bram Moolenaar071d4272004-06-13 20:20:40 +00004303 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
4304 fast_breakcheck();
4305
4306#ifdef DEBUG
4307 if (scan != NULL && regnarrate)
4308 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004309 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004310 mch_errmsg("(\n");
4311 }
4312#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004313
4314 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004315 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004316 * regstack.
4317 */
4318 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004319 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004320 if (got_int || scan == NULL)
4321 {
4322 status = RA_FAIL;
4323 break;
4324 }
4325 status = RA_CONT;
4326
Bram Moolenaar071d4272004-06-13 20:20:40 +00004327#ifdef DEBUG
4328 if (regnarrate)
4329 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004330 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004331 mch_errmsg("...\n");
4332# ifdef FEAT_SYN_HL
4333 if (re_extmatch_in != NULL)
4334 {
4335 int i;
4336
4337 mch_errmsg(_("External submatches:\n"));
4338 for (i = 0; i < NSUBEXP; i++)
4339 {
4340 mch_errmsg(" \"");
4341 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004342 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004343 mch_errmsg("\"\n");
4344 }
4345 }
4346# endif
4347 }
4348#endif
4349 next = regnext(scan);
4350
4351 op = OP(scan);
4352 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004353 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4354 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004355 {
4356 reg_nextline();
4357 }
4358 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4359 {
4360 ADVANCE_REGINPUT();
4361 }
4362 else
4363 {
4364 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004365 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004366#ifdef FEAT_MBYTE
4367 if (has_mbyte)
4368 c = (*mb_ptr2char)(reginput);
4369 else
4370#endif
4371 c = *reginput;
4372 switch (op)
4373 {
4374 case BOL:
4375 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004376 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004377 break;
4378
4379 case EOL:
4380 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004381 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004382 break;
4383
4384 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004385 /* We're not at the beginning of the file when below the first
4386 * line where we started, not at the start of the line or we
4387 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004388 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004389 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004390 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004391 break;
4392
4393 case RE_EOF:
4394 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004395 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 break;
4397
4398 case CURSOR:
4399 /* Check if the buffer is in a window and compare the
4400 * reg_win->w_cursor position to the match position. */
4401 if (reg_win == NULL
4402 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4403 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004404 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004405 break;
4406
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004407 case RE_MARK:
Bram Moolenaar044aa292013-06-04 21:27:38 +02004408 /* Compare the mark position to the match position. */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004409 {
4410 int mark = OPERAND(scan)[0];
4411 int cmp = OPERAND(scan)[1];
4412 pos_T *pos;
4413
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004414 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004415 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar044aa292013-06-04 21:27:38 +02004416 || pos->lnum <= 0 /* mark isn't set in reg_buf */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004417 || (pos->lnum == reglnum + reg_firstlnum
4418 ? (pos->col == (colnr_T)(reginput - regline)
4419 ? (cmp == '<' || cmp == '>')
4420 : (pos->col < (colnr_T)(reginput - regline)
4421 ? cmp != '>'
4422 : cmp != '<'))
4423 : (pos->lnum < reglnum + reg_firstlnum
4424 ? cmp != '>'
4425 : cmp != '<')))
4426 status = RA_NOMATCH;
4427 }
4428 break;
4429
4430 case RE_VISUAL:
4431#ifdef FEAT_VISUAL
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004432 if (!reg_match_visual())
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004433#endif
Bram Moolenaardacd7de2013-06-04 18:28:48 +02004434 status = RA_NOMATCH;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004435 break;
4436
Bram Moolenaar071d4272004-06-13 20:20:40 +00004437 case RE_LNUM:
4438 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4439 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004440 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004441 break;
4442
4443 case RE_COL:
4444 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004445 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004446 break;
4447
4448 case RE_VCOL:
4449 if (!re_num_cmp((long_u)win_linetabsize(
4450 reg_win == NULL ? curwin : reg_win,
4451 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004452 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004453 break;
4454
4455 case BOW: /* \<word; reginput points to w */
4456 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004457 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004458#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004459 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004460 {
4461 int this_class;
4462
4463 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004464 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004465 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004466 status = RA_NOMATCH; /* not on a word at all */
4467 else if (reg_prev_class() == this_class)
4468 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004469 }
4470#endif
4471 else
4472 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004473 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4474 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004475 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004476 }
4477 break;
4478
4479 case EOW: /* word\>; reginput points after d */
4480 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004481 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004483 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004484 {
4485 int this_class, prev_class;
4486
4487 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004488 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004489 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004490 if (this_class == prev_class
4491 || prev_class == 0 || prev_class == 1)
4492 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004493 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004494#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004495 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004496 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004497 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4498 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004499 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004500 }
4501 break; /* Matched with EOW */
4502
4503 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004504 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004505 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004506 status = RA_NOMATCH;
4507 else
4508 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004509 break;
4510
4511 case IDENT:
4512 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004513 status = RA_NOMATCH;
4514 else
4515 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004516 break;
4517
4518 case SIDENT:
4519 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004520 status = RA_NOMATCH;
4521 else
4522 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004523 break;
4524
4525 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004526 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004527 status = RA_NOMATCH;
4528 else
4529 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004530 break;
4531
4532 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004533 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004534 status = RA_NOMATCH;
4535 else
4536 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004537 break;
4538
4539 case FNAME:
4540 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004541 status = RA_NOMATCH;
4542 else
4543 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004544 break;
4545
4546 case SFNAME:
4547 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004548 status = RA_NOMATCH;
4549 else
4550 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004551 break;
4552
4553 case PRINT:
4554 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004555 status = RA_NOMATCH;
4556 else
4557 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004558 break;
4559
4560 case SPRINT:
4561 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004562 status = RA_NOMATCH;
4563 else
4564 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 break;
4566
4567 case WHITE:
4568 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004569 status = RA_NOMATCH;
4570 else
4571 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004572 break;
4573
4574 case NWHITE:
4575 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004576 status = RA_NOMATCH;
4577 else
4578 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004579 break;
4580
4581 case DIGIT:
4582 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004583 status = RA_NOMATCH;
4584 else
4585 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004586 break;
4587
4588 case NDIGIT:
4589 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004590 status = RA_NOMATCH;
4591 else
4592 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004593 break;
4594
4595 case HEX:
4596 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004597 status = RA_NOMATCH;
4598 else
4599 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004600 break;
4601
4602 case NHEX:
4603 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004604 status = RA_NOMATCH;
4605 else
4606 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004607 break;
4608
4609 case OCTAL:
4610 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004611 status = RA_NOMATCH;
4612 else
4613 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004614 break;
4615
4616 case NOCTAL:
4617 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004618 status = RA_NOMATCH;
4619 else
4620 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004621 break;
4622
4623 case WORD:
4624 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004625 status = RA_NOMATCH;
4626 else
4627 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004628 break;
4629
4630 case NWORD:
4631 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004632 status = RA_NOMATCH;
4633 else
4634 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004635 break;
4636
4637 case HEAD:
4638 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004639 status = RA_NOMATCH;
4640 else
4641 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004642 break;
4643
4644 case NHEAD:
4645 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004646 status = RA_NOMATCH;
4647 else
4648 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004649 break;
4650
4651 case ALPHA:
4652 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004653 status = RA_NOMATCH;
4654 else
4655 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004656 break;
4657
4658 case NALPHA:
4659 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004660 status = RA_NOMATCH;
4661 else
4662 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004663 break;
4664
4665 case LOWER:
4666 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004667 status = RA_NOMATCH;
4668 else
4669 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004670 break;
4671
4672 case NLOWER:
4673 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004674 status = RA_NOMATCH;
4675 else
4676 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004677 break;
4678
4679 case UPPER:
4680 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004681 status = RA_NOMATCH;
4682 else
4683 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004684 break;
4685
4686 case NUPPER:
4687 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004688 status = RA_NOMATCH;
4689 else
4690 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004691 break;
4692
4693 case EXACTLY:
4694 {
4695 int len;
4696 char_u *opnd;
4697
4698 opnd = OPERAND(scan);
4699 /* Inline the first byte, for speed. */
4700 if (*opnd != *reginput
4701 && (!ireg_ic || (
4702#ifdef FEAT_MBYTE
4703 !enc_utf8 &&
4704#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004705 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004706 status = RA_NOMATCH;
4707 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004708 {
4709 /* match empty string always works; happens when "~" is
4710 * empty. */
4711 }
4712 else if (opnd[1] == NUL
4713#ifdef FEAT_MBYTE
4714 && !(enc_utf8 && ireg_ic)
4715#endif
4716 )
4717 ++reginput; /* matched a single char */
4718 else
4719 {
4720 len = (int)STRLEN(opnd);
4721 /* Need to match first byte again for multi-byte. */
4722 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004723 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004724#ifdef FEAT_MBYTE
4725 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004726 else if (enc_utf8
4727 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004728 {
4729 /* raaron: This code makes a composing character get
4730 * ignored, which is the correct behavior (sometimes)
4731 * for voweled Hebrew texts. */
4732 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004733 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004734 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004735#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004736 else
4737 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004738 }
4739 }
4740 break;
4741
4742 case ANYOF:
4743 case ANYBUT:
4744 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004745 status = RA_NOMATCH;
4746 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4747 status = RA_NOMATCH;
4748 else
4749 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004750 break;
4751
4752#ifdef FEAT_MBYTE
4753 case MULTIBYTECODE:
4754 if (has_mbyte)
4755 {
4756 int i, len;
4757 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004758 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004759
4760 opnd = OPERAND(scan);
4761 /* Safety check (just in case 'encoding' was changed since
4762 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004763 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004764 {
4765 status = RA_NOMATCH;
4766 break;
4767 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004768 if (enc_utf8)
4769 opndc = mb_ptr2char(opnd);
4770 if (enc_utf8 && utf_iscomposing(opndc))
4771 {
4772 /* When only a composing char is given match at any
4773 * position where that composing char appears. */
4774 status = RA_NOMATCH;
4775 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004776 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004777 inpc = mb_ptr2char(reginput + i);
4778 if (!utf_iscomposing(inpc))
4779 {
4780 if (i > 0)
4781 break;
4782 }
4783 else if (opndc == inpc)
4784 {
4785 /* Include all following composing chars. */
4786 len = i + mb_ptr2len(reginput + i);
4787 status = RA_MATCH;
4788 break;
4789 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004790 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004791 }
4792 else
4793 for (i = 0; i < len; ++i)
4794 if (opnd[i] != reginput[i])
4795 {
4796 status = RA_NOMATCH;
4797 break;
4798 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004799 reginput += len;
4800 }
4801 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004802 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004803 break;
4804#endif
4805
4806 case NOTHING:
4807 break;
4808
4809 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004810 {
4811 int i;
4812 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004813
Bram Moolenaar582fd852005-03-28 20:58:01 +00004814 /*
4815 * When we run into BACK we need to check if we don't keep
4816 * looping without matching any input. The second and later
4817 * times a BACK is encountered it fails if the input is still
4818 * at the same position as the previous time.
4819 * The positions are stored in "backpos" and found by the
4820 * current value of "scan", the position in the RE program.
4821 */
4822 bp = (backpos_T *)backpos.ga_data;
4823 for (i = 0; i < backpos.ga_len; ++i)
4824 if (bp[i].bp_scan == scan)
4825 break;
4826 if (i == backpos.ga_len)
4827 {
4828 /* First time at this BACK, make room to store the pos. */
4829 if (ga_grow(&backpos, 1) == FAIL)
4830 status = RA_FAIL;
4831 else
4832 {
4833 /* get "ga_data" again, it may have changed */
4834 bp = (backpos_T *)backpos.ga_data;
4835 bp[i].bp_scan = scan;
4836 ++backpos.ga_len;
4837 }
4838 }
4839 else if (reg_save_equal(&bp[i].bp_pos))
4840 /* Still at same position as last time, fail. */
4841 status = RA_NOMATCH;
4842
4843 if (status != RA_FAIL && status != RA_NOMATCH)
4844 reg_save(&bp[i].bp_pos, &backpos);
4845 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004846 break;
4847
Bram Moolenaar071d4272004-06-13 20:20:40 +00004848 case MOPEN + 0: /* Match start: \zs */
4849 case MOPEN + 1: /* \( */
4850 case MOPEN + 2:
4851 case MOPEN + 3:
4852 case MOPEN + 4:
4853 case MOPEN + 5:
4854 case MOPEN + 6:
4855 case MOPEN + 7:
4856 case MOPEN + 8:
4857 case MOPEN + 9:
4858 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004859 no = op - MOPEN;
4860 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004861 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004862 if (rp == NULL)
4863 status = RA_FAIL;
4864 else
4865 {
4866 rp->rs_no = no;
4867 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4868 &reg_startp[no]);
4869 /* We simply continue and handle the result when done. */
4870 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004871 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004872 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004873
4874 case NOPEN: /* \%( */
4875 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004876 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004877 status = RA_FAIL;
4878 /* We simply continue and handle the result when done. */
4879 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004880
4881#ifdef FEAT_SYN_HL
4882 case ZOPEN + 1:
4883 case ZOPEN + 2:
4884 case ZOPEN + 3:
4885 case ZOPEN + 4:
4886 case ZOPEN + 5:
4887 case ZOPEN + 6:
4888 case ZOPEN + 7:
4889 case ZOPEN + 8:
4890 case ZOPEN + 9:
4891 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004892 no = op - ZOPEN;
4893 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004894 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004895 if (rp == NULL)
4896 status = RA_FAIL;
4897 else
4898 {
4899 rp->rs_no = no;
4900 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4901 &reg_startzp[no]);
4902 /* We simply continue and handle the result when done. */
4903 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004904 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004905 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004906#endif
4907
4908 case MCLOSE + 0: /* Match end: \ze */
4909 case MCLOSE + 1: /* \) */
4910 case MCLOSE + 2:
4911 case MCLOSE + 3:
4912 case MCLOSE + 4:
4913 case MCLOSE + 5:
4914 case MCLOSE + 6:
4915 case MCLOSE + 7:
4916 case MCLOSE + 8:
4917 case MCLOSE + 9:
4918 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004919 no = op - MCLOSE;
4920 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004921 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004922 if (rp == NULL)
4923 status = RA_FAIL;
4924 else
4925 {
4926 rp->rs_no = no;
4927 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4928 /* We simply continue and handle the result when done. */
4929 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004930 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004931 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004932
4933#ifdef FEAT_SYN_HL
4934 case ZCLOSE + 1: /* \) after \z( */
4935 case ZCLOSE + 2:
4936 case ZCLOSE + 3:
4937 case ZCLOSE + 4:
4938 case ZCLOSE + 5:
4939 case ZCLOSE + 6:
4940 case ZCLOSE + 7:
4941 case ZCLOSE + 8:
4942 case ZCLOSE + 9:
4943 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004944 no = op - ZCLOSE;
4945 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004946 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004947 if (rp == NULL)
4948 status = RA_FAIL;
4949 else
4950 {
4951 rp->rs_no = no;
4952 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4953 &reg_endzp[no]);
4954 /* We simply continue and handle the result when done. */
4955 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004956 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004957 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004958#endif
4959
4960 case BACKREF + 1:
4961 case BACKREF + 2:
4962 case BACKREF + 3:
4963 case BACKREF + 4:
4964 case BACKREF + 5:
4965 case BACKREF + 6:
4966 case BACKREF + 7:
4967 case BACKREF + 8:
4968 case BACKREF + 9:
4969 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004970 int len;
4971 linenr_T clnum;
4972 colnr_T ccol;
4973 char_u *p;
4974
4975 no = op - BACKREF;
4976 cleanup_subexpr();
4977 if (!REG_MULTI) /* Single-line regexp */
4978 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004979 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004980 {
4981 /* Backref was not set: Match an empty string. */
4982 len = 0;
4983 }
4984 else
4985 {
4986 /* Compare current input with back-ref in the same
4987 * line. */
4988 len = (int)(reg_endp[no] - reg_startp[no]);
4989 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004990 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004991 }
4992 }
4993 else /* Multi-line regexp */
4994 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004995 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004996 {
4997 /* Backref was not set: Match an empty string. */
4998 len = 0;
4999 }
5000 else
5001 {
5002 if (reg_startpos[no].lnum == reglnum
5003 && reg_endpos[no].lnum == reglnum)
5004 {
5005 /* Compare back-ref within the current line. */
5006 len = reg_endpos[no].col - reg_startpos[no].col;
5007 if (cstrncmp(regline + reg_startpos[no].col,
5008 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005009 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005010 }
5011 else
5012 {
5013 /* Messy situation: Need to compare between two
5014 * lines. */
5015 ccol = reg_startpos[no].col;
5016 clnum = reg_startpos[no].lnum;
5017 for (;;)
5018 {
5019 /* Since getting one line may invalidate
5020 * the other, need to make copy. Slow! */
5021 if (regline != reg_tofree)
5022 {
5023 len = (int)STRLEN(regline);
5024 if (reg_tofree == NULL
5025 || len >= (int)reg_tofreelen)
5026 {
5027 len += 50; /* get some extra */
5028 vim_free(reg_tofree);
5029 reg_tofree = alloc(len);
5030 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005031 {
5032 status = RA_FAIL; /* outof memory!*/
5033 break;
5034 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005035 reg_tofreelen = len;
5036 }
5037 STRCPY(reg_tofree, regline);
5038 reginput = reg_tofree
5039 + (reginput - regline);
5040 regline = reg_tofree;
5041 }
5042
5043 /* Get the line to compare with. */
5044 p = reg_getline(clnum);
5045 if (clnum == reg_endpos[no].lnum)
5046 len = reg_endpos[no].col - ccol;
5047 else
5048 len = (int)STRLEN(p + ccol);
5049
5050 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005051 {
5052 status = RA_NOMATCH; /* doesn't match */
5053 break;
5054 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005055 if (clnum == reg_endpos[no].lnum)
5056 break; /* match and at end! */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005057 if (reglnum >= reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005058 {
5059 status = RA_NOMATCH; /* text too short */
5060 break;
5061 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005062
5063 /* Advance to next line. */
5064 reg_nextline();
5065 ++clnum;
5066 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005067 if (got_int)
5068 {
5069 status = RA_FAIL;
5070 break;
5071 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005072 }
5073
5074 /* found a match! Note that regline may now point
5075 * to a copy of the line, that should not matter. */
5076 }
5077 }
5078 }
5079
5080 /* Matched the backref, skip over it. */
5081 reginput += len;
5082 }
5083 break;
5084
5085#ifdef FEAT_SYN_HL
5086 case ZREF + 1:
5087 case ZREF + 2:
5088 case ZREF + 3:
5089 case ZREF + 4:
5090 case ZREF + 5:
5091 case ZREF + 6:
5092 case ZREF + 7:
5093 case ZREF + 8:
5094 case ZREF + 9:
5095 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005096 int len;
5097
5098 cleanup_zsubexpr();
5099 no = op - ZREF;
5100 if (re_extmatch_in != NULL
5101 && re_extmatch_in->matches[no] != NULL)
5102 {
5103 len = (int)STRLEN(re_extmatch_in->matches[no]);
5104 if (cstrncmp(re_extmatch_in->matches[no],
5105 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005106 status = RA_NOMATCH;
5107 else
5108 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005109 }
5110 else
5111 {
5112 /* Backref was not set: Match an empty string. */
5113 }
5114 }
5115 break;
5116#endif
5117
5118 case BRANCH:
5119 {
5120 if (OP(next) != BRANCH) /* No choice. */
5121 next = OPERAND(scan); /* Avoid recursion. */
5122 else
5123 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005124 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005125 if (rp == NULL)
5126 status = RA_FAIL;
5127 else
5128 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005129 }
5130 }
5131 break;
5132
5133 case BRACE_LIMITS:
5134 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005135 if (OP(next) == BRACE_SIMPLE)
5136 {
5137 bl_minval = OPERAND_MIN(scan);
5138 bl_maxval = OPERAND_MAX(scan);
5139 }
5140 else if (OP(next) >= BRACE_COMPLEX
5141 && OP(next) < BRACE_COMPLEX + 10)
5142 {
5143 no = OP(next) - BRACE_COMPLEX;
5144 brace_min[no] = OPERAND_MIN(scan);
5145 brace_max[no] = OPERAND_MAX(scan);
5146 brace_count[no] = 0;
5147 }
5148 else
5149 {
5150 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005151 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005152 }
5153 }
5154 break;
5155
5156 case BRACE_COMPLEX + 0:
5157 case BRACE_COMPLEX + 1:
5158 case BRACE_COMPLEX + 2:
5159 case BRACE_COMPLEX + 3:
5160 case BRACE_COMPLEX + 4:
5161 case BRACE_COMPLEX + 5:
5162 case BRACE_COMPLEX + 6:
5163 case BRACE_COMPLEX + 7:
5164 case BRACE_COMPLEX + 8:
5165 case BRACE_COMPLEX + 9:
5166 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005167 no = op - BRACE_COMPLEX;
5168 ++brace_count[no];
5169
5170 /* If not matched enough times yet, try one more */
5171 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005172 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005173 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005174 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005175 if (rp == NULL)
5176 status = RA_FAIL;
5177 else
5178 {
5179 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005180 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005181 next = OPERAND(scan);
5182 /* We continue and handle the result when done. */
5183 }
5184 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005185 }
5186
5187 /* If matched enough times, may try matching some more */
5188 if (brace_min[no] <= brace_max[no])
5189 {
5190 /* Range is the normal way around, use longest match */
5191 if (brace_count[no] <= brace_max[no])
5192 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005193 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005194 if (rp == NULL)
5195 status = RA_FAIL;
5196 else
5197 {
5198 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005199 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005200 next = OPERAND(scan);
5201 /* We continue and handle the result when done. */
5202 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203 }
5204 }
5205 else
5206 {
5207 /* Range is backwards, use shortest match first */
5208 if (brace_count[no] <= brace_min[no])
5209 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005210 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005211 if (rp == NULL)
5212 status = RA_FAIL;
5213 else
5214 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005215 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005216 /* We continue and handle the result when done. */
5217 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005218 }
5219 }
5220 }
5221 break;
5222
5223 case BRACE_SIMPLE:
5224 case STAR:
5225 case PLUS:
5226 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005227 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005228
5229 /*
5230 * Lookahead to avoid useless match attempts when we know
5231 * what character comes next.
5232 */
5233 if (OP(next) == EXACTLY)
5234 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005235 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005236 if (ireg_ic)
5237 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005238 if (MB_ISUPPER(rst.nextb))
5239 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005240 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005241 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005242 }
5243 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005244 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005245 }
5246 else
5247 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005248 rst.nextb = NUL;
5249 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005250 }
5251 if (op != BRACE_SIMPLE)
5252 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 rst.minval = (op == STAR) ? 0 : 1;
5254 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005255 }
5256 else
5257 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005258 rst.minval = bl_minval;
5259 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005260 }
5261
5262 /*
5263 * When maxval > minval, try matching as much as possible, up
5264 * to maxval. When maxval < minval, try matching at least the
5265 * minimal number (since the range is backwards, that's also
5266 * maxval!).
5267 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005268 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005269 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005270 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005271 status = RA_FAIL;
5272 break;
5273 }
5274 if (rst.minval <= rst.maxval
5275 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5276 {
5277 /* It could match. Prepare for trying to match what
5278 * follows. The code is below. Parameters are stored in
5279 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005280 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005281 {
5282 EMSG(_(e_maxmempat));
5283 status = RA_FAIL;
5284 }
5285 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005286 status = RA_FAIL;
5287 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005288 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005289 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005290 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005291 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292 if (rp == NULL)
5293 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005294 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005295 {
5296 *(((regstar_T *)rp) - 1) = rst;
5297 status = RA_BREAK; /* skip the restore bits */
5298 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005299 }
5300 }
5301 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005302 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005303
Bram Moolenaar071d4272004-06-13 20:20:40 +00005304 }
5305 break;
5306
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005307 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005308 case MATCH:
5309 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005310 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005311 if (rp == NULL)
5312 status = RA_FAIL;
5313 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005314 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005315 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005316 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005317 next = OPERAND(scan);
5318 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005319 }
5320 break;
5321
5322 case BEHIND:
5323 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005324 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005325 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005326 {
5327 EMSG(_(e_maxmempat));
5328 status = RA_FAIL;
5329 }
5330 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005331 status = RA_FAIL;
5332 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005333 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005334 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005335 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005336 if (rp == NULL)
5337 status = RA_FAIL;
5338 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005339 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005340 /* Need to save the subexpr to be able to restore them
5341 * when there is a match but we don't use it. */
5342 save_subexpr(((regbehind_T *)rp) - 1);
5343
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005344 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005345 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005346 /* First try if what follows matches. If it does then we
5347 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005348 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005349 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005350 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005351
5352 case BHPOS:
5353 if (REG_MULTI)
5354 {
5355 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5356 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005357 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005358 }
5359 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005360 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005361 break;
5362
5363 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005364 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5365 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005366 status = RA_NOMATCH;
5367 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005368 ADVANCE_REGINPUT();
5369 else
5370 reg_nextline();
5371 break;
5372
5373 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005374 status = RA_MATCH; /* Success! */
5375 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005376
5377 default:
5378 EMSG(_(e_re_corr));
5379#ifdef DEBUG
5380 printf("Illegal op code %d\n", op);
5381#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005382 status = RA_FAIL;
5383 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005384 }
5385 }
5386
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005387 /* If we can't continue sequentially, break the inner loop. */
5388 if (status != RA_CONT)
5389 break;
5390
5391 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005392 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005393
5394 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005395
5396 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005397 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005398 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005399 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005400 while (regstack.ga_len > 0 && status != RA_FAIL)
5401 {
5402 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5403 switch (rp->rs_state)
5404 {
5405 case RS_NOPEN:
5406 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005407 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005408 break;
5409
5410 case RS_MOPEN:
5411 /* Pop the state. Restore pointers when there is no match. */
5412 if (status == RA_NOMATCH)
5413 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5414 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005415 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005416 break;
5417
5418#ifdef FEAT_SYN_HL
5419 case RS_ZOPEN:
5420 /* Pop the state. Restore pointers when there is no match. */
5421 if (status == RA_NOMATCH)
5422 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5423 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005424 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005425 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005426#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005427
5428 case RS_MCLOSE:
5429 /* Pop the state. Restore pointers when there is no match. */
5430 if (status == RA_NOMATCH)
5431 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5432 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005433 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005434 break;
5435
5436#ifdef FEAT_SYN_HL
5437 case RS_ZCLOSE:
5438 /* Pop the state. Restore pointers when there is no match. */
5439 if (status == RA_NOMATCH)
5440 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5441 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005442 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005443 break;
5444#endif
5445
5446 case RS_BRANCH:
5447 if (status == RA_MATCH)
5448 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005449 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005450 else
5451 {
5452 if (status != RA_BREAK)
5453 {
5454 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005455 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005456 scan = rp->rs_scan;
5457 }
5458 if (scan == NULL || OP(scan) != BRANCH)
5459 {
5460 /* no more branches, didn't find a match */
5461 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005462 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005463 }
5464 else
5465 {
5466 /* Prepare to try a branch. */
5467 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005468 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005469 scan = OPERAND(scan);
5470 }
5471 }
5472 break;
5473
5474 case RS_BRCPLX_MORE:
5475 /* Pop the state. Restore pointers when there is no match. */
5476 if (status == RA_NOMATCH)
5477 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005478 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005479 --brace_count[rp->rs_no]; /* decrement match count */
5480 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005481 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005482 break;
5483
5484 case RS_BRCPLX_LONG:
5485 /* Pop the state. Restore pointers when there is no match. */
5486 if (status == RA_NOMATCH)
5487 {
5488 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005489 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005490 --brace_count[rp->rs_no];
5491 /* continue with the items after "\{}" */
5492 status = RA_CONT;
5493 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005494 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005495 if (status == RA_CONT)
5496 scan = regnext(scan);
5497 break;
5498
5499 case RS_BRCPLX_SHORT:
5500 /* Pop the state. Restore pointers when there is no match. */
5501 if (status == RA_NOMATCH)
5502 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005503 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005504 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005505 if (status == RA_NOMATCH)
5506 {
5507 scan = OPERAND(scan);
5508 status = RA_CONT;
5509 }
5510 break;
5511
5512 case RS_NOMATCH:
5513 /* Pop the state. If the operand matches for NOMATCH or
5514 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5515 * except for SUBPAT, and continue with the next item. */
5516 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5517 status = RA_NOMATCH;
5518 else
5519 {
5520 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005521 if (rp->rs_no != SUBPAT) /* zero-width */
5522 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005523 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005524 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005525 if (status == RA_CONT)
5526 scan = regnext(scan);
5527 break;
5528
5529 case RS_BEHIND1:
5530 if (status == RA_NOMATCH)
5531 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005532 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005533 regstack.ga_len -= sizeof(regbehind_T);
5534 }
5535 else
5536 {
5537 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5538 * the behind part does (not) match before the current
5539 * position in the input. This must be done at every
5540 * position in the input and checking if the match ends at
5541 * the current position. */
5542
5543 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005544 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005545
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005546 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005547 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005548 * result, hitting the start of the line or the previous
5549 * line (for multi-line matching).
5550 * Set behind_pos to where the match should end, BHPOS
5551 * will match it. Save the current value. */
5552 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5553 behind_pos = rp->rs_un.regsave;
5554
5555 rp->rs_state = RS_BEHIND2;
5556
Bram Moolenaar582fd852005-03-28 20:58:01 +00005557 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005558 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005559 }
5560 break;
5561
5562 case RS_BEHIND2:
5563 /*
5564 * Looping for BEHIND / NOBEHIND match.
5565 */
5566 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5567 {
5568 /* found a match that ends where "next" started */
5569 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5570 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005571 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5572 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005573 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005574 {
5575 /* But we didn't want a match. Need to restore the
5576 * subexpr, because what follows matched, so they have
5577 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005578 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005579 restore_subexpr(((regbehind_T *)rp) - 1);
5580 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005581 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005582 regstack.ga_len -= sizeof(regbehind_T);
5583 }
5584 else
5585 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005586 long limit;
5587
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005588 /* No match or a match that doesn't end where we want it: Go
5589 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005590 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005591 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005592 if (REG_MULTI)
5593 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005594 if (limit > 0
5595 && ((rp->rs_un.regsave.rs_u.pos.lnum
5596 < behind_pos.rs_u.pos.lnum
5597 ? (colnr_T)STRLEN(regline)
5598 : behind_pos.rs_u.pos.col)
5599 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5600 no = FAIL;
5601 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005602 {
5603 if (rp->rs_un.regsave.rs_u.pos.lnum
5604 < behind_pos.rs_u.pos.lnum
5605 || reg_getline(
5606 --rp->rs_un.regsave.rs_u.pos.lnum)
5607 == NULL)
5608 no = FAIL;
5609 else
5610 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005611 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005612 rp->rs_un.regsave.rs_u.pos.col =
5613 (colnr_T)STRLEN(regline);
5614 }
5615 }
5616 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005617 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005618#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005619 if (has_mbyte)
5620 rp->rs_un.regsave.rs_u.pos.col -=
5621 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005622 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005623 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005624#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005625 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005626 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005627 }
5628 else
5629 {
5630 if (rp->rs_un.regsave.rs_u.ptr == regline)
5631 no = FAIL;
5632 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005633 {
5634 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5635 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5636 - rp->rs_un.regsave.rs_u.ptr) > limit)
5637 no = FAIL;
5638 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005639 }
5640 if (no == OK)
5641 {
5642 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005643 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005644 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005645 if (status == RA_MATCH)
5646 {
5647 /* We did match, so subexpr may have been changed,
5648 * need to restore them for the next try. */
5649 status = RA_NOMATCH;
5650 restore_subexpr(((regbehind_T *)rp) - 1);
5651 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005652 }
5653 else
5654 {
5655 /* Can't advance. For NOBEHIND that's a match. */
5656 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5657 if (rp->rs_no == NOBEHIND)
5658 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005659 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5660 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005661 status = RA_MATCH;
5662 }
5663 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005664 {
5665 /* We do want a proper match. Need to restore the
5666 * subexpr if we had a match, because they may have
5667 * been set. */
5668 if (status == RA_MATCH)
5669 {
5670 status = RA_NOMATCH;
5671 restore_subexpr(((regbehind_T *)rp) - 1);
5672 }
5673 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005674 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005675 regstack.ga_len -= sizeof(regbehind_T);
5676 }
5677 }
5678 break;
5679
5680 case RS_STAR_LONG:
5681 case RS_STAR_SHORT:
5682 {
5683 regstar_T *rst = ((regstar_T *)rp) - 1;
5684
5685 if (status == RA_MATCH)
5686 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005687 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005688 regstack.ga_len -= sizeof(regstar_T);
5689 break;
5690 }
5691
5692 /* Tried once already, restore input pointers. */
5693 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005694 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005695
5696 /* Repeat until we found a position where it could match. */
5697 for (;;)
5698 {
5699 if (status != RA_BREAK)
5700 {
5701 /* Tried first position already, advance. */
5702 if (rp->rs_state == RS_STAR_LONG)
5703 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005704 /* Trying for longest match, but couldn't or
5705 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005706 if (--rst->count < rst->minval)
5707 break;
5708 if (reginput == regline)
5709 {
5710 /* backup to last char of previous line */
5711 --reglnum;
5712 regline = reg_getline(reglnum);
5713 /* Just in case regrepeat() didn't count
5714 * right. */
5715 if (regline == NULL)
5716 break;
5717 reginput = regline + STRLEN(regline);
5718 fast_breakcheck();
5719 }
5720 else
5721 mb_ptr_back(regline, reginput);
5722 }
5723 else
5724 {
5725 /* Range is backwards, use shortest match first.
5726 * Careful: maxval and minval are exchanged!
5727 * Couldn't or didn't match: try advancing one
5728 * char. */
5729 if (rst->count == rst->minval
5730 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5731 break;
5732 ++rst->count;
5733 }
5734 if (got_int)
5735 break;
5736 }
5737 else
5738 status = RA_NOMATCH;
5739
5740 /* If it could match, try it. */
5741 if (rst->nextb == NUL || *reginput == rst->nextb
5742 || *reginput == rst->nextb_ic)
5743 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005744 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005745 scan = regnext(rp->rs_scan);
5746 status = RA_CONT;
5747 break;
5748 }
5749 }
5750 if (status != RA_CONT)
5751 {
5752 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005753 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005754 regstack.ga_len -= sizeof(regstar_T);
5755 status = RA_NOMATCH;
5756 }
5757 }
5758 break;
5759 }
5760
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005761 /* If we want to continue the inner loop or didn't pop a state
5762 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005763 if (status == RA_CONT || rp == (regitem_T *)
5764 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5765 break;
5766 }
5767
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005768 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005769 if (status == RA_CONT)
5770 continue;
5771
5772 /*
5773 * If the regstack is empty or something failed we are done.
5774 */
5775 if (regstack.ga_len == 0 || status == RA_FAIL)
5776 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005777 if (scan == NULL)
5778 {
5779 /*
5780 * We get here only if there's trouble -- normally "case END" is
5781 * the terminating point.
5782 */
5783 EMSG(_(e_re_corr));
5784#ifdef DEBUG
5785 printf("Premature EOL\n");
5786#endif
5787 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005788 if (status == RA_FAIL)
5789 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005790 return (status == RA_MATCH);
5791 }
5792
5793 } /* End of loop until the regstack is empty. */
5794
5795 /* NOTREACHED */
5796}
5797
5798/*
5799 * Push an item onto the regstack.
5800 * Returns pointer to new item. Returns NULL when out of memory.
5801 */
5802 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005803regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005804 regstate_T state;
5805 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005806{
5807 regitem_T *rp;
5808
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005809 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005810 {
5811 EMSG(_(e_maxmempat));
5812 return NULL;
5813 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005814 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005815 return NULL;
5816
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005817 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005818 rp->rs_state = state;
5819 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005820
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005821 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005822 return rp;
5823}
5824
5825/*
5826 * Pop an item from the regstack.
5827 */
5828 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005829regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005830 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005831{
5832 regitem_T *rp;
5833
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005834 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005835 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005836
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005837 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005838}
5839
Bram Moolenaar071d4272004-06-13 20:20:40 +00005840/*
5841 * regrepeat - repeatedly match something simple, return how many.
5842 * Advances reginput (and reglnum) to just after the matched chars.
5843 */
5844 static int
5845regrepeat(p, maxcount)
5846 char_u *p;
5847 long maxcount; /* maximum number of matches allowed */
5848{
5849 long count = 0;
5850 char_u *scan;
5851 char_u *opnd;
5852 int mask;
5853 int testval = 0;
5854
5855 scan = reginput; /* Make local copy of reginput for speed. */
5856 opnd = OPERAND(p);
5857 switch (OP(p))
5858 {
5859 case ANY:
5860 case ANY + ADD_NL:
5861 while (count < maxcount)
5862 {
5863 /* Matching anything means we continue until end-of-line (or
5864 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5865 while (*scan != NUL && count < maxcount)
5866 {
5867 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005868 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005869 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005870 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5871 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005872 break;
5873 ++count; /* count the line-break */
5874 reg_nextline();
5875 scan = reginput;
5876 if (got_int)
5877 break;
5878 }
5879 break;
5880
5881 case IDENT:
5882 case IDENT + ADD_NL:
5883 testval = TRUE;
5884 /*FALLTHROUGH*/
5885 case SIDENT:
5886 case SIDENT + ADD_NL:
5887 while (count < maxcount)
5888 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005889 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005890 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005891 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005892 }
5893 else if (*scan == NUL)
5894 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005895 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5896 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005897 break;
5898 reg_nextline();
5899 scan = reginput;
5900 if (got_int)
5901 break;
5902 }
5903 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5904 ++scan;
5905 else
5906 break;
5907 ++count;
5908 }
5909 break;
5910
5911 case KWORD:
5912 case KWORD + ADD_NL:
5913 testval = TRUE;
5914 /*FALLTHROUGH*/
5915 case SKWORD:
5916 case SKWORD + ADD_NL:
5917 while (count < maxcount)
5918 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005919 if (vim_iswordp_buf(scan, reg_buf)
5920 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005921 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005922 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005923 }
5924 else if (*scan == NUL)
5925 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005926 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5927 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005928 break;
5929 reg_nextline();
5930 scan = reginput;
5931 if (got_int)
5932 break;
5933 }
5934 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5935 ++scan;
5936 else
5937 break;
5938 ++count;
5939 }
5940 break;
5941
5942 case FNAME:
5943 case FNAME + ADD_NL:
5944 testval = TRUE;
5945 /*FALLTHROUGH*/
5946 case SFNAME:
5947 case SFNAME + ADD_NL:
5948 while (count < maxcount)
5949 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005950 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005951 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005952 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005953 }
5954 else if (*scan == NUL)
5955 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005956 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5957 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005958 break;
5959 reg_nextline();
5960 scan = reginput;
5961 if (got_int)
5962 break;
5963 }
5964 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5965 ++scan;
5966 else
5967 break;
5968 ++count;
5969 }
5970 break;
5971
5972 case PRINT:
5973 case PRINT + ADD_NL:
5974 testval = TRUE;
5975 /*FALLTHROUGH*/
5976 case SPRINT:
5977 case SPRINT + ADD_NL:
5978 while (count < maxcount)
5979 {
5980 if (*scan == NUL)
5981 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005982 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5983 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005984 break;
5985 reg_nextline();
5986 scan = reginput;
5987 if (got_int)
5988 break;
5989 }
5990 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5991 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005992 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005993 }
5994 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5995 ++scan;
5996 else
5997 break;
5998 ++count;
5999 }
6000 break;
6001
6002 case WHITE:
6003 case WHITE + ADD_NL:
6004 testval = mask = RI_WHITE;
6005do_class:
6006 while (count < maxcount)
6007 {
6008#ifdef FEAT_MBYTE
6009 int l;
6010#endif
6011 if (*scan == NUL)
6012 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006013 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6014 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006015 break;
6016 reg_nextline();
6017 scan = reginput;
6018 if (got_int)
6019 break;
6020 }
6021#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006022 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006023 {
6024 if (testval != 0)
6025 break;
6026 scan += l;
6027 }
6028#endif
6029 else if ((class_tab[*scan] & mask) == testval)
6030 ++scan;
6031 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6032 ++scan;
6033 else
6034 break;
6035 ++count;
6036 }
6037 break;
6038
6039 case NWHITE:
6040 case NWHITE + ADD_NL:
6041 mask = RI_WHITE;
6042 goto do_class;
6043 case DIGIT:
6044 case DIGIT + ADD_NL:
6045 testval = mask = RI_DIGIT;
6046 goto do_class;
6047 case NDIGIT:
6048 case NDIGIT + ADD_NL:
6049 mask = RI_DIGIT;
6050 goto do_class;
6051 case HEX:
6052 case HEX + ADD_NL:
6053 testval = mask = RI_HEX;
6054 goto do_class;
6055 case NHEX:
6056 case NHEX + ADD_NL:
6057 mask = RI_HEX;
6058 goto do_class;
6059 case OCTAL:
6060 case OCTAL + ADD_NL:
6061 testval = mask = RI_OCTAL;
6062 goto do_class;
6063 case NOCTAL:
6064 case NOCTAL + ADD_NL:
6065 mask = RI_OCTAL;
6066 goto do_class;
6067 case WORD:
6068 case WORD + ADD_NL:
6069 testval = mask = RI_WORD;
6070 goto do_class;
6071 case NWORD:
6072 case NWORD + ADD_NL:
6073 mask = RI_WORD;
6074 goto do_class;
6075 case HEAD:
6076 case HEAD + ADD_NL:
6077 testval = mask = RI_HEAD;
6078 goto do_class;
6079 case NHEAD:
6080 case NHEAD + ADD_NL:
6081 mask = RI_HEAD;
6082 goto do_class;
6083 case ALPHA:
6084 case ALPHA + ADD_NL:
6085 testval = mask = RI_ALPHA;
6086 goto do_class;
6087 case NALPHA:
6088 case NALPHA + ADD_NL:
6089 mask = RI_ALPHA;
6090 goto do_class;
6091 case LOWER:
6092 case LOWER + ADD_NL:
6093 testval = mask = RI_LOWER;
6094 goto do_class;
6095 case NLOWER:
6096 case NLOWER + ADD_NL:
6097 mask = RI_LOWER;
6098 goto do_class;
6099 case UPPER:
6100 case UPPER + ADD_NL:
6101 testval = mask = RI_UPPER;
6102 goto do_class;
6103 case NUPPER:
6104 case NUPPER + ADD_NL:
6105 mask = RI_UPPER;
6106 goto do_class;
6107
6108 case EXACTLY:
6109 {
6110 int cu, cl;
6111
6112 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006113 * would have been used for it. It does handle single-byte
6114 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006115 if (ireg_ic)
6116 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006117 cu = MB_TOUPPER(*opnd);
6118 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006119 while (count < maxcount && (*scan == cu || *scan == cl))
6120 {
6121 count++;
6122 scan++;
6123 }
6124 }
6125 else
6126 {
6127 cu = *opnd;
6128 while (count < maxcount && *scan == cu)
6129 {
6130 count++;
6131 scan++;
6132 }
6133 }
6134 break;
6135 }
6136
6137#ifdef FEAT_MBYTE
6138 case MULTIBYTECODE:
6139 {
6140 int i, len, cf = 0;
6141
6142 /* Safety check (just in case 'encoding' was changed since
6143 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006144 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006145 {
6146 if (ireg_ic && enc_utf8)
6147 cf = utf_fold(utf_ptr2char(opnd));
6148 while (count < maxcount)
6149 {
6150 for (i = 0; i < len; ++i)
6151 if (opnd[i] != scan[i])
6152 break;
6153 if (i < len && (!ireg_ic || !enc_utf8
6154 || utf_fold(utf_ptr2char(scan)) != cf))
6155 break;
6156 scan += len;
6157 ++count;
6158 }
6159 }
6160 }
6161 break;
6162#endif
6163
6164 case ANYOF:
6165 case ANYOF + ADD_NL:
6166 testval = TRUE;
6167 /*FALLTHROUGH*/
6168
6169 case ANYBUT:
6170 case ANYBUT + ADD_NL:
6171 while (count < maxcount)
6172 {
6173#ifdef FEAT_MBYTE
6174 int len;
6175#endif
6176 if (*scan == NUL)
6177 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006178 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6179 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006180 break;
6181 reg_nextline();
6182 scan = reginput;
6183 if (got_int)
6184 break;
6185 }
6186 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6187 ++scan;
6188#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006189 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006190 {
6191 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6192 break;
6193 scan += len;
6194 }
6195#endif
6196 else
6197 {
6198 if ((cstrchr(opnd, *scan) == NULL) == testval)
6199 break;
6200 ++scan;
6201 }
6202 ++count;
6203 }
6204 break;
6205
6206 case NEWL:
6207 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006208 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6209 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006210 {
6211 count++;
6212 if (reg_line_lbr)
6213 ADVANCE_REGINPUT();
6214 else
6215 reg_nextline();
6216 scan = reginput;
6217 if (got_int)
6218 break;
6219 }
6220 break;
6221
6222 default: /* Oh dear. Called inappropriately. */
6223 EMSG(_(e_re_corr));
6224#ifdef DEBUG
6225 printf("Called regrepeat with op code %d\n", OP(p));
6226#endif
6227 break;
6228 }
6229
6230 reginput = scan;
6231
6232 return (int)count;
6233}
6234
6235/*
6236 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006237 * Returns NULL when calculating size, when there is no next item and when
6238 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006239 */
6240 static char_u *
6241regnext(p)
6242 char_u *p;
6243{
6244 int offset;
6245
Bram Moolenaard3005802009-11-25 17:21:32 +00006246 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006247 return NULL;
6248
6249 offset = NEXT(p);
6250 if (offset == 0)
6251 return NULL;
6252
Bram Moolenaar582fd852005-03-28 20:58:01 +00006253 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006254 return p - offset;
6255 else
6256 return p + offset;
6257}
6258
6259/*
6260 * Check the regexp program for its magic number.
6261 * Return TRUE if it's wrong.
6262 */
6263 static int
6264prog_magic_wrong()
6265{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006266 regprog_T *prog;
6267
6268 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6269 if (prog->engine == &nfa_regengine)
6270 /* For NFA matcher we don't check the magic */
6271 return FALSE;
6272
6273 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006274 {
6275 EMSG(_(e_re_corr));
6276 return TRUE;
6277 }
6278 return FALSE;
6279}
6280
6281/*
6282 * Cleanup the subexpressions, if this wasn't done yet.
6283 * This construction is used to clear the subexpressions only when they are
6284 * used (to increase speed).
6285 */
6286 static void
6287cleanup_subexpr()
6288{
6289 if (need_clear_subexpr)
6290 {
6291 if (REG_MULTI)
6292 {
6293 /* Use 0xff to set lnum to -1 */
6294 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6295 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6296 }
6297 else
6298 {
6299 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6300 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6301 }
6302 need_clear_subexpr = FALSE;
6303 }
6304}
6305
6306#ifdef FEAT_SYN_HL
6307 static void
6308cleanup_zsubexpr()
6309{
6310 if (need_clear_zsubexpr)
6311 {
6312 if (REG_MULTI)
6313 {
6314 /* Use 0xff to set lnum to -1 */
6315 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6316 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6317 }
6318 else
6319 {
6320 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
6321 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
6322 }
6323 need_clear_zsubexpr = FALSE;
6324 }
6325}
6326#endif
6327
6328/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006329 * Save the current subexpr to "bp", so that they can be restored
6330 * later by restore_subexpr().
6331 */
6332 static void
6333save_subexpr(bp)
6334 regbehind_T *bp;
6335{
6336 int i;
6337
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006338 /* When "need_clear_subexpr" is set we don't need to save the values, only
6339 * remember that this flag needs to be set again when restoring. */
6340 bp->save_need_clear_subexpr = need_clear_subexpr;
6341 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006342 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006343 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006344 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006345 if (REG_MULTI)
6346 {
6347 bp->save_start[i].se_u.pos = reg_startpos[i];
6348 bp->save_end[i].se_u.pos = reg_endpos[i];
6349 }
6350 else
6351 {
6352 bp->save_start[i].se_u.ptr = reg_startp[i];
6353 bp->save_end[i].se_u.ptr = reg_endp[i];
6354 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006355 }
6356 }
6357}
6358
6359/*
6360 * Restore the subexpr from "bp".
6361 */
6362 static void
6363restore_subexpr(bp)
6364 regbehind_T *bp;
6365{
6366 int i;
6367
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006368 /* Only need to restore saved values when they are not to be cleared. */
6369 need_clear_subexpr = bp->save_need_clear_subexpr;
6370 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006371 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006372 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006373 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006374 if (REG_MULTI)
6375 {
6376 reg_startpos[i] = bp->save_start[i].se_u.pos;
6377 reg_endpos[i] = bp->save_end[i].se_u.pos;
6378 }
6379 else
6380 {
6381 reg_startp[i] = bp->save_start[i].se_u.ptr;
6382 reg_endp[i] = bp->save_end[i].se_u.ptr;
6383 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006384 }
6385 }
6386}
6387
6388/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006389 * Advance reglnum, regline and reginput to the next line.
6390 */
6391 static void
6392reg_nextline()
6393{
6394 regline = reg_getline(++reglnum);
6395 reginput = regline;
6396 fast_breakcheck();
6397}
6398
6399/*
6400 * Save the input line and position in a regsave_T.
6401 */
6402 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006403reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006404 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006405 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006406{
6407 if (REG_MULTI)
6408 {
6409 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6410 save->rs_u.pos.lnum = reglnum;
6411 }
6412 else
6413 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006414 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006415}
6416
6417/*
6418 * Restore the input line and position from a regsave_T.
6419 */
6420 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006421reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006422 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006423 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006424{
6425 if (REG_MULTI)
6426 {
6427 if (reglnum != save->rs_u.pos.lnum)
6428 {
6429 /* only call reg_getline() when the line number changed to save
6430 * a bit of time */
6431 reglnum = save->rs_u.pos.lnum;
6432 regline = reg_getline(reglnum);
6433 }
6434 reginput = regline + save->rs_u.pos.col;
6435 }
6436 else
6437 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006438 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006439}
6440
6441/*
6442 * Return TRUE if current position is equal to saved position.
6443 */
6444 static int
6445reg_save_equal(save)
6446 regsave_T *save;
6447{
6448 if (REG_MULTI)
6449 return reglnum == save->rs_u.pos.lnum
6450 && reginput == regline + save->rs_u.pos.col;
6451 return reginput == save->rs_u.ptr;
6452}
6453
6454/*
6455 * Tentatively set the sub-expression start to the current position (after
6456 * calling regmatch() they will have changed). Need to save the existing
6457 * values for when there is no match.
6458 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6459 * depending on REG_MULTI.
6460 */
6461 static void
6462save_se_multi(savep, posp)
6463 save_se_T *savep;
6464 lpos_T *posp;
6465{
6466 savep->se_u.pos = *posp;
6467 posp->lnum = reglnum;
6468 posp->col = (colnr_T)(reginput - regline);
6469}
6470
6471 static void
6472save_se_one(savep, pp)
6473 save_se_T *savep;
6474 char_u **pp;
6475{
6476 savep->se_u.ptr = *pp;
6477 *pp = reginput;
6478}
6479
6480/*
6481 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6482 */
6483 static int
6484re_num_cmp(val, scan)
6485 long_u val;
6486 char_u *scan;
6487{
6488 long_u n = OPERAND_MIN(scan);
6489
6490 if (OPERAND_CMP(scan) == '>')
6491 return val > n;
6492 if (OPERAND_CMP(scan) == '<')
6493 return val < n;
6494 return val == n;
6495}
6496
6497
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006498#ifdef BT_REGEXP_DUMP
Bram Moolenaar071d4272004-06-13 20:20:40 +00006499
6500/*
6501 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6502 */
6503 static void
6504regdump(pattern, r)
6505 char_u *pattern;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006506 bt_regprog_T *r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006507{
6508 char_u *s;
6509 int op = EXACTLY; /* Arbitrary non-END op. */
6510 char_u *next;
6511 char_u *end = NULL;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006512 FILE *f;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006513
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006514#ifdef BT_REGEXP_LOG
6515 f = fopen("bt_regexp_log.log", "a");
6516#else
6517 f = stdout;
6518#endif
6519 if (f == NULL)
6520 return;
6521 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006522
6523 s = r->program + 1;
6524 /*
6525 * Loop until we find the END that isn't before a referred next (an END
6526 * can also appear in a NOMATCH operand).
6527 */
6528 while (op != END || s <= end)
6529 {
6530 op = OP(s);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006531 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006532 next = regnext(s);
6533 if (next == NULL) /* Next ptr. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006534 fprintf(f, "(0)");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006535 else
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006536 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006537 if (end < next)
6538 end = next;
6539 if (op == BRACE_LIMITS)
6540 {
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006541 /* Two ints */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006542 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00006543 s += 8;
6544 }
Bram Moolenaar5b84ddc2013-06-05 16:33:10 +02006545 else if (op == BEHIND || op == NOBEHIND)
6546 {
6547 /* one int */
6548 fprintf(f, " count %ld", OPERAND_MIN(s));
6549 s += 4;
6550 }
Bram Moolenaar6d3a5d72013-06-06 18:04:51 +02006551 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6552 {
6553 /* one int plus comperator */
6554 fprintf(f, " count %ld", OPERAND_MIN(s));
6555 s += 5;
6556 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00006557 s += 3;
6558 if (op == ANYOF || op == ANYOF + ADD_NL
6559 || op == ANYBUT || op == ANYBUT + ADD_NL
6560 || op == EXACTLY)
6561 {
6562 /* Literal string, where present. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006563 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006564 while (*s != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006565 fprintf(f, "%c", *s++);
6566 fprintf(f, "\nxxxxxxxxx\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006567 s++;
6568 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006569 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006570 }
6571
6572 /* Header fields of interest. */
6573 if (r->regstart != NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006574 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
Bram Moolenaar071d4272004-06-13 20:20:40 +00006575 ? (char *)transchar(r->regstart)
6576 : "multibyte", r->regstart);
6577 if (r->reganch)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006578 fprintf(f, "anchored; ");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006579 if (r->regmust != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006580 fprintf(f, "must have \"%s\"", r->regmust);
6581 fprintf(f, "\r\n");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006582
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006583#ifdef BT_REGEXP_LOG
6584 fclose(f);
6585#endif
6586}
6587#endif /* BT_REGEXP_DUMP */
6588
6589#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006590/*
6591 * regprop - printable representation of opcode
6592 */
6593 static char_u *
6594regprop(op)
6595 char_u *op;
6596{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006597 char *p;
6598 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006599
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006600 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006601
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006602 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006603 {
6604 case BOL:
6605 p = "BOL";
6606 break;
6607 case EOL:
6608 p = "EOL";
6609 break;
6610 case RE_BOF:
6611 p = "BOF";
6612 break;
6613 case RE_EOF:
6614 p = "EOF";
6615 break;
6616 case CURSOR:
6617 p = "CURSOR";
6618 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006619 case RE_VISUAL:
6620 p = "RE_VISUAL";
6621 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006622 case RE_LNUM:
6623 p = "RE_LNUM";
6624 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006625 case RE_MARK:
6626 p = "RE_MARK";
6627 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006628 case RE_COL:
6629 p = "RE_COL";
6630 break;
6631 case RE_VCOL:
6632 p = "RE_VCOL";
6633 break;
6634 case BOW:
6635 p = "BOW";
6636 break;
6637 case EOW:
6638 p = "EOW";
6639 break;
6640 case ANY:
6641 p = "ANY";
6642 break;
6643 case ANY + ADD_NL:
6644 p = "ANY+NL";
6645 break;
6646 case ANYOF:
6647 p = "ANYOF";
6648 break;
6649 case ANYOF + ADD_NL:
6650 p = "ANYOF+NL";
6651 break;
6652 case ANYBUT:
6653 p = "ANYBUT";
6654 break;
6655 case ANYBUT + ADD_NL:
6656 p = "ANYBUT+NL";
6657 break;
6658 case IDENT:
6659 p = "IDENT";
6660 break;
6661 case IDENT + ADD_NL:
6662 p = "IDENT+NL";
6663 break;
6664 case SIDENT:
6665 p = "SIDENT";
6666 break;
6667 case SIDENT + ADD_NL:
6668 p = "SIDENT+NL";
6669 break;
6670 case KWORD:
6671 p = "KWORD";
6672 break;
6673 case KWORD + ADD_NL:
6674 p = "KWORD+NL";
6675 break;
6676 case SKWORD:
6677 p = "SKWORD";
6678 break;
6679 case SKWORD + ADD_NL:
6680 p = "SKWORD+NL";
6681 break;
6682 case FNAME:
6683 p = "FNAME";
6684 break;
6685 case FNAME + ADD_NL:
6686 p = "FNAME+NL";
6687 break;
6688 case SFNAME:
6689 p = "SFNAME";
6690 break;
6691 case SFNAME + ADD_NL:
6692 p = "SFNAME+NL";
6693 break;
6694 case PRINT:
6695 p = "PRINT";
6696 break;
6697 case PRINT + ADD_NL:
6698 p = "PRINT+NL";
6699 break;
6700 case SPRINT:
6701 p = "SPRINT";
6702 break;
6703 case SPRINT + ADD_NL:
6704 p = "SPRINT+NL";
6705 break;
6706 case WHITE:
6707 p = "WHITE";
6708 break;
6709 case WHITE + ADD_NL:
6710 p = "WHITE+NL";
6711 break;
6712 case NWHITE:
6713 p = "NWHITE";
6714 break;
6715 case NWHITE + ADD_NL:
6716 p = "NWHITE+NL";
6717 break;
6718 case DIGIT:
6719 p = "DIGIT";
6720 break;
6721 case DIGIT + ADD_NL:
6722 p = "DIGIT+NL";
6723 break;
6724 case NDIGIT:
6725 p = "NDIGIT";
6726 break;
6727 case NDIGIT + ADD_NL:
6728 p = "NDIGIT+NL";
6729 break;
6730 case HEX:
6731 p = "HEX";
6732 break;
6733 case HEX + ADD_NL:
6734 p = "HEX+NL";
6735 break;
6736 case NHEX:
6737 p = "NHEX";
6738 break;
6739 case NHEX + ADD_NL:
6740 p = "NHEX+NL";
6741 break;
6742 case OCTAL:
6743 p = "OCTAL";
6744 break;
6745 case OCTAL + ADD_NL:
6746 p = "OCTAL+NL";
6747 break;
6748 case NOCTAL:
6749 p = "NOCTAL";
6750 break;
6751 case NOCTAL + ADD_NL:
6752 p = "NOCTAL+NL";
6753 break;
6754 case WORD:
6755 p = "WORD";
6756 break;
6757 case WORD + ADD_NL:
6758 p = "WORD+NL";
6759 break;
6760 case NWORD:
6761 p = "NWORD";
6762 break;
6763 case NWORD + ADD_NL:
6764 p = "NWORD+NL";
6765 break;
6766 case HEAD:
6767 p = "HEAD";
6768 break;
6769 case HEAD + ADD_NL:
6770 p = "HEAD+NL";
6771 break;
6772 case NHEAD:
6773 p = "NHEAD";
6774 break;
6775 case NHEAD + ADD_NL:
6776 p = "NHEAD+NL";
6777 break;
6778 case ALPHA:
6779 p = "ALPHA";
6780 break;
6781 case ALPHA + ADD_NL:
6782 p = "ALPHA+NL";
6783 break;
6784 case NALPHA:
6785 p = "NALPHA";
6786 break;
6787 case NALPHA + ADD_NL:
6788 p = "NALPHA+NL";
6789 break;
6790 case LOWER:
6791 p = "LOWER";
6792 break;
6793 case LOWER + ADD_NL:
6794 p = "LOWER+NL";
6795 break;
6796 case NLOWER:
6797 p = "NLOWER";
6798 break;
6799 case NLOWER + ADD_NL:
6800 p = "NLOWER+NL";
6801 break;
6802 case UPPER:
6803 p = "UPPER";
6804 break;
6805 case UPPER + ADD_NL:
6806 p = "UPPER+NL";
6807 break;
6808 case NUPPER:
6809 p = "NUPPER";
6810 break;
6811 case NUPPER + ADD_NL:
6812 p = "NUPPER+NL";
6813 break;
6814 case BRANCH:
6815 p = "BRANCH";
6816 break;
6817 case EXACTLY:
6818 p = "EXACTLY";
6819 break;
6820 case NOTHING:
6821 p = "NOTHING";
6822 break;
6823 case BACK:
6824 p = "BACK";
6825 break;
6826 case END:
6827 p = "END";
6828 break;
6829 case MOPEN + 0:
6830 p = "MATCH START";
6831 break;
6832 case MOPEN + 1:
6833 case MOPEN + 2:
6834 case MOPEN + 3:
6835 case MOPEN + 4:
6836 case MOPEN + 5:
6837 case MOPEN + 6:
6838 case MOPEN + 7:
6839 case MOPEN + 8:
6840 case MOPEN + 9:
6841 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6842 p = NULL;
6843 break;
6844 case MCLOSE + 0:
6845 p = "MATCH END";
6846 break;
6847 case MCLOSE + 1:
6848 case MCLOSE + 2:
6849 case MCLOSE + 3:
6850 case MCLOSE + 4:
6851 case MCLOSE + 5:
6852 case MCLOSE + 6:
6853 case MCLOSE + 7:
6854 case MCLOSE + 8:
6855 case MCLOSE + 9:
6856 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6857 p = NULL;
6858 break;
6859 case BACKREF + 1:
6860 case BACKREF + 2:
6861 case BACKREF + 3:
6862 case BACKREF + 4:
6863 case BACKREF + 5:
6864 case BACKREF + 6:
6865 case BACKREF + 7:
6866 case BACKREF + 8:
6867 case BACKREF + 9:
6868 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6869 p = NULL;
6870 break;
6871 case NOPEN:
6872 p = "NOPEN";
6873 break;
6874 case NCLOSE:
6875 p = "NCLOSE";
6876 break;
6877#ifdef FEAT_SYN_HL
6878 case ZOPEN + 1:
6879 case ZOPEN + 2:
6880 case ZOPEN + 3:
6881 case ZOPEN + 4:
6882 case ZOPEN + 5:
6883 case ZOPEN + 6:
6884 case ZOPEN + 7:
6885 case ZOPEN + 8:
6886 case ZOPEN + 9:
6887 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6888 p = NULL;
6889 break;
6890 case ZCLOSE + 1:
6891 case ZCLOSE + 2:
6892 case ZCLOSE + 3:
6893 case ZCLOSE + 4:
6894 case ZCLOSE + 5:
6895 case ZCLOSE + 6:
6896 case ZCLOSE + 7:
6897 case ZCLOSE + 8:
6898 case ZCLOSE + 9:
6899 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6900 p = NULL;
6901 break;
6902 case ZREF + 1:
6903 case ZREF + 2:
6904 case ZREF + 3:
6905 case ZREF + 4:
6906 case ZREF + 5:
6907 case ZREF + 6:
6908 case ZREF + 7:
6909 case ZREF + 8:
6910 case ZREF + 9:
6911 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6912 p = NULL;
6913 break;
6914#endif
6915 case STAR:
6916 p = "STAR";
6917 break;
6918 case PLUS:
6919 p = "PLUS";
6920 break;
6921 case NOMATCH:
6922 p = "NOMATCH";
6923 break;
6924 case MATCH:
6925 p = "MATCH";
6926 break;
6927 case BEHIND:
6928 p = "BEHIND";
6929 break;
6930 case NOBEHIND:
6931 p = "NOBEHIND";
6932 break;
6933 case SUBPAT:
6934 p = "SUBPAT";
6935 break;
6936 case BRACE_LIMITS:
6937 p = "BRACE_LIMITS";
6938 break;
6939 case BRACE_SIMPLE:
6940 p = "BRACE_SIMPLE";
6941 break;
6942 case BRACE_COMPLEX + 0:
6943 case BRACE_COMPLEX + 1:
6944 case BRACE_COMPLEX + 2:
6945 case BRACE_COMPLEX + 3:
6946 case BRACE_COMPLEX + 4:
6947 case BRACE_COMPLEX + 5:
6948 case BRACE_COMPLEX + 6:
6949 case BRACE_COMPLEX + 7:
6950 case BRACE_COMPLEX + 8:
6951 case BRACE_COMPLEX + 9:
6952 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6953 p = NULL;
6954 break;
6955#ifdef FEAT_MBYTE
6956 case MULTIBYTECODE:
6957 p = "MULTIBYTECODE";
6958 break;
6959#endif
6960 case NEWL:
6961 p = "NEWL";
6962 break;
6963 default:
6964 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6965 p = NULL;
6966 break;
6967 }
6968 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006969 STRCAT(buf, p);
6970 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006971}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006972#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006973
6974#ifdef FEAT_MBYTE
6975static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6976
/*
 * One decomposition: up to three characters, unused trailing slots are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the characters 0xfb20 - 0xfb4f (the hard-coded Hebrew
 * forms), indexed by "c - 0xfb20".  Entries marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},              /* 0xfb20       alt ayin */
    {0x5d0, 0, 0},              /* 0xfb21       alt alef */
    {0x5d3, 0, 0},              /* 0xfb22       alt dalet */
    {0x5d4, 0, 0},              /* 0xfb23       alt he */
    {0x5db, 0, 0},              /* 0xfb24       alt kaf */
    {0x5dc, 0, 0},              /* 0xfb25       alt lamed */
    {0x5dd, 0, 0},              /* 0xfb26       alt mem-sofit */
    {0x5e8, 0, 0},              /* 0xfb27       alt resh */
    {0x5ea, 0, 0},              /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc, 0},          /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, stored in "*c1",
 * "*c2" and "*c3".  Only 0xfb20 - 0xfb4f are looked up in decomp_table[];
 * any other character is returned unchanged in "*c1" with "*c2" and "*c3"
 * set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int         c, *c1, *c2, *c3;
{
    decomp_T    *entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
        /* Not in the table: the character is its own decomposition. */
        *c1 = c;
        *c2 = 0;
        *c3 = 0;
        return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
7055#endif
7056
7057/*
7058 * Compare two strings, ignore case if ireg_ic set.
7059 * Return 0 if strings match, non-zero otherwise.
7060 * Correct the length "*n" when composing characters are ignored.
7061 */
7062 static int
7063cstrncmp(s1, s2, n)
7064 char_u *s1, *s2;
7065 int *n;
7066{
7067 int result;
7068
7069 if (!ireg_ic)
7070 result = STRNCMP(s1, s2, *n);
7071 else
7072 result = MB_STRNICMP(s1, s2, *n);
7073
7074#ifdef FEAT_MBYTE
7075 /* if it failed and it's utf8 and we want to combineignore: */
7076 if (result != 0 && enc_utf8 && ireg_icombine)
7077 {
7078 char_u *str1, *str2;
7079 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007080 int junk;
7081
7082 /* we have to handle the strcmp ourselves, since it is necessary to
7083 * deal with the composing characters by ignoring them: */
7084 str1 = s1;
7085 str2 = s2;
7086 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007087 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007088 {
7089 c1 = mb_ptr2char_adv(&str1);
7090 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007091
7092 /* decompose the character if necessary, into 'base' characters
7093 * because I don't care about Arabic, I will hard-code the Hebrew
7094 * which I *do* care about! So sue me... */
7095 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7096 {
7097 /* decomposition necessary? */
7098 mb_decompose(c1, &c11, &junk, &junk);
7099 mb_decompose(c2, &c12, &junk, &junk);
7100 c1 = c11;
7101 c2 = c12;
7102 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7103 break;
7104 }
7105 }
7106 result = c2 - c1;
7107 if (result == 0)
7108 *n = (int)(str2 - s2);
7109 }
7110#endif
7111
7112 return result;
7113}
7114
7115/*
7116 * cstrchr: This function is used a lot for simple searches, keep it fast!
7117 */
7118 static char_u *
7119cstrchr(s, c)
7120 char_u *s;
7121 int c;
7122{
7123 char_u *p;
7124 int cc;
7125
7126 if (!ireg_ic
7127#ifdef FEAT_MBYTE
7128 || (!enc_utf8 && mb_char2len(c) > 1)
7129#endif
7130 )
7131 return vim_strchr(s, c);
7132
7133 /* tolower() and toupper() can be slow, comparing twice should be a lot
7134 * faster (esp. when using MS Visual C++!).
7135 * For UTF-8 need to use folded case. */
7136#ifdef FEAT_MBYTE
7137 if (enc_utf8 && c > 0x80)
7138 cc = utf_fold(c);
7139 else
7140#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007141 if (MB_ISUPPER(c))
7142 cc = MB_TOLOWER(c);
7143 else if (MB_ISLOWER(c))
7144 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007145 else
7146 return vim_strchr(s, c);
7147
7148#ifdef FEAT_MBYTE
7149 if (has_mbyte)
7150 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007151 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007152 {
7153 if (enc_utf8 && c > 0x80)
7154 {
7155 if (utf_fold(utf_ptr2char(p)) == cc)
7156 return p;
7157 }
7158 else if (*p == c || *p == cc)
7159 return p;
7160 }
7161 }
7162 else
7163#endif
7164 /* Faster version for when there are no multi-byte characters. */
7165 for (p = s; *p != NUL; ++p)
7166 if (*p == c || *p == cc)
7167 return p;
7168
7169 return NULL;
7170}
7171
7172/***************************************************************
7173 * regsub stuff *
7174 ***************************************************************/
7175
7176/* This stuff below really confuses cc on an SGI -- webb */
7177#ifdef __sgi
7178# undef __ARGS
7179# define __ARGS(x) ()
7180#endif
7181
7182/*
7183 * We should define ftpr as a pointer to a function returning a pointer to
7184 * a function returning a pointer to a function ...
7185 * This is impossible, so we declare a pointer to a function returning a
7186 * pointer to a function returning void. This should work for all compilers.
7187 */
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007188typedef void (*(*fptr_T) __ARGS((int *, int)))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007189
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007190static fptr_T do_upper __ARGS((int *, int));
7191static fptr_T do_Upper __ARGS((int *, int));
7192static fptr_T do_lower __ARGS((int *, int));
7193static fptr_T do_Lower __ARGS((int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007194
7195static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7196
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007197 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007198do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007199 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007200 int c;
7201{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007202 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007203
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007204 return (fptr_T)NULL;
7205}
7206
7207 static fptr_T
7208do_Upper(d, c)
7209 int *d;
7210 int c;
7211{
7212 *d = MB_TOUPPER(c);
7213
7214 return (fptr_T)do_Upper;
7215}
7216
7217 static fptr_T
7218do_lower(d, c)
7219 int *d;
7220 int c;
7221{
7222 *d = MB_TOLOWER(c);
7223
7224 return (fptr_T)NULL;
7225}
7226
7227 static fptr_T
7228do_Lower(d, c)
7229 int *d;
7230 int c;
7231{
7232 *d = MB_TOLOWER(c);
7233
7234 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007235}
7236
7237/*
7238 * regtilde(): Replace tildes in the pattern by the old pattern.
7239 *
7240 * Short explanation of the tilde: It stands for the previous replacement
7241 * pattern. If that previous pattern also contains a ~ we should go back a
7242 * step further... But we insert the previous pattern into the current one
7243 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007244 * This still does not handle the case where "magic" changes. So require the
7245 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007246 *
7247 * The tildes are parsed once before the first call to vim_regsub().
7248 */
7249 char_u *
7250regtilde(source, magic)
7251 char_u *source;
7252 int magic;
7253{
7254 char_u *newsub = source;
7255 char_u *tmpsub;
7256 char_u *p;
7257 int len;
7258 int prevlen;
7259
7260 for (p = newsub; *p; ++p)
7261 {
7262 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7263 {
7264 if (reg_prev_sub != NULL)
7265 {
7266 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7267 prevlen = (int)STRLEN(reg_prev_sub);
7268 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7269 if (tmpsub != NULL)
7270 {
7271 /* copy prefix */
7272 len = (int)(p - newsub); /* not including ~ */
7273 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007274 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007275 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7276 /* copy postfix */
7277 if (!magic)
7278 ++p; /* back off \ */
7279 STRCPY(tmpsub + len + prevlen, p + 1);
7280
7281 if (newsub != source) /* already allocated newsub */
7282 vim_free(newsub);
7283 newsub = tmpsub;
7284 p = newsub + len + prevlen;
7285 }
7286 }
7287 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007288 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007289 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007290 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007291 --p;
7292 }
7293 else
7294 {
7295 if (*p == '\\' && p[1]) /* skip escaped characters */
7296 ++p;
7297#ifdef FEAT_MBYTE
7298 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007299 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007300#endif
7301 }
7302 }
7303
7304 vim_free(reg_prev_sub);
7305 if (newsub != source) /* newsub was allocated, just keep it */
7306 reg_prev_sub = newsub;
7307 else /* no ~ found, need to save newsub */
7308 reg_prev_sub = vim_strsave(newsub);
7309 return newsub;
7310}
7311
#ifdef FEAT_EVAL
/* TRUE when submatch() can be used */
static int          can_f_submatch = FALSE;

/*
 * These are used instead of reg_match, reg_mmatch and friends for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch().
 * vim_regsub_both() copies the current values here before evaluating a "\="
 * expression and restores them afterwards.
 */
static regmatch_T   *submatch_match;        /* copy of reg_match */
static regmmatch_T  *submatch_mmatch;       /* copy of reg_mmatch */
static linenr_T     submatch_firstlnum;     /* copy of reg_firstlnum */
static linenr_T     submatch_maxline;       /* copy of reg_maxline */
static int          submatch_line_lbr;      /* copy of reg_line_lbr */
#endif
7324
7325#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7326/*
7327 * vim_regsub() - perform substitutions after a vim_regexec() or
7328 * vim_regexec_multi() match.
7329 *
7330 * If "copy" is TRUE really copy into "dest".
7331 * If "copy" is FALSE nothing is copied, this is just to find out the length
7332 * of the result.
7333 *
7334 * If "backslash" is TRUE, a backslash will be removed later, need to double
7335 * them to keep them, and insert a backslash before a CR to avoid it being
7336 * replaced with a line break later.
7337 *
7338 * Note: The matched text must not change between the call of
7339 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7340 * references invalid!
7341 *
7342 * Returns the size of the replacement, including terminating NUL.
7343 */
7344 int
7345vim_regsub(rmp, source, dest, copy, magic, backslash)
7346 regmatch_T *rmp;
7347 char_u *source;
7348 char_u *dest;
7349 int copy;
7350 int magic;
7351 int backslash;
7352{
7353 reg_match = rmp;
7354 reg_mmatch = NULL;
7355 reg_maxline = 0;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01007356 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007357 return vim_regsub_both(source, dest, copy, magic, backslash);
7358}
7359#endif
7360
7361 int
7362vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7363 regmmatch_T *rmp;
7364 linenr_T lnum;
7365 char_u *source;
7366 char_u *dest;
7367 int copy;
7368 int magic;
7369 int backslash;
7370{
7371 reg_match = NULL;
7372 reg_mmatch = rmp;
7373 reg_buf = curbuf; /* always works on the current buffer! */
7374 reg_firstlnum = lnum;
7375 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7376 return vim_regsub_both(source, dest, copy, magic, backslash);
7377}
7378
7379 static int
7380vim_regsub_both(source, dest, copy, magic, backslash)
7381 char_u *source;
7382 char_u *dest;
7383 int copy;
7384 int magic;
7385 int backslash;
7386{
7387 char_u *src;
7388 char_u *dst;
7389 char_u *s;
7390 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007391 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007392 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007393 fptr_T func_all = (fptr_T)NULL;
7394 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007395 linenr_T clnum = 0; /* init for GCC */
7396 int len = 0; /* init for GCC */
7397#ifdef FEAT_EVAL
7398 static char_u *eval_result = NULL;
7399#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007400
7401 /* Be paranoid... */
7402 if (source == NULL || dest == NULL)
7403 {
7404 EMSG(_(e_null));
7405 return 0;
7406 }
7407 if (prog_magic_wrong())
7408 return 0;
7409 src = source;
7410 dst = dest;
7411
7412 /*
7413 * When the substitute part starts with "\=" evaluate it as an expression.
7414 */
7415 if (source[0] == '\\' && source[1] == '='
7416#ifdef FEAT_EVAL
7417 && !can_f_submatch /* can't do this recursively */
7418#endif
7419 )
7420 {
7421#ifdef FEAT_EVAL
7422 /* To make sure that the length doesn't change between checking the
7423 * length and copying the string, and to speed up things, the
7424 * resulting string is saved from the call with "copy" == FALSE to the
7425 * call with "copy" == TRUE. */
7426 if (copy)
7427 {
7428 if (eval_result != NULL)
7429 {
7430 STRCPY(dest, eval_result);
7431 dst += STRLEN(eval_result);
7432 vim_free(eval_result);
7433 eval_result = NULL;
7434 }
7435 }
7436 else
7437 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007438 win_T *save_reg_win;
7439 int save_ireg_ic;
7440
7441 vim_free(eval_result);
7442
7443 /* The expression may contain substitute(), which calls us
7444 * recursively. Make sure submatch() gets the text from the first
7445 * level. Don't need to save "reg_buf", because
7446 * vim_regexec_multi() can't be called recursively. */
7447 submatch_match = reg_match;
7448 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007449 submatch_firstlnum = reg_firstlnum;
7450 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007451 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007452 save_reg_win = reg_win;
7453 save_ireg_ic = ireg_ic;
7454 can_f_submatch = TRUE;
7455
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007456 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007457 if (eval_result != NULL)
7458 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007459 int had_backslash = FALSE;
7460
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007461 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007462 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007463 /* Change NL to CR, so that it becomes a line break,
7464 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007465 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007466 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007467 *s = CAR;
7468 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007469 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007470 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007471 /* Change NL to CR here too, so that this works:
7472 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7473 * abc\
7474 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007475 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007476 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007477 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007478 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007479 had_backslash = TRUE;
7480 }
7481 }
7482 if (had_backslash && backslash)
7483 {
7484 /* Backslashes will be consumed, need to double them. */
7485 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7486 if (s != NULL)
7487 {
7488 vim_free(eval_result);
7489 eval_result = s;
7490 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007491 }
7492
7493 dst += STRLEN(eval_result);
7494 }
7495
7496 reg_match = submatch_match;
7497 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007498 reg_firstlnum = submatch_firstlnum;
7499 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007500 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007501 reg_win = save_reg_win;
7502 ireg_ic = save_ireg_ic;
7503 can_f_submatch = FALSE;
7504 }
7505#endif
7506 }
7507 else
7508 while ((c = *src++) != NUL)
7509 {
7510 if (c == '&' && magic)
7511 no = 0;
7512 else if (c == '\\' && *src != NUL)
7513 {
7514 if (*src == '&' && !magic)
7515 {
7516 ++src;
7517 no = 0;
7518 }
7519 else if ('0' <= *src && *src <= '9')
7520 {
7521 no = *src++ - '0';
7522 }
7523 else if (vim_strchr((char_u *)"uUlLeE", *src))
7524 {
7525 switch (*src++)
7526 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007527 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007528 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007529 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007530 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007531 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007532 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007533 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007534 continue;
7535 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007536 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007537 continue;
7538 }
7539 }
7540 }
7541 if (no < 0) /* Ordinary character. */
7542 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007543 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7544 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007545 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007546 if (copy)
7547 {
7548 *dst++ = c;
7549 *dst++ = *src++;
7550 *dst++ = *src++;
7551 }
7552 else
7553 {
7554 dst += 3;
7555 src += 2;
7556 }
7557 continue;
7558 }
7559
Bram Moolenaar071d4272004-06-13 20:20:40 +00007560 if (c == '\\' && *src != NUL)
7561 {
7562 /* Check for abbreviations -- webb */
7563 switch (*src)
7564 {
7565 case 'r': c = CAR; ++src; break;
7566 case 'n': c = NL; ++src; break;
7567 case 't': c = TAB; ++src; break;
7568 /* Oh no! \e already has meaning in subst pat :-( */
7569 /* case 'e': c = ESC; ++src; break; */
7570 case 'b': c = Ctrl_H; ++src; break;
7571
7572 /* If "backslash" is TRUE the backslash will be removed
7573 * later. Used to insert a literal CR. */
7574 default: if (backslash)
7575 {
7576 if (copy)
7577 *dst = '\\';
7578 ++dst;
7579 }
7580 c = *src++;
7581 }
7582 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007583#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007584 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007585 c = mb_ptr2char(src - 1);
7586#endif
7587
Bram Moolenaardb552d602006-03-23 22:59:57 +00007588 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007589 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007590 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007591 func_one = (fptr_T)(func_one(&cc, c));
7592 else if (func_all != (fptr_T)NULL)
7593 /* Turbo C complains without the typecast */
7594 func_all = (fptr_T)(func_all(&cc, c));
7595 else /* just copy */
7596 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007597
7598#ifdef FEAT_MBYTE
7599 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007600 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007601 int totlen = mb_ptr2len(src - 1);
7602
Bram Moolenaar071d4272004-06-13 20:20:40 +00007603 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007604 mb_char2bytes(cc, dst);
7605 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007606 if (enc_utf8)
7607 {
7608 int clen = utf_ptr2len(src - 1);
7609
7610 /* If the character length is shorter than "totlen", there
7611 * are composing characters; copy them as-is. */
7612 if (clen < totlen)
7613 {
7614 if (copy)
7615 mch_memmove(dst + 1, src - 1 + clen,
7616 (size_t)(totlen - clen));
7617 dst += totlen - clen;
7618 }
7619 }
7620 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007621 }
7622 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007623#endif
7624 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007625 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007626 dst++;
7627 }
7628 else
7629 {
7630 if (REG_MULTI)
7631 {
7632 clnum = reg_mmatch->startpos[no].lnum;
7633 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7634 s = NULL;
7635 else
7636 {
7637 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7638 if (reg_mmatch->endpos[no].lnum == clnum)
7639 len = reg_mmatch->endpos[no].col
7640 - reg_mmatch->startpos[no].col;
7641 else
7642 len = (int)STRLEN(s);
7643 }
7644 }
7645 else
7646 {
7647 s = reg_match->startp[no];
7648 if (reg_match->endp[no] == NULL)
7649 s = NULL;
7650 else
7651 len = (int)(reg_match->endp[no] - s);
7652 }
7653 if (s != NULL)
7654 {
7655 for (;;)
7656 {
7657 if (len == 0)
7658 {
7659 if (REG_MULTI)
7660 {
7661 if (reg_mmatch->endpos[no].lnum == clnum)
7662 break;
7663 if (copy)
7664 *dst = CAR;
7665 ++dst;
7666 s = reg_getline(++clnum);
7667 if (reg_mmatch->endpos[no].lnum == clnum)
7668 len = reg_mmatch->endpos[no].col;
7669 else
7670 len = (int)STRLEN(s);
7671 }
7672 else
7673 break;
7674 }
7675 else if (*s == NUL) /* we hit NUL. */
7676 {
7677 if (copy)
7678 EMSG(_(e_re_damg));
7679 goto exit;
7680 }
7681 else
7682 {
7683 if (backslash && (*s == CAR || *s == '\\'))
7684 {
7685 /*
7686 * Insert a backslash in front of a CR, otherwise
7687 * it will be replaced by a line break.
7688 * Number of backslashes will be halved later,
7689 * double them here.
7690 */
7691 if (copy)
7692 {
7693 dst[0] = '\\';
7694 dst[1] = *s;
7695 }
7696 dst += 2;
7697 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007698 else
7699 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007700#ifdef FEAT_MBYTE
7701 if (has_mbyte)
7702 c = mb_ptr2char(s);
7703 else
7704#endif
7705 c = *s;
7706
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007707 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007708 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007709 func_one = (fptr_T)(func_one(&cc, c));
7710 else if (func_all != (fptr_T)NULL)
7711 /* Turbo C complains without the typecast */
7712 func_all = (fptr_T)(func_all(&cc, c));
7713 else /* just copy */
7714 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007715
7716#ifdef FEAT_MBYTE
7717 if (has_mbyte)
7718 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007719 int l;
7720
7721 /* Copy composing characters separately, one
7722 * at a time. */
7723 if (enc_utf8)
7724 l = utf_ptr2len(s) - 1;
7725 else
7726 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007727
7728 s += l;
7729 len -= l;
7730 if (copy)
7731 mb_char2bytes(cc, dst);
7732 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007733 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007734 else
7735#endif
7736 if (copy)
7737 *dst = cc;
7738 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007739 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007740
Bram Moolenaar071d4272004-06-13 20:20:40 +00007741 ++s;
7742 --len;
7743 }
7744 }
7745 }
7746 no = -1;
7747 }
7748 }
7749 if (copy)
7750 *dst = NUL;
7751
7752exit:
7753 return (int)((dst - dest) + 1);
7754}
7755
7756#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007757static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7758
Bram Moolenaar071d4272004-06-13 20:20:40 +00007759/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007760 * Call reg_getline() with the line numbers from the submatch. If a
7761 * substitute() was used the reg_maxline and other values have been
7762 * overwritten.
7763 */
7764 static char_u *
7765reg_getline_submatch(lnum)
7766 linenr_T lnum;
7767{
7768 char_u *s;
7769 linenr_T save_first = reg_firstlnum;
7770 linenr_T save_max = reg_maxline;
7771
7772 reg_firstlnum = submatch_firstlnum;
7773 reg_maxline = submatch_maxline;
7774
7775 s = reg_getline(lnum);
7776
7777 reg_firstlnum = save_first;
7778 reg_maxline = save_max;
7779 return s;
7780}
7781
7782/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007783 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007784 * allocated memory.
7785 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7786 */
7787 char_u *
7788reg_submatch(no)
7789 int no;
7790{
7791 char_u *retval = NULL;
7792 char_u *s;
7793 int len;
7794 int round;
7795 linenr_T lnum;
7796
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007797 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007798 return NULL;
7799
7800 if (submatch_match == NULL)
7801 {
7802 /*
7803 * First round: compute the length and allocate memory.
7804 * Second round: copy the text.
7805 */
7806 for (round = 1; round <= 2; ++round)
7807 {
7808 lnum = submatch_mmatch->startpos[no].lnum;
7809 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7810 return NULL;
7811
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007812 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007813 if (s == NULL) /* anti-crash check, cannot happen? */
7814 break;
7815 if (submatch_mmatch->endpos[no].lnum == lnum)
7816 {
7817 /* Within one line: take form start to end col. */
7818 len = submatch_mmatch->endpos[no].col
7819 - submatch_mmatch->startpos[no].col;
7820 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007821 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007822 ++len;
7823 }
7824 else
7825 {
7826 /* Multiple lines: take start line from start col, middle
7827 * lines completely and end line up to end col. */
7828 len = (int)STRLEN(s);
7829 if (round == 2)
7830 {
7831 STRCPY(retval, s);
7832 retval[len] = '\n';
7833 }
7834 ++len;
7835 ++lnum;
7836 while (lnum < submatch_mmatch->endpos[no].lnum)
7837 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007838 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007839 if (round == 2)
7840 STRCPY(retval + len, s);
7841 len += (int)STRLEN(s);
7842 if (round == 2)
7843 retval[len] = '\n';
7844 ++len;
7845 }
7846 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007847 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007848 submatch_mmatch->endpos[no].col);
7849 len += submatch_mmatch->endpos[no].col;
7850 if (round == 2)
7851 retval[len] = NUL;
7852 ++len;
7853 }
7854
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007855 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007856 {
7857 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007858 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007859 return NULL;
7860 }
7861 }
7862 }
7863 else
7864 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007865 s = submatch_match->startp[no];
7866 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007867 retval = NULL;
7868 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007869 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007870 }
7871
7872 return retval;
7873}
7874#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007875
/*
 * Function table for the backtracking regexp engine: the bt_* compile and
 * execute entry points, in the order the members of regengine_T are
 * declared.  The "_nl" entry only exists when one of the features that
 * needs it is compiled in.
 * With DEBUG defined there is one extra string member, initialized empty
 * (presumably the "expr" member that vim_regcomp() fills in -- confirm).
 */
static regengine_T bt_regengine =
{
    bt_regcomp,
    bt_regexec,
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
    bt_regexec_nl,
#endif
    bt_regexec_multi
#ifdef DEBUG
    ,(char_u *)""
#endif
};
7889
7890
7891#include "regexp_nfa.c"
7892
/*
 * Function table for the NFA regexp engine: the nfa_* compile and execute
 * entry points, in the order the members of regengine_T are declared.
 * The "_nl" entry only exists when one of the features that needs it is
 * compiled in.
 * With DEBUG defined there is one extra string member, initialized empty
 * (presumably the "expr" member that vim_regcomp() fills in -- confirm).
 */
static regengine_T nfa_regengine =
{
    nfa_regcomp,
    nfa_regexec,
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
    nfa_regexec_nl,
#endif
    nfa_regexec_multi
#ifdef DEBUG
    ,(char_u *)""
#endif
};
7906
/* Which regexp engine to use?  Needed for vim_regcomp().
 * Must match with 'regexpengine'.
 * vim_regcomp() sets this from p_re and from a "\%#=" pattern prefix. */
static int regexp_engine = 0;
#define     AUTOMATIC_ENGINE    0   /* try NFA first, fall back to backtracking */
#define     BACKTRACKING_ENGINE 1   /* use the backtracking engine only */
#define     NFA_ENGINE          2   /* use the NFA engine only */
#ifdef DEBUG
/* Engine names for debugging messages, indexed by the engine numbers
 * defined above. */
static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
                    "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                            };
#endif
7920
7921/*
7922 * Compile a regular expression into internal code.
7923 * Returns the program in allocated memory. Returns NULL for an error.
7924 */
7925 regprog_T *
7926vim_regcomp(expr_arg, re_flags)
7927 char_u *expr_arg;
7928 int re_flags;
7929{
7930 regprog_T *prog = NULL;
7931 char_u *expr = expr_arg;
7932
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007933 regexp_engine = p_re;
7934
7935 /* Check for prefix "\%#=", that sets the regexp engine */
7936 if (STRNCMP(expr, "\\%#=", 4) == 0)
7937 {
7938 int newengine = expr[4] - '0';
7939
7940 if (newengine == AUTOMATIC_ENGINE
7941 || newengine == BACKTRACKING_ENGINE
7942 || newengine == NFA_ENGINE)
7943 {
7944 regexp_engine = expr[4] - '0';
7945 expr += 5;
7946#ifdef DEBUG
7947 EMSG3("New regexp mode selected (%d): %s", regexp_engine,
7948 regname[newengine]);
7949#endif
7950 }
7951 else
7952 {
7953 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
7954 regexp_engine = AUTOMATIC_ENGINE;
7955 }
7956 }
7957#ifdef DEBUG
7958 bt_regengine.expr = expr;
7959 nfa_regengine.expr = expr;
7960#endif
7961
7962 /*
7963 * First try the NFA engine, unless backtracking was requested.
7964 */
7965 if (regexp_engine != BACKTRACKING_ENGINE)
7966 prog = nfa_regengine.regcomp(expr, re_flags);
7967 else
7968 prog = bt_regengine.regcomp(expr, re_flags);
7969
7970 if (prog == NULL) /* error compiling regexp with initial engine */
7971 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007972#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007973 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
7974 {
7975 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007976 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007977 if (f)
7978 {
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02007979 fprintf(f, "Syntax error in \"%s\"\n", expr);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007980 fclose(f);
7981 }
7982 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007983 EMSG2("(NFA) Could not open \"%s\" to write !!!",
7984 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007985 }
7986#endif
7987 /*
7988 * If NFA engine failed, then revert to the backtracking engine.
7989 * Except when there was a syntax error, which was properly handled by
7990 * NFA engine.
7991 */
7992 if (regexp_engine == AUTOMATIC_ENGINE)
Bram Moolenaarcd2d8bb2013-06-05 21:42:53 +02007993 prog = bt_regengine.regcomp(expr, re_flags);
7994 }
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007995
7996 return prog;
7997}
7998
7999/*
8000 * Match a regexp against a string.
8001 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8002 * Uses curbuf for line count and 'iskeyword'.
8003 *
8004 * Return TRUE if there is a match, FALSE if not.
8005 */
8006 int
8007vim_regexec(rmp, line, col)
8008 regmatch_T *rmp;
8009 char_u *line; /* string to match against */
8010 colnr_T col; /* column to start looking for match */
8011{
8012 return rmp->regprog->engine->regexec(rmp, line, col);
8013}
8014
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The work is done by the regexec_nl() function of the engine stored in
 * "rmp->regprog".
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;
    colnr_T     col;
{
    regengine_T *engine = rmp->regprog->engine;

    return engine->regexec_nl(rmp, line, col);
}
#endif
8029
8030/*
8031 * Match a regexp against multiple lines.
8032 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8033 * Uses curbuf for line count and 'iskeyword'.
8034 *
8035 * Return zero if there is no match. Return number of lines contained in the
8036 * match otherwise.
8037 */
8038 long
8039vim_regexec_multi(rmp, win, buf, lnum, col, tm)
8040 regmmatch_T *rmp;
8041 win_T *win; /* window in which to search or NULL */
8042 buf_T *buf; /* buffer in which to search */
8043 linenr_T lnum; /* nr of line to start looking for match */
8044 colnr_T col; /* column to start looking for match */
8045 proftime_T *tm; /* timeout limit or NULL */
8046{
8047 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
8048}