blob: f0b93256630c395b2bb0169bec4494c05808f31e [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020049#ifdef DEBUG
50/* show/save debugging data when BT engine is used */
51# define BT_REGEXP_DUMP
52/* save the debugging data to a file instead of displaying it */
53# define BT_REGEXP_LOG
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +020054# define BT_REGEXP_DEBUG_LOG
55# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
63 * regstart char that must begin a match; NUL if none obvious; Can be a
64 * multi-byte character.
65 * reganch is the match anchored (at beginning-of-line only)?
66 * regmust string (pointer into program) that match must include, or NULL
67 * regmlen length of regmust string
68 * regflags RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
/*
 * Structure for regexp "program".  This is essentially a linear encoding
 * of a nondeterministic finite-state machine (aka syntax charts or
 * "railroad normal form" in parsing technology).  Each node is an opcode
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
 * The operand of some types of node is a literal string; for others, it is a
 * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
 * is the first node of the branch.
 * (NB this is *not* a tree structure: the tail of the branch connects to the
 * thing following the set of BRANCHes.)
 *
 * pattern      is coded like:
 *
 *                        +-----------------+
 *                        |                 V
 * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
 *                   |      ^    |          ^
 *                   +------+    +----------+
 *
 *
 *                     +------------------+
 *                     V                  |
 * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
 *                   |      |               ^                      ^
 *                   |      +---------------+                      |
 *                   +---------------------------------------------+
 *
 *
 *                     +----------------------+
 *                     V                      |
 * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 *                   |               |           ^                      ^
 *                   |               +-----------+                      |
 *                   +--------------------------------------------------+
 *
 *
 *                                      +-------------------------+
 *                                      V                         |
 * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 *                   |                              |                ^
 *                   |                              +----------------+
 *                   +-----------------------------------------------+
 *
 *
 * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 *                   |       |                ^       ^
 *                   |       +----------------+       |
 *                   +--------------------------------+
 *
 *                                                    +---------+
 *                                                    |         V
 * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 *                   |      |          |          |     ^                   ^
 *                   |      |          |          +-----+                   |
 *                   |      |          +----------------+                   |
 *                   |      +---------------------------+                   |
 *                   +------------------------------------------------------+
 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
 * one alternative.
 */
147
148/*
149 * The opcodes are:
150 */
151
152/* definition number opnd? meaning */
153#define END 0 /* End of program or NOMATCH operand. */
154#define BOL 1 /* Match "" at beginning of line. */
155#define EOL 2 /* Match "" at end of line. */
156#define BRANCH 3 /* node Match this alternative, or the
157 * next... */
158#define BACK 4 /* Match "", "next" ptr points backward. */
159#define EXACTLY 5 /* str Match this string. */
160#define NOTHING 6 /* Match empty string. */
161#define STAR 7 /* node Match this (simple) thing 0 or more
162 * times. */
163#define PLUS 8 /* node Match this (simple) thing 1 or more
164 * times. */
165#define MATCH 9 /* node match the operand zero-width */
166#define NOMATCH 10 /* node check for no match with operand */
167#define BEHIND 11 /* node look behind for a match with operand */
168#define NOBEHIND 12 /* node look behind for no match with operand */
169#define SUBPAT 13 /* node match the operand here */
170#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
171 * n times (\{m,n\}). */
172#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
173#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
174#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
175 * and BRACE_COMPLEX. */
176#define NEWL 18 /* Match line-break */
177#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character class opcodes.  Values 20-48 are the normal classes; adding
 * ADD_NL to one of them gives the variant that also matches a line-break
 * (50-78).  WITH_NL() tells whether an opcode is such a variant.
 *
 * FIRST_NL and LAST_NL are parenthesized so that they remain a single value
 * when combined with other operators (e.g. "2 * FIRST_NL").
 */
#define ADD_NL          30
#define ANY             20      /*      Match any one character. */
#define ANYOF           21      /* str  Match any character in this string. */
#define ANYBUT          22      /* str  Match any character not in this
                                 *      string. */
#define IDENT           23      /*      Match identifier char */
#define SIDENT          24      /*      Match identifier char but no digit */
#define KWORD           25      /*      Match keyword char */
#define SKWORD          26      /*      Match word char but no digit */
#define FNAME           27      /*      Match file name char */
#define SFNAME          28      /*      Match file name char but no digit */
#define PRINT           29      /*      Match printable char */
#define SPRINT          30      /*      Match printable char but no digit */
#define WHITE           31      /*      Match whitespace char */
#define NWHITE          32      /*      Match non-whitespace char */
#define DIGIT           33      /*      Match digit char */
#define NDIGIT          34      /*      Match non-digit char */
#define HEX             35      /*      Match hex char */
#define NHEX            36      /*      Match non-hex char */
#define OCTAL           37      /*      Match octal char */
#define NOCTAL          38      /*      Match non-octal char */
#define WORD            39      /*      Match word char */
#define NWORD           40      /*      Match non-word char */
#define HEAD            41      /*      Match head char */
#define NHEAD           42      /*      Match non-head char */
#define ALPHA           43      /*      Match alpha char */
#define NALPHA          44      /*      Match non-alpha char */
#define LOWER           45      /*      Match lowercase char */
#define NLOWER          46      /*      Match non-lowercase char */
#define UPPER           47      /*      Match uppercase char */
#define NUPPER          48      /*      Match non-uppercase char */
#define FIRST_NL        (ANY + ADD_NL)
#define LAST_NL         (NUPPER + ADD_NL)
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
215
216#define MOPEN 80 /* -89 Mark this point in input as start of
217 * \( subexpr. MOPEN + 0 marks start of
218 * match. */
219#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
220 * end of match. */
221#define BACKREF 100 /* -109 node Match same string again \1-\9 */
222
223#ifdef FEAT_SYN_HL
224# define ZOPEN 110 /* -119 Mark this point in input as start of
225 * \z( subexpr. */
226# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
227# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
228#endif
229
230#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
231
232#define NOPEN 150 /* Mark this point in input as start of
233 \%( subexpr. */
234#define NCLOSE 151 /* Analogous to NOPEN. */
235
236#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
237#define RE_BOF 201 /* Match "" at beginning of file. */
238#define RE_EOF 202 /* Match "" at end of file. */
239#define CURSOR 203 /* Match location of cursor. */
240
241#define RE_LNUM 204 /* nr cmp Match line number */
242#define RE_COL 205 /* nr cmp Match column number */
243#define RE_VCOL 206 /* nr cmp Match virtual column number */
244
Bram Moolenaar71fe80d2006-01-22 23:25:56 +0000245#define RE_MARK 207 /* mark cmp Match mark position */
246#define RE_VISUAL 208 /* Match Visual area */
247
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248/*
249 * Magic characters have a special meaning, they don't match literally.
250 * Magic characters are negative. This separates them from literal characters
251 * (possibly multi-byte). Only ASCII characters can be Magic.
252 */
253#define Magic(x) ((int)(x) - 256)
254#define un_Magic(x) ((x) + 256)
255#define is_Magic(x) ((x) < 0)
256
257static int no_Magic __ARGS((int x));
258static int toggle_Magic __ARGS((int x));
259
/*
 * Remove the Magic marking from "x".
 * A Magic (negative) value is converted back to the plain character; any
 * other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
268
/*
 * Flip the Magic marking of "x".
 * A Magic (negative) value becomes the plain character, a plain character
 * becomes its Magic counterpart.
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
277
278/*
279 * The first byte of the regexp internal "program" is actually this magic
280 * number; the start node begins in the second byte. It's used to catch the
281 * most severe mutilation of the program by the caller.
282 */
283
284#define REGMAGIC 0234
285
286/*
287 * Opcode notes:
288 *
289 * BRANCH The set of branches constituting a single choice are hooked
290 * together with their "next" pointers, since precedence prevents
291 * anything being concatenated to any individual branch. The
292 * "next" pointer of the last BRANCH in a choice points to the
293 * thing following the whole choice. This is also where the
294 * final "next" pointer of each individual branch points; each
295 * branch starts with the operand node of a BRANCH node.
296 *
297 * BACK Normal "next" pointers all implicitly point forward; BACK
298 * exists to make loop structures possible.
299 *
300 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
301 * BRANCH structures using BACK. Simple cases (one character
302 * per match) are implemented with STAR and PLUS for speed
303 * and to minimize recursive plunges.
304 *
305 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
306 * node, and defines the min and max limits to be used for that
307 * node.
308 *
309 * MOPEN,MCLOSE ...are numbered at compile time.
310 * ZOPEN,ZCLOSE ...ditto
311 */
312
313/*
314 * A node is one char of opcode followed by two chars of "next" pointer.
315 * "Next" pointers are stored as two 8-bit bytes, high order first. The
316 * value is a positive offset from the opcode of the node containing it.
317 * An operand, if any, simply follows the node. (Note that much of the
318 * code generation knows about this implicit relationship.)
319 *
320 * Using two bytes for the "next" pointer is vast overkill for most things,
321 * but allows patterns to get big without disasters.
322 */
323#define OP(p) ((int)*(p))
324#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
325#define OPERAND(p) ((p) + 3)
326/* Obtain an operand that was stored as four bytes, MSB first. */
327#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
328 + ((long)(p)[5] << 8) + (long)(p)[6])
329/* Obtain a second operand stored as four bytes. */
330#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
331/* Obtain a second single-byte operand stored after a four bytes operand. */
332#define OPERAND_CMP(p) (p)[7]
333
334/*
335 * Utility definitions.
336 */
337#define UCHARAT(p) ((int)*(char_u *)(p))
338
339/* Used for an error (down from) vim_regcomp(): give the error message, set
340 * rc_did_emsg and return NULL */
Bram Moolenaar98692072006-02-04 00:57:42 +0000341#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000342#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200343#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
344#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
345#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000346
347#define MAX_LIMIT (32767L << 16L)
348
349static int re_multi_type __ARGS((int));
350static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
351static char_u *cstrchr __ARGS((char_u *, int));
352
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#ifdef BT_REGEXP_DUMP
354static void regdump __ARGS((char_u *, bt_regprog_T *));
355#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357static char_u *regprop __ARGS((char_u *));
358#endif
359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
361static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
362static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
363static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
364
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365#define NOT_MULTI 0
366#define MULTI_ONE 1
367#define MULTI_MULT 2
368/*
369 * Return NOT_MULTI if c is not a "multi" operator.
370 * Return MULTI_ONE if c is a single "multi" operator.
371 * Return MULTI_MULT if c is a multi "multi" operator.
372 */
373 static int
374re_multi_type(c)
375 int c;
376{
377 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
378 return MULTI_ONE;
379 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
380 return MULTI_MULT;
381 return NOT_MULTI;
382}
383
384/*
385 * Flags to be passed up and down.
386 */
387#define HASWIDTH 0x1 /* Known never to match null string. */
388#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
389#define SPSTART 0x4 /* Starts with * or +. */
390#define HASNL 0x8 /* Contains some \n. */
391#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
392#define WORST 0 /* Worst case. */
393
394/*
395 * When regcode is set to this value, code is not emitted and size is computed
396 * instead.
397 */
398#define JUST_CALC_SIZE ((char_u *) -1)
399
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000400static char_u *reg_prev_sub = NULL;
401
/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n  - New line (NL).
 *  \r  - Carriage Return (CR).
 *  \t  - Tab (TAB).
 *  \e  - Escape (ESC).
 *  \b  - Backspace (Ctrl_H).
 *  \d  - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
 *  \x  - Character code in hex, eg \x4a
 *  \u  - Multibyte character code, eg \u20ac
 *  \U  - Long multibyte character code, eg \U12345678
 */
418static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000419static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000420
421static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000422static int get_char_class __ARGS((char_u **pp));
423static int get_equi_class __ARGS((char_u **pp));
424static void reg_equi_class __ARGS((int c));
425static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426static char_u *skip_anyof __ARGS((char_u *p));
427static void init_class_tab __ARGS((void));
428
429/*
430 * Translate '\x' to its control character, except "\n", which is Magic.
431 */
432 static int
433backslash_trans(c)
434 int c;
435{
436 switch (c)
437 {
438 case 'r': return CAR;
439 case 't': return TAB;
440 case 'e': return ESC;
441 case 'b': return BS;
442 }
443 return c;
444}
445
446/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000447 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000448 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
449 * recognized. Otherwise "pp" is advanced to after the item.
450 */
451 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000452get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 char_u **pp;
454{
455 static const char *(class_names[]) =
456 {
457 "alnum:]",
458#define CLASS_ALNUM 0
459 "alpha:]",
460#define CLASS_ALPHA 1
461 "blank:]",
462#define CLASS_BLANK 2
463 "cntrl:]",
464#define CLASS_CNTRL 3
465 "digit:]",
466#define CLASS_DIGIT 4
467 "graph:]",
468#define CLASS_GRAPH 5
469 "lower:]",
470#define CLASS_LOWER 6
471 "print:]",
472#define CLASS_PRINT 7
473 "punct:]",
474#define CLASS_PUNCT 8
475 "space:]",
476#define CLASS_SPACE 9
477 "upper:]",
478#define CLASS_UPPER 10
479 "xdigit:]",
480#define CLASS_XDIGIT 11
481 "tab:]",
482#define CLASS_TAB 12
483 "return:]",
484#define CLASS_RETURN 13
485 "backspace:]",
486#define CLASS_BACKSPACE 14
487 "escape:]",
488#define CLASS_ESCAPE 15
489 };
490#define CLASS_NONE 99
491 int i;
492
493 if ((*pp)[1] == ':')
494 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000495 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
497 {
498 *pp += STRLEN(class_names[i]) + 2;
499 return i;
500 }
501 }
502 return CLASS_NONE;
503}
504
505/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Specific version of character class functions.
507 * Using a table to keep this fast.
508 */
509static short class_tab[256];
510
511#define RI_DIGIT 0x01
512#define RI_HEX 0x02
513#define RI_OCTAL 0x04
514#define RI_WORD 0x08
515#define RI_HEAD 0x10
516#define RI_ALPHA 0x20
517#define RI_LOWER 0x40
518#define RI_UPPER 0x80
519#define RI_WHITE 0x100
520
521 static void
522init_class_tab()
523{
524 int i;
525 static int done = FALSE;
526
527 if (done)
528 return;
529
530 for (i = 0; i < 256; ++i)
531 {
532 if (i >= '0' && i <= '7')
533 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
534 else if (i >= '8' && i <= '9')
535 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
536 else if (i >= 'a' && i <= 'f')
537 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
538#ifdef EBCDIC
539 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
540 || (i >= 's' && i <= 'z'))
541#else
542 else if (i >= 'g' && i <= 'z')
543#endif
544 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
545 else if (i >= 'A' && i <= 'F')
546 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
547#ifdef EBCDIC
548 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
549 || (i >= 'S' && i <= 'Z'))
550#else
551 else if (i >= 'G' && i <= 'Z')
552#endif
553 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
554 else if (i == '_')
555 class_tab[i] = RI_WORD + RI_HEAD;
556 else
557 class_tab[i] = 0;
558 }
559 class_tab[' '] |= RI_WHITE;
560 class_tab['\t'] |= RI_WHITE;
561 done = TRUE;
562}
563
/*
 * Character class tests using class_tab[].  Each yields non-zero when "c"
 * has the given RI_ flag.
 * With FEAT_MBYTE a value of 0x100 or more is never in a class and is not
 * used as an index into class_tab[].
 * The argument is parenthesized so that an expression such as "x & 0xff" is
 * compared as a whole.  Note that "c" is evaluated more than once in the
 * FEAT_MBYTE versions.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
585
586/* flags for regflags */
587#define RF_ICASE 1 /* ignore case */
588#define RF_NOICASE 2 /* don't ignore case */
589#define RF_HASNL 4 /* can match a NL */
590#define RF_ICOMBINE 8 /* ignore combining characters */
591#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
592
593/*
594 * Global work variables for vim_regcomp().
595 */
596
597static char_u *regparse; /* Input-scan pointer. */
598static int prevchr_len; /* byte length of previous char */
599static int num_complex_braces; /* Complex \{...} count */
600static int regnpar; /* () count. */
601#ifdef FEAT_SYN_HL
602static int regnzpar; /* \z() count. */
603static int re_has_z; /* \z item detected */
604#endif
605static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
606static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000607static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
609static unsigned regflags; /* RF_ flags for prog */
610static long brace_min[10]; /* Minimums for complex brace repeats */
611static long brace_max[10]; /* Maximums for complex brace repeats */
612static int brace_count[10]; /* Current counts for complex brace repeats */
613#if defined(FEAT_SYN_HL) || defined(PROTO)
614static int had_eol; /* TRUE when EOL found by vim_regcomp() */
615#endif
616static int one_exactly = FALSE; /* only do one char for EXACTLY */
617
618static int reg_magic; /* magicness of the pattern: */
619#define MAGIC_NONE 1 /* "\V" very unmagic */
620#define MAGIC_OFF 2 /* "\M" or 'magic' off */
621#define MAGIC_ON 3 /* "\m" or 'magic' */
622#define MAGIC_ALL 4 /* "\v" very magic */
623
624static int reg_string; /* matching with a string instead of a buffer
625 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000626static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000627
628/*
629 * META contains all characters that may be magic, except '^' and '$'.
630 */
631
632#ifdef EBCDIC
633static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
634#else
635/* META[] is used often enough to justify turning it into a table. */
636static char_u META_flags[] = {
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639/* % & ( ) * + . */
640 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
641/* 1 2 3 4 5 6 7 8 9 < = > ? */
642 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
643/* @ A C D F H I K L M O */
644 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
645/* P S U V W X Z [ _ */
646 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
647/* a c d f h i k l m n o */
648 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
649/* p s u v w x z { | ~ */
650 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
651};
652#endif
653
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200654static int curchr; /* currently parsed character */
655/* Previous character. Note: prevchr is sometimes -1 when we are not at the
656 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
657 * because ^ was taken to be magic -- webb */
658static int prevchr;
659static int prevprevchr; /* previous-previous character */
660static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661
662/* arguments for reg() */
663#define REG_NOPAREN 0 /* toplevel reg() */
664#define REG_PAREN 1 /* \(\) */
665#define REG_ZPAREN 2 /* \z(\) */
666#define REG_NPAREN 3 /* \%(\) */
667
668/*
669 * Forward declarations for vim_regcomp()'s friends.
670 */
671static void initchr __ARGS((char_u *));
672static int getchr __ARGS((void));
673static void skipchr_keepstart __ARGS((void));
674static int peekchr __ARGS((void));
675static void skipchr __ARGS((void));
676static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000677static int gethexchrs __ARGS((int maxinputlen));
678static int getoctchrs __ARGS((void));
679static int getdecchrs __ARGS((void));
680static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000681static void regcomp_start __ARGS((char_u *expr, int flags));
682static char_u *reg __ARGS((int, int *));
683static char_u *regbranch __ARGS((int *flagp));
684static char_u *regconcat __ARGS((int *flagp));
685static char_u *regpiece __ARGS((int *));
686static char_u *regatom __ARGS((int *));
687static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000688#ifdef FEAT_MBYTE
689static int use_multibytecode __ARGS((int c));
690#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000691static int prog_magic_wrong __ARGS((void));
692static char_u *regnext __ARGS((char_u *));
693static void regc __ARGS((int b));
694#ifdef FEAT_MBYTE
695static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200696# define REGMBC(x) regmbc(x);
697# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000698#else
699# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200700# define REGMBC(x)
701# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000702#endif
703static void reginsert __ARGS((int, char_u *));
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200704static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000705static void reginsert_limits __ARGS((int, long, long, char_u *));
706static char_u *re_put_long __ARGS((char_u *pr, long_u val));
707static int read_limits __ARGS((long *, long *));
708static void regtail __ARGS((char_u *, char_u *));
709static void regoptail __ARGS((char_u *, char_u *));
710
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200711static regengine_T bt_regengine;
712static regengine_T nfa_regengine;
713
Bram Moolenaar071d4272004-06-13 20:20:40 +0000714/*
715 * Return TRUE if compiled regular expression "prog" can match a line break.
716 */
717 int
718re_multiline(prog)
719 regprog_T *prog;
720{
721 return (prog->regflags & RF_HASNL);
722}
723
724/*
725 * Return TRUE if compiled regular expression "prog" looks before the start
726 * position (pattern contains "\@<=" or "\@<!").
727 */
728 int
729re_lookbehind(prog)
730 regprog_T *prog;
731{
732 return (prog->regflags & RF_LOOKBH);
733}
734
735/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000736 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
737 * Returns a character representing the class. Zero means that no item was
738 * recognized. Otherwise "pp" is advanced to after the item.
739 */
740 static int
741get_equi_class(pp)
742 char_u **pp;
743{
744 int c;
745 int l = 1;
746 char_u *p = *pp;
747
748 if (p[1] == '=')
749 {
750#ifdef FEAT_MBYTE
751 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000752 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000753#endif
754 if (p[l + 2] == '=' && p[l + 3] == ']')
755 {
756#ifdef FEAT_MBYTE
757 if (has_mbyte)
758 c = mb_ptr2char(p + 2);
759 else
760#endif
761 c = p[2];
762 *pp += l + 4;
763 return c;
764 }
765 }
766 return 0;
767}
768
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200769#ifdef EBCDIC
770/*
771 * Table for equivalence class "c". (IBM-1047)
772 */
773char *EQUIVAL_CLASS_C[16] = {
774 "A\x62\x63\x64\x65\x66\x67",
775 "C\x68",
776 "E\x71\x72\x73\x74",
777 "I\x75\x76\x77\x78",
778 "N\x69",
779 "O\xEB\xEC\xED\xEE\xEF",
780 "U\xFB\xFC\xFD\xFE",
781 "Y\xBA",
782 "a\x42\x43\x44\x45\x46\x47",
783 "c\x48",
784 "e\x51\x52\x53\x54",
785 "i\x55\x56\x57\x58",
786 "n\x49",
787 "o\xCB\xCC\xCD\xCE\xCF",
788 "u\xDB\xDC\xDD\xDE",
789 "y\x8D\xDF",
790};
791#endif
792
Bram Moolenaardf177f62005-02-22 08:39:57 +0000793/*
794 * Produce the bytes for equivalence class "c".
795 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200796 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000797 */
798 static void
799reg_equi_class(c)
800 int c;
801{
802#ifdef FEAT_MBYTE
803 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000804 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000805#endif
806 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200807#ifdef EBCDIC
808 int i;
809
810 /* This might be slower than switch/case below. */
811 for (i = 0; i < 16; i++)
812 {
813 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
814 {
815 char *p = EQUIVAL_CLASS_C[i];
816
817 while (*p != 0)
818 regmbc(*p++);
819 return;
820 }
821 }
822#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000823 switch (c)
824 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000825 case 'A': case '\300': case '\301': case '\302':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200826 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
827 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000828 case '\303': case '\304': case '\305':
829 regmbc('A'); regmbc('\300'); regmbc('\301');
830 regmbc('\302'); regmbc('\303'); regmbc('\304');
831 regmbc('\305');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200832 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
833 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
834 REGMBC(0x1ea2)
835 return;
836 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
837 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000838 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000839 case 'C': case '\307':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200840 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000841 regmbc('C'); regmbc('\307');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200842 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
843 REGMBC(0x10c)
844 return;
845 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
846 CASEMBC(0x1e0e) CASEMBC(0x1e10)
847 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
848 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000849 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000850 case 'E': case '\310': case '\311': case '\312': case '\313':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200851 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
852 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000853 regmbc('E'); regmbc('\310'); regmbc('\311');
854 regmbc('\312'); regmbc('\313');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200855 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
856 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
857 REGMBC(0x1ebc)
858 return;
859 case 'F': CASEMBC(0x1e1e)
860 regmbc('F'); REGMBC(0x1e1e)
861 return;
862 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
863 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
864 CASEMBC(0x1e20)
865 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
866 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
867 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
868 return;
869 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
870 CASEMBC(0x1e26) CASEMBC(0x1e28)
871 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
872 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000873 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000874 case 'I': case '\314': case '\315': case '\316': case '\317':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200875 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
876 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000877 regmbc('I'); regmbc('\314'); regmbc('\315');
878 regmbc('\316'); regmbc('\317');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200879 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
880 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
881 REGMBC(0x1ec8)
882 return;
883 case 'J': CASEMBC(0x134)
884 regmbc('J'); REGMBC(0x134)
885 return;
886 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
887 CASEMBC(0x1e34)
888 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
889 REGMBC(0x1e30) REGMBC(0x1e34)
890 return;
891 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
892 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
893 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
894 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
895 REGMBC(0x1e3a)
896 return;
897 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
898 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000899 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000900 case 'N': case '\321':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200901 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
902 CASEMBC(0x1e48)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000903 regmbc('N'); regmbc('\321');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200904 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
905 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000906 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000907 case 'O': case '\322': case '\323': case '\324': case '\325':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200908 case '\326': case '\330':
909 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
910 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000911 regmbc('O'); regmbc('\322'); regmbc('\323');
912 regmbc('\324'); regmbc('\325'); regmbc('\326');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200913 regmbc('\330');
914 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
915 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
916 REGMBC(0x1ec) REGMBC(0x1ece)
917 return;
918 case 'P': case 0x1e54: case 0x1e56:
919 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
920 return;
921 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
922 CASEMBC(0x1e58) CASEMBC(0x1e5e)
923 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
924 REGMBC(0x1e58) REGMBC(0x1e5e)
925 return;
926 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
927 CASEMBC(0x160) CASEMBC(0x1e60)
928 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
929 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
930 return;
931 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
932 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
933 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
934 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000935 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000936 case 'U': case '\331': case '\332': case '\333': case '\334':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200937 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
938 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
939 CASEMBC(0x1ee6)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000940 regmbc('U'); regmbc('\331'); regmbc('\332');
941 regmbc('\333'); regmbc('\334');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200942 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
943 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
944 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
945 return;
946 case 'V': CASEMBC(0x1e7c)
947 regmbc('V'); REGMBC(0x1e7c)
948 return;
949 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
950 CASEMBC(0x1e84) CASEMBC(0x1e86)
951 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
952 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
953 return;
954 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
955 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000956 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000957 case 'Y': case '\335':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200958 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
959 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000960 regmbc('Y'); regmbc('\335');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200961 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
962 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
963 return;
964 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
965 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
966 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
967 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
968 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000969 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000970 case 'a': case '\340': case '\341': case '\342':
971 case '\343': case '\344': case '\345':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200972 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
973 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000974 regmbc('a'); regmbc('\340'); regmbc('\341');
975 regmbc('\342'); regmbc('\343'); regmbc('\344');
976 regmbc('\345');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200977 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
978 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
979 REGMBC(0x1ea3)
980 return;
981 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
982 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000983 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000984 case 'c': case '\347':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200985 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000986 regmbc('c'); regmbc('\347');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200987 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
988 REGMBC(0x10d)
989 return;
990 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
991 CASEMBC(0x1e11)
992 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
993 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000994 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000995 case 'e': case '\350': case '\351': case '\352': case '\353':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200996 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
997 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000998 regmbc('e'); regmbc('\350'); regmbc('\351');
999 regmbc('\352'); regmbc('\353');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001000 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1001 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1002 REGMBC(0x1ebd)
1003 return;
1004 case 'f': CASEMBC(0x1e1f)
1005 regmbc('f'); REGMBC(0x1e1f)
1006 return;
1007 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1008 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1009 CASEMBC(0x1e21)
1010 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1011 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1012 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1013 return;
1014 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1015 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1016 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1017 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1018 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001019 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001020 case 'i': case '\354': case '\355': case '\356': case '\357':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001021 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1022 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001023 regmbc('i'); regmbc('\354'); regmbc('\355');
1024 regmbc('\356'); regmbc('\357');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001025 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1026 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1027 return;
1028 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1029 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1030 return;
1031 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1032 CASEMBC(0x1e35)
1033 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1034 REGMBC(0x1e31) REGMBC(0x1e35)
1035 return;
1036 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1037 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1038 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1039 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1040 REGMBC(0x1e3b)
1041 return;
1042 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1043 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001044 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001045 case 'n': case '\361':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001046 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1047 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001048 regmbc('n'); regmbc('\361');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001049 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1050 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001051 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001052 case 'o': case '\362': case '\363': case '\364': case '\365':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001053 case '\366': case '\370':
1054 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1055 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001056 regmbc('o'); regmbc('\362'); regmbc('\363');
1057 regmbc('\364'); regmbc('\365'); regmbc('\366');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001058 regmbc('\370');
1059 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1060 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1061 REGMBC(0x1ed) REGMBC(0x1ecf)
1062 return;
1063 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1064 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1065 return;
1066 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1067 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1068 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1069 REGMBC(0x1e59) REGMBC(0x1e5f)
1070 return;
1071 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1072 CASEMBC(0x161) CASEMBC(0x1e61)
1073 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1074 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1075 return;
1076 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1077 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1078 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1079 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001080 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001081 case 'u': case '\371': case '\372': case '\373': case '\374':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001082 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1083 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1084 CASEMBC(0x1ee7)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001085 regmbc('u'); regmbc('\371'); regmbc('\372');
1086 regmbc('\373'); regmbc('\374');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001087 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1088 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1089 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1090 return;
1091 case 'v': CASEMBC(0x1e7d)
1092 regmbc('v'); REGMBC(0x1e7d)
1093 return;
1094 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1095 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1096 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1097 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1098 REGMBC(0x1e98)
1099 return;
1100 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1101 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001102 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001103 case 'y': case '\375': case '\377':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001104 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1105 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001106 regmbc('y'); regmbc('\375'); regmbc('\377');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001107 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1108 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1109 return;
1110 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1111 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1112 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1113 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1114 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001115 return;
1116 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001117#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001118 }
1119 regmbc(c);
1120}
1121
1122/*
1123 * Check for a collating element "[.a.]". "pp" points to the '['.
1124 * Returns a character. Zero means that no item was recognized. Otherwise
1125 * "pp" is advanced to after the item.
1126 * Currently only single characters are recognized!
1127 */
1128 static int
1129get_coll_element(pp)
1130 char_u **pp;
1131{
1132 int c;
1133 int l = 1;
1134 char_u *p = *pp;
1135
1136 if (p[1] == '.')
1137 {
1138#ifdef FEAT_MBYTE
1139 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001140 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001141#endif
1142 if (p[l + 2] == '.' && p[l + 3] == ']')
1143 {
1144#ifdef FEAT_MBYTE
1145 if (has_mbyte)
1146 c = mb_ptr2char(p + 2);
1147 else
1148#endif
1149 c = p[2];
1150 *pp += l + 4;
1151 return c;
1152 }
1153 }
1154 return 0;
1155}
1156
1157
1158/*
1159 * Skip over a "[]" range.
1160 * "p" must point to the character after the '['.
1161 * The returned pointer is on the matching ']', or the terminating NUL.
1162 */
1163 static char_u *
1164skip_anyof(p)
1165 char_u *p;
1166{
1167 int cpo_lit; /* 'cpoptions' contains 'l' flag */
1168 int cpo_bsl; /* 'cpoptions' contains '\' flag */
1169#ifdef FEAT_MBYTE
1170 int l;
1171#endif
1172
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001173 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1174 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaardf177f62005-02-22 08:39:57 +00001175
1176 if (*p == '^') /* Complement of range. */
1177 ++p;
1178 if (*p == ']' || *p == '-')
1179 ++p;
1180 while (*p != NUL && *p != ']')
1181 {
1182#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001183 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001184 p += l;
1185 else
1186#endif
1187 if (*p == '-')
1188 {
1189 ++p;
1190 if (*p != ']' && *p != NUL)
1191 mb_ptr_adv(p);
1192 }
1193 else if (*p == '\\'
1194 && !cpo_bsl
1195 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
1196 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
1197 p += 2;
1198 else if (*p == '[')
1199 {
1200 if (get_char_class(&p) == CLASS_NONE
1201 && get_equi_class(&p) == 0
1202 && get_coll_element(&p) == 0)
1203 ++p; /* It was not a class name */
1204 }
1205 else
1206 ++p;
1207 }
1208
1209 return p;
1210}
1211
/*
 * Skip past regular expression.
 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 * Take care of characters with a backslash in front of it.
 * Skip strings inside [ and ].
 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 * is changed in-place.
 *
 * "magic" selects the initial magic level: MAGIC_ON when non-zero, MAGIC_OFF
 * otherwise.  A "\v" or "\V" inside the pattern switches the level used for
 * the rest of the scan to MAGIC_ALL or MAGIC_NONE.
 *
 * Returns a pointer to the "dirc" that ends the pattern, or to the
 * terminating NUL.  Once a copy has been made the returned pointer points
 * into "*newp" instead of "startp".
 */
    char_u *
skip_regexp(startp, dirc, magic, newp)
    char_u	*startp;
    int		dirc;
    int		magic;
    char_u	**newp;
{
    int		mymagic;
    char_u	*p = startp;

    if (magic)
        mymagic = MAGIC_ON;
    else
        mymagic = MAGIC_OFF;

    for (; p[0] != NUL; mb_ptr_adv(p))
    {
        if (p[0] == dirc)	/* found end of regexp */
            break;
        /* A "[" starts a range when magic, "\[" does when not magic. */
        if ((p[0] == '[' && mymagic >= MAGIC_ON)
                || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
        {
            /* NOTE(review): in the "\[" case p + 1 is the '[' itself, not
             * the character after it as skip_anyof() documents - confirm
             * this is intended. */
            p = skip_anyof(p + 1);
            if (p[0] == NUL)
                break;
        }
        else if (p[0] == '\\' && p[1] != NUL)
        {
            if (dirc == '?' && newp != NULL && p[1] == '?')
            {
                /* change "\?" to "?", make a copy first. */
                if (*newp == NULL)
                {
                    *newp = vim_strsave(startp);
                    /* Continue scanning at the same offset in the copy. */
                    if (*newp != NULL)
                        p = *newp + (p - startp);
                }
                if (*newp != NULL)
                    /* Drop the backslash; "p" is then on the '?', which the
                     * loop increment steps over. */
                    STRMOVE(p, p + 1);
                else
                    /* No copy available: leave the text alone and just skip
                     * the escaped '?'. */
                    ++p;
            }
            else
                ++p;	/* skip next character */
            /* "p" is now on the character that followed the backslash
             * (or on the '?' after an in-place edit). */
            if (*p == 'v')
                mymagic = MAGIC_ALL;
            else if (*p == 'V')
                mymagic = MAGIC_NONE;
        }
    }
    return p;
}
1273
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001274static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
1275
/*
 * bt_regcomp() - compile a regular expression into internal code for the
 * traditional back track matcher.
 * Returns the program in allocated space.  Returns NULL for an error.
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because vim_free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 * "re_flags": RE_MAGIC and/or RE_STRING.
 *
 * Besides the program itself, the fields regstart, reganch, regmust,
 * regmlen and regflags of the result are filled in here.
 */
    static regprog_T *
bt_regcomp(expr, re_flags)
    char_u	*expr;
    int		re_flags;
{
    bt_regprog_T    *r;
    char_u	*scan;
    char_u	*longest;
    int		len;
    int		flags;

    if (expr == NULL)
        EMSG_RET_NULL(_(e_null));

    init_class_tab();

    /*
     * First pass: determine size, legality.
     */
    regcomp_start(expr, re_flags);
    regcode = JUST_CALC_SIZE;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
        return NULL;

    /* Small enough for pointer-storage convention? */
#ifdef SMALL_MALLOC		/* 16 bit storage allocation */
    if (regsize >= 65536L - 256L)
        EMSG_RET_NULL(_("E339: Pattern too long"));
#endif

    /* Allocate space: the header plus "regsize" as set by the first pass. */
    r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
    if (r == NULL)
        return NULL;

    /*
     * Second pass: emit code.
     * regcomp_start() resets the parser state, so this pass starts out the
     * same way as the first one.
     */
    regcomp_start(expr, re_flags);
    regcode = r->program;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
    {
        /* Nothing else refers to "r" yet, release it before returning. */
        vim_free(r);
        if (reg_toolong)
            EMSG_RET_NULL(_("E339: Pattern too long"));
        return NULL;
    }

    /* Dig out information for optimizations. */
    r->regstart = NUL;		/* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    r->regflags = regflags;
    if (flags & HASNL)
        r->regflags |= RF_HASNL;
    if (flags & HASLOOKBH)
        r->regflags |= RF_LOOKBH;
#ifdef FEAT_SYN_HL
    /* Remember whether this pattern has any \z specials in it. */
    r->reghasz = re_has_z;
#endif
    scan = r->program + 1;	/* First BRANCH. */
    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
    {
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == BOL || OP(scan) == RE_BOF)
        {
            r->reganch++;
            scan = regnext(scan);
        }

        /* When the pattern starts with literal text, remember its first
         * character in regstart. */
        if (OP(scan) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(scan));
            else
#endif
                r->regstart = *OPERAND(scan);
        }
        /* Same, but looking past one leading node of the kinds listed
         * here to the literal text that follows it. */
        else if ((OP(scan) == BOW
                    || OP(scan) == EOW
                    || OP(scan) == NOTHING
                    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
                    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
                 && OP(regnext(scan)) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
            else
#endif
                r->regstart = *OPERAND(regnext(scan));
        }

        /*
         * If there's something expensive in the r.e., find the longest
         * literal string that must appear and make it the regmust.  Resolve
         * ties in favor of later strings, since the regstart check works
         * with the beginning of the r.e. and avoiding duplication
         * strengthens checking.  Not a strong reason, but sufficient in the
         * absence of others.
         */
        /*
         * When the r.e. starts with BOW, it is faster to look for a regmust
         * first. Used a lot for "#" and "*" commands. (Added by mool).
         */
        if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
                                                          && !(flags & HASNL))
        {
            longest = NULL;
            len = 0;
            /* ">=" makes a later string of equal length win the tie. */
            for (; scan != NULL; scan = regnext(scan))
                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
                {
                    longest = OPERAND(scan);
                    len = (int)STRLEN(OPERAND(scan));
                }
            r->regmust = longest;
            r->regmlen = len;
        }
    }
#ifdef BT_REGEXP_DUMP
    regdump(expr, r);
#endif
    r->engine = &bt_regengine;
    return (regprog_T *)r;
}
1430
1431/*
1432 * Setup to parse the regexp. Used once to get the length and once to do it.
1433 */
1434 static void
1435regcomp_start(expr, re_flags)
1436 char_u *expr;
1437 int re_flags; /* see vim_regcomp() */
1438{
1439 initchr(expr);
1440 if (re_flags & RE_MAGIC)
1441 reg_magic = MAGIC_ON;
1442 else
1443 reg_magic = MAGIC_OFF;
1444 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001445 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001446
1447 num_complex_braces = 0;
1448 regnpar = 1;
1449 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1450#ifdef FEAT_SYN_HL
1451 regnzpar = 1;
1452 re_has_z = 0;
1453#endif
1454 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001455 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001456 regflags = 0;
1457#if defined(FEAT_SYN_HL) || defined(PROTO)
1458 had_eol = FALSE;
1459#endif
1460}
1461
1462#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found.  This is messy, but it works fine.
 * Returns the current value of the file-level "had_eol" flag, which
 * regcomp_start() resets to FALSE at the start of each compile pass.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
1472#endif
1473
/*
 * Parse regular expression, i.e. main body or parenthesized thing.
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 *
 * "paren" tells what kind of group is being parsed and thereby which
 * opening and closing nodes are generated.  "*flagp" receives the combined
 * flags of all branches.
 * Returns the first node of the generated code, or NULL on failure.
 */
    static char_u *
reg(paren, flagp)
    int		paren;	/* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
    int		*flagp;
{
    char_u	*ret;
    char_u	*br;
    char_u	*ender;
    int		parno = 0;
    int		flags;

    *flagp = HASWIDTH;		/* Tentatively. */

#ifdef FEAT_SYN_HL
    if (paren == REG_ZPAREN)
    {
        /* Make a ZOPEN node. */
        if (regnzpar >= NSUBEXP)
            EMSG_RET_NULL(_("E50: Too many \\z("));
        parno = regnzpar;
        regnzpar++;
        ret = regnode(ZOPEN + parno);
    }
    else
#endif
        if (paren == REG_PAREN)
    {
        /* Make a MOPEN node. */
        /* NOTE(review): the second argument presumably selects whether the
         * message shows a backslash before the paren - confirm against the
         * EMSG2_RET_NULL definition. */
        if (regnpar >= NSUBEXP)
            EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
        parno = regnpar;
        ++regnpar;
        ret = regnode(MOPEN + parno);
    }
    else if (paren == REG_NPAREN)
    {
        /* Make a NOPEN node. */
        ret = regnode(NOPEN);
    }
    else
        ret = NULL;		/* top level: no opening node */

    /* Pick up the branches, linking them together. */
    br = regbranch(&flags);
    if (br == NULL)
        return NULL;
    if (ret != NULL)
        regtail(ret, br);	/* [MZ]OPEN -> first. */
    else
        ret = br;
    /* If one of the branches can be zero-width, the whole thing can.
     * If one of the branches has * at start or matches a line-break, the
     * whole thing can. */
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    while (peekchr() == Magic('|'))
    {
        skipchr();
        br = regbranch(&flags);
        if (br == NULL || reg_toolong)
            return NULL;
        regtail(ret, br);	/* BRANCH -> BRANCH. */
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    }

    /* Make a closing node, and hook it on the end. */
    ender = regnode(
#ifdef FEAT_SYN_HL
            paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
            paren == REG_PAREN ? MCLOSE + parno :
            paren == REG_NPAREN ? NCLOSE : END);
    regtail(ret, ender);

    /* Hook the tails of the branches to the closing node. */
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    /* Check for proper termination. */
    /* Note that getchr() consumes the character that is checked here. */
    if (paren != REG_NOPAREN && getchr() != Magic(')'))
    {
#ifdef FEAT_SYN_HL
        if (paren == REG_ZPAREN)
            EMSG_RET_NULL(_("E52: Unmatched \\z("));
        else
#endif
            if (paren == REG_NPAREN)
            EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
        else
            EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
    }
    else if (paren == REG_NOPAREN && peekchr() != NUL)
    {
        /* At the top level the whole expression must have been used. */
        if (curchr == Magic(')'))
            EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
        else
            EMSG_RET_NULL(_(e_trailing));	/* "Can't happen". */
        /* NOTREACHED */
    }
    /*
     * Here we set the flag allowing back references to this set of
     * parentheses.
     */
    if (paren == REG_PAREN)
        had_endbrace[parno] = TRUE;	/* have seen the close paren */
    return ret;
}
1593
/*
 * Parse one alternative of an | operator.
 * Implements the & operator.
 *
 * "*flagp" receives the combined flags of the concats in this branch.
 * Returns the BRANCH node that starts the alternative, or NULL when
 * regconcat() fails.
 */
    static char_u *
regbranch(flagp)
    int		*flagp;
{
    char_u	*ret;
    char_u	*chain = NULL;	/* previous concat, once a '&' was seen */
    char_u	*latest;
    int		flags;

    *flagp = WORST | HASNL;		/* Tentatively. */

    ret = regnode(BRANCH);
    for (;;)
    {
        latest = regconcat(&flags);
        if (latest == NULL)
            return NULL;
        /* If one of the branches has width, the whole thing has.  If one of
         * the branches anchors at start-of-line, the whole thing does.
         * If one of the branches uses look-behind, the whole thing does. */
        *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
        /* If one of the branches doesn't match a line-break, the whole thing
         * doesn't. */
        *flagp &= ~HASNL | (flags & HASNL);
        if (chain != NULL)
            regtail(chain, latest);
        if (peekchr() != Magic('&'))
            break;
        skipchr();
        regtail(latest, regnode(END)); /* operand ends */
        /* Stop here when the program became too long; the caller checks
         * reg_toolong as well. */
        if (reg_toolong)
            break;
        /* Put a MATCH operator in front of the concat just parsed. */
        reginsert(MATCH, latest);
        chain = latest;
    }

    return ret;
}
1636
/*
 * Parse one alternative of an | or & operator.
 * Implements the concatenation operator.
 *
 * Pieces are parsed and chained until NUL, '|', '&' or ')' is seen.  The
 * items \Z, \c, \C, \v, \m, \M and \V generate no code here: they only set
 * flags or change the magic level.
 * "*flagp" receives the combined flags of the pieces.
 * Returns the first node of the sequence (a NOTHING node when there were no
 * pieces), or NULL on failure.
 */
    static char_u *
regconcat(flagp)
    int		*flagp;
{
    char_u	*first = NULL;
    char_u	*chain = NULL;
    char_u	*latest;
    int		flags;
    int		cont = TRUE;

    *flagp = WORST;		/* Tentatively. */

    while (cont)
    {
        switch (peekchr())
        {
            case NUL:
            case Magic('|'):
            case Magic('&'):
            case Magic(')'):
                            cont = FALSE;	/* end of this concat */
                            break;
            case Magic('Z'):
#ifdef FEAT_MBYTE
                            regflags |= RF_ICOMBINE;
#endif
                            skipchr_keepstart();
                            break;
            case Magic('c'):
                            regflags |= RF_ICASE;
                            skipchr_keepstart();
                            break;
            case Magic('C'):
                            regflags |= RF_NOICASE;
                            skipchr_keepstart();
                            break;
            /* For the four magic-level items below, curchr is set to -1
             * after the level changes; presumably this makes peekchr() read
             * the next character again under the new level - confirm. */
            case Magic('v'):
                            reg_magic = MAGIC_ALL;
                            skipchr_keepstart();
                            curchr = -1;
                            break;
            case Magic('m'):
                            reg_magic = MAGIC_ON;
                            skipchr_keepstart();
                            curchr = -1;
                            break;
            case Magic('M'):
                            reg_magic = MAGIC_OFF;
                            skipchr_keepstart();
                            curchr = -1;
                            break;
            case Magic('V'):
                            reg_magic = MAGIC_NONE;
                            skipchr_keepstart();
                            curchr = -1;
                            break;
            default:
                            latest = regpiece(&flags);
                            if (latest == NULL || reg_toolong)
                                return NULL;
                            *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
                            if (chain == NULL)	/* First piece. */
                                *flagp |= flags & SPSTART;
                            else
                                regtail(chain, latest);
                            chain = latest;
                            if (first == NULL)
                                first = latest;
                            break;
        }
    }
    if (first == NULL)		/* Loop ran zero times. */
        first = regnode(NOTHING);
    return first;
}
1716
1717/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001718 * Parse something followed by possible [*+=].
Bram Moolenaar071d4272004-06-13 20:20:40 +00001719 *
1720 * Note that the branching code sequences used for = and the general cases
1721 * of * and + are somewhat optimized: they use the same NOTHING node as
1722 * both the endmarker for their branch list and the body of the last branch.
1723 * It might seem that this node could be dispensed with entirely, but the
1724 * endmarker role is not redundant.
1725 */
1726 static char_u *
1727regpiece(flagp)
1728 int *flagp;
1729{
1730 char_u *ret;
1731 int op;
1732 char_u *next;
1733 int flags;
1734 long minval;
1735 long maxval;
1736
1737 ret = regatom(&flags);
1738 if (ret == NULL)
1739 return NULL;
1740
1741 op = peekchr();
1742 if (re_multi_type(op) == NOT_MULTI)
1743 {
1744 *flagp = flags;
1745 return ret;
1746 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001747 /* default flags */
1748 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1749
1750 skipchr();
1751 switch (op)
1752 {
1753 case Magic('*'):
1754 if (flags & SIMPLE)
1755 reginsert(STAR, ret);
1756 else
1757 {
1758 /* Emit x* as (x&|), where & means "self". */
1759 reginsert(BRANCH, ret); /* Either x */
1760 regoptail(ret, regnode(BACK)); /* and loop */
1761 regoptail(ret, ret); /* back */
1762 regtail(ret, regnode(BRANCH)); /* or */
1763 regtail(ret, regnode(NOTHING)); /* null. */
1764 }
1765 break;
1766
1767 case Magic('+'):
1768 if (flags & SIMPLE)
1769 reginsert(PLUS, ret);
1770 else
1771 {
1772 /* Emit x+ as x(&|), where & means "self". */
1773 next = regnode(BRANCH); /* Either */
1774 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001775 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001776 regtail(next, regnode(BRANCH)); /* or */
1777 regtail(ret, regnode(NOTHING)); /* null. */
1778 }
1779 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1780 break;
1781
1782 case Magic('@'):
1783 {
1784 int lop = END;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001785 int nr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001786
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001787 nr = getdecchrs();
Bram Moolenaar071d4272004-06-13 20:20:40 +00001788 switch (no_Magic(getchr()))
1789 {
1790 case '=': lop = MATCH; break; /* \@= */
1791 case '!': lop = NOMATCH; break; /* \@! */
1792 case '>': lop = SUBPAT; break; /* \@> */
1793 case '<': switch (no_Magic(getchr()))
1794 {
1795 case '=': lop = BEHIND; break; /* \@<= */
1796 case '!': lop = NOBEHIND; break; /* \@<! */
1797 }
1798 }
1799 if (lop == END)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001800 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001801 reg_magic == MAGIC_ALL);
1802 /* Look behind must match with behind_pos. */
1803 if (lop == BEHIND || lop == NOBEHIND)
1804 {
1805 regtail(ret, regnode(BHPOS));
1806 *flagp |= HASLOOKBH;
1807 }
1808 regtail(ret, regnode(END)); /* operand ends */
Bram Moolenaar75eb1612013-05-29 18:45:11 +02001809 if (lop == BEHIND || lop == NOBEHIND)
1810 {
1811 if (nr < 0)
1812 nr = 0; /* no limit is same as zero limit */
1813 reginsert_nr(lop, nr, ret);
1814 }
1815 else
1816 reginsert(lop, ret);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001817 break;
1818 }
1819
1820 case Magic('?'):
1821 case Magic('='):
1822 /* Emit x= as (x|) */
1823 reginsert(BRANCH, ret); /* Either x */
1824 regtail(ret, regnode(BRANCH)); /* or */
1825 next = regnode(NOTHING); /* null. */
1826 regtail(ret, next);
1827 regoptail(ret, next);
1828 break;
1829
1830 case Magic('{'):
1831 if (!read_limits(&minval, &maxval))
1832 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001833 if (flags & SIMPLE)
1834 {
1835 reginsert(BRACE_SIMPLE, ret);
1836 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1837 }
1838 else
1839 {
1840 if (num_complex_braces >= 10)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001841 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00001842 reg_magic == MAGIC_ALL);
1843 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1844 regoptail(ret, regnode(BACK));
1845 regoptail(ret, ret);
1846 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1847 ++num_complex_braces;
1848 }
1849 if (minval > 0 && maxval > 0)
1850 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1851 break;
1852 }
1853 if (re_multi_type(peekchr()) != NOT_MULTI)
1854 {
1855 /* Can't have a multi follow a multi. */
1856 if (peekchr() == Magic('*'))
1857 sprintf((char *)IObuff, _("E61: Nested %s*"),
1858 reg_magic >= MAGIC_ON ? "" : "\\");
1859 else
1860 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1861 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1862 EMSG_RET_NULL(IObuff);
1863 }
1864
1865 return ret;
1866}
1867
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001868/* When making changes to classchars also change nfa_classcodes. */
1869static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1870static int classcodes[] = {
1871 ANY, IDENT, SIDENT, KWORD, SKWORD,
1872 FNAME, SFNAME, PRINT, SPRINT,
1873 WHITE, NWHITE, DIGIT, NDIGIT,
1874 HEX, NHEX, OCTAL, NOCTAL,
1875 WORD, NWORD, HEAD, NHEAD,
1876 ALPHA, NALPHA, LOWER, NLOWER,
1877 UPPER, NUPPER
1878};
1879
Bram Moolenaar071d4272004-06-13 20:20:40 +00001880/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001881 * Parse the lowest level.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001882 *
1883 * Optimization: gobbles an entire sequence of ordinary characters so that
1884 * it can turn them into a single node, which is smaller to store and
1885 * faster to run. Don't do this when one_exactly is set.
1886 */
1887 static char_u *
1888regatom(flagp)
1889 int *flagp;
1890{
1891 char_u *ret;
1892 int flags;
1893 int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001894 int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001895 int c;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001896 char_u *p;
1897 int extra = 0;
1898
1899 *flagp = WORST; /* Tentatively. */
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001900 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1901 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001902
1903 c = getchr();
1904 switch (c)
1905 {
1906 case Magic('^'):
1907 ret = regnode(BOL);
1908 break;
1909
1910 case Magic('$'):
1911 ret = regnode(EOL);
1912#if defined(FEAT_SYN_HL) || defined(PROTO)
1913 had_eol = TRUE;
1914#endif
1915 break;
1916
1917 case Magic('<'):
1918 ret = regnode(BOW);
1919 break;
1920
1921 case Magic('>'):
1922 ret = regnode(EOW);
1923 break;
1924
1925 case Magic('_'):
1926 c = no_Magic(getchr());
1927 if (c == '^') /* "\_^" is start-of-line */
1928 {
1929 ret = regnode(BOL);
1930 break;
1931 }
1932 if (c == '$') /* "\_$" is end-of-line */
1933 {
1934 ret = regnode(EOL);
1935#if defined(FEAT_SYN_HL) || defined(PROTO)
1936 had_eol = TRUE;
1937#endif
1938 break;
1939 }
1940
1941 extra = ADD_NL;
1942 *flagp |= HASNL;
1943
1944 /* "\_[" is character range plus newline */
1945 if (c == '[')
1946 goto collection;
1947
1948 /* "\_x" is character class plus newline */
1949 /*FALLTHROUGH*/
1950
1951 /*
1952 * Character classes.
1953 */
1954 case Magic('.'):
1955 case Magic('i'):
1956 case Magic('I'):
1957 case Magic('k'):
1958 case Magic('K'):
1959 case Magic('f'):
1960 case Magic('F'):
1961 case Magic('p'):
1962 case Magic('P'):
1963 case Magic('s'):
1964 case Magic('S'):
1965 case Magic('d'):
1966 case Magic('D'):
1967 case Magic('x'):
1968 case Magic('X'):
1969 case Magic('o'):
1970 case Magic('O'):
1971 case Magic('w'):
1972 case Magic('W'):
1973 case Magic('h'):
1974 case Magic('H'):
1975 case Magic('a'):
1976 case Magic('A'):
1977 case Magic('l'):
1978 case Magic('L'):
1979 case Magic('u'):
1980 case Magic('U'):
1981 p = vim_strchr(classchars, no_Magic(c));
1982 if (p == NULL)
1983 EMSG_RET_NULL(_("E63: invalid use of \\_"));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001984#ifdef FEAT_MBYTE
1985 /* When '.' is followed by a composing char ignore the dot, so that
1986 * the composing char is matched here. */
1987 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1988 {
1989 c = getchr();
1990 goto do_multibyte;
1991 }
1992#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00001993 ret = regnode(classcodes[p - classchars] + extra);
1994 *flagp |= HASWIDTH | SIMPLE;
1995 break;
1996
1997 case Magic('n'):
1998 if (reg_string)
1999 {
2000 /* In a string "\n" matches a newline character. */
2001 ret = regnode(EXACTLY);
2002 regc(NL);
2003 regc(NUL);
2004 *flagp |= HASWIDTH | SIMPLE;
2005 }
2006 else
2007 {
2008 /* In buffer text "\n" matches the end of a line. */
2009 ret = regnode(NEWL);
2010 *flagp |= HASWIDTH | HASNL;
2011 }
2012 break;
2013
2014 case Magic('('):
2015 if (one_exactly)
2016 EMSG_ONE_RET_NULL;
2017 ret = reg(REG_PAREN, &flags);
2018 if (ret == NULL)
2019 return NULL;
2020 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2021 break;
2022
2023 case NUL:
2024 case Magic('|'):
2025 case Magic('&'):
2026 case Magic(')'):
Bram Moolenaard4210772008-01-02 14:35:30 +00002027 if (one_exactly)
2028 EMSG_ONE_RET_NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002029 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
2030 /* NOTREACHED */
2031
2032 case Magic('='):
2033 case Magic('?'):
2034 case Magic('+'):
2035 case Magic('@'):
2036 case Magic('{'):
2037 case Magic('*'):
2038 c = no_Magic(c);
2039 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2040 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2041 ? "" : "\\", c);
2042 EMSG_RET_NULL(IObuff);
2043 /* NOTREACHED */
2044
2045 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00002046 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002047 {
2048 char_u *lp;
2049
2050 ret = regnode(EXACTLY);
2051 lp = reg_prev_sub;
2052 while (*lp != NUL)
2053 regc(*lp++);
2054 regc(NUL);
2055 if (*reg_prev_sub != NUL)
2056 {
2057 *flagp |= HASWIDTH;
2058 if ((lp - reg_prev_sub) == 1)
2059 *flagp |= SIMPLE;
2060 }
2061 }
2062 else
2063 EMSG_RET_NULL(_(e_nopresub));
2064 break;
2065
2066 case Magic('1'):
2067 case Magic('2'):
2068 case Magic('3'):
2069 case Magic('4'):
2070 case Magic('5'):
2071 case Magic('6'):
2072 case Magic('7'):
2073 case Magic('8'):
2074 case Magic('9'):
2075 {
2076 int refnum;
2077
2078 refnum = c - Magic('0');
2079 /*
2080 * Check if the back reference is legal. We must have seen the
2081 * close brace.
2082 * TODO: Should also check that we don't refer to something
2083 * that is repeated (+*=): what instance of the repetition
2084 * should we match?
2085 */
2086 if (!had_endbrace[refnum])
2087 {
2088 /* Trick: check if "@<=" or "@<!" follows, in which case
2089 * the \1 can appear before the referenced match. */
2090 for (p = regparse; *p != NUL; ++p)
2091 if (p[0] == '@' && p[1] == '<'
2092 && (p[2] == '!' || p[2] == '='))
2093 break;
2094 if (*p == NUL)
2095 EMSG_RET_NULL(_("E65: Illegal back reference"));
2096 }
2097 ret = regnode(BACKREF + refnum);
2098 }
2099 break;
2100
Bram Moolenaar071d4272004-06-13 20:20:40 +00002101 case Magic('z'):
2102 {
2103 c = no_Magic(getchr());
2104 switch (c)
2105 {
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002106#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +00002107 case '(': if (reg_do_extmatch != REX_SET)
2108 EMSG_RET_NULL(_("E66: \\z( not allowed here"));
2109 if (one_exactly)
2110 EMSG_ONE_RET_NULL;
2111 ret = reg(REG_ZPAREN, &flags);
2112 if (ret == NULL)
2113 return NULL;
2114 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2115 re_has_z = REX_SET;
2116 break;
2117
2118 case '1':
2119 case '2':
2120 case '3':
2121 case '4':
2122 case '5':
2123 case '6':
2124 case '7':
2125 case '8':
2126 case '9': if (reg_do_extmatch != REX_USE)
2127 EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
2128 ret = regnode(ZREF + c - '0');
2129 re_has_z = REX_USE;
2130 break;
Bram Moolenaarc4956c82006-03-12 21:58:43 +00002131#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00002132
2133 case 's': ret = regnode(MOPEN + 0);
2134 break;
2135
2136 case 'e': ret = regnode(MCLOSE + 0);
2137 break;
2138
2139 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2140 }
2141 }
2142 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002143
2144 case Magic('%'):
2145 {
2146 c = no_Magic(getchr());
2147 switch (c)
2148 {
2149 /* () without a back reference */
2150 case '(':
2151 if (one_exactly)
2152 EMSG_ONE_RET_NULL;
2153 ret = reg(REG_NPAREN, &flags);
2154 if (ret == NULL)
2155 return NULL;
2156 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2157 break;
2158
2159 /* Catch \%^ and \%$ regardless of where they appear in the
2160 * pattern -- regardless of whether or not it makes sense. */
2161 case '^':
2162 ret = regnode(RE_BOF);
2163 break;
2164
2165 case '$':
2166 ret = regnode(RE_EOF);
2167 break;
2168
2169 case '#':
2170 ret = regnode(CURSOR);
2171 break;
2172
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002173 case 'V':
2174 ret = regnode(RE_VISUAL);
2175 break;
2176
Bram Moolenaar071d4272004-06-13 20:20:40 +00002177 /* \%[abc]: Emit as a list of branches, all ending at the last
2178 * branch which matches nothing. */
2179 case '[':
2180 if (one_exactly) /* doesn't nest */
2181 EMSG_ONE_RET_NULL;
2182 {
2183 char_u *lastbranch;
2184 char_u *lastnode = NULL;
2185 char_u *br;
2186
2187 ret = NULL;
2188 while ((c = getchr()) != ']')
2189 {
2190 if (c == NUL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002191 EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002192 reg_magic == MAGIC_ALL);
2193 br = regnode(BRANCH);
2194 if (ret == NULL)
2195 ret = br;
2196 else
2197 regtail(lastnode, br);
2198
2199 ungetchr();
2200 one_exactly = TRUE;
2201 lastnode = regatom(flagp);
2202 one_exactly = FALSE;
2203 if (lastnode == NULL)
2204 return NULL;
2205 }
2206 if (ret == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002207 EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002208 reg_magic == MAGIC_ALL);
2209 lastbranch = regnode(BRANCH);
2210 br = regnode(NOTHING);
2211 if (ret != JUST_CALC_SIZE)
2212 {
2213 regtail(lastnode, br);
2214 regtail(lastbranch, br);
2215 /* connect all branches to the NOTHING
2216 * branch at the end */
2217 for (br = ret; br != lastnode; )
2218 {
2219 if (OP(br) == BRANCH)
2220 {
2221 regtail(br, lastbranch);
2222 br = OPERAND(br);
2223 }
2224 else
2225 br = regnext(br);
2226 }
2227 }
Bram Moolenaara6404a42008-08-08 11:45:39 +00002228 *flagp &= ~(HASWIDTH | SIMPLE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002229 break;
2230 }
2231
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002232 case 'd': /* %d123 decimal */
2233 case 'o': /* %o123 octal */
2234 case 'x': /* %xab hex 2 */
2235 case 'u': /* %uabcd hex 4 */
2236 case 'U': /* %U1234abcd hex 8 */
2237 {
2238 int i;
2239
2240 switch (c)
2241 {
2242 case 'd': i = getdecchrs(); break;
2243 case 'o': i = getoctchrs(); break;
2244 case 'x': i = gethexchrs(2); break;
2245 case 'u': i = gethexchrs(4); break;
2246 case 'U': i = gethexchrs(8); break;
2247 default: i = -1; break;
2248 }
2249
2250 if (i < 0)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002251 EMSG2_RET_NULL(
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002252 _("E678: Invalid character after %s%%[dxouU]"),
2253 reg_magic == MAGIC_ALL);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002254#ifdef FEAT_MBYTE
2255 if (use_multibytecode(i))
2256 ret = regnode(MULTIBYTECODE);
2257 else
2258#endif
2259 ret = regnode(EXACTLY);
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002260 if (i == 0)
2261 regc(0x0a);
2262 else
2263#ifdef FEAT_MBYTE
2264 regmbc(i);
2265#else
2266 regc(i);
2267#endif
2268 regc(NUL);
2269 *flagp |= HASWIDTH;
2270 break;
2271 }
2272
Bram Moolenaar071d4272004-06-13 20:20:40 +00002273 default:
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002274 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2275 || c == '\'')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002276 {
2277 long_u n = 0;
2278 int cmp;
2279
2280 cmp = c;
2281 if (cmp == '<' || cmp == '>')
2282 c = getchr();
2283 while (VIM_ISDIGIT(c))
2284 {
2285 n = n * 10 + (c - '0');
2286 c = getchr();
2287 }
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00002288 if (c == '\'' && n == 0)
2289 {
2290 /* "\%'m", "\%<'m" and "\%>'m": Mark */
2291 c = getchr();
2292 ret = regnode(RE_MARK);
2293 if (ret == JUST_CALC_SIZE)
2294 regsize += 2;
2295 else
2296 {
2297 *regcode++ = c;
2298 *regcode++ = cmp;
2299 }
2300 break;
2301 }
2302 else if (c == 'l' || c == 'c' || c == 'v')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002303 {
2304 if (c == 'l')
2305 ret = regnode(RE_LNUM);
2306 else if (c == 'c')
2307 ret = regnode(RE_COL);
2308 else
2309 ret = regnode(RE_VCOL);
2310 if (ret == JUST_CALC_SIZE)
2311 regsize += 5;
2312 else
2313 {
2314 /* put the number and the optional
2315 * comparator after the opcode */
2316 regcode = re_put_long(regcode, n);
2317 *regcode++ = cmp;
2318 }
2319 break;
2320 }
2321 }
2322
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002323 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
Bram Moolenaar071d4272004-06-13 20:20:40 +00002324 reg_magic == MAGIC_ALL);
2325 }
2326 }
2327 break;
2328
2329 case Magic('['):
2330collection:
2331 {
2332 char_u *lp;
2333
2334 /*
2335 * If there is no matching ']', we assume the '[' is a normal
2336 * character. This makes 'incsearch' and ":help [" work.
2337 */
2338 lp = skip_anyof(regparse);
2339 if (*lp == ']') /* there is a matching ']' */
2340 {
2341 int startc = -1; /* > 0 when next '-' is a range */
2342 int endc;
2343
2344 /*
2345 * In a character class, different parsing rules apply.
2346 * Not even \ is special anymore, nothing is.
2347 */
2348 if (*regparse == '^') /* Complement of range. */
2349 {
2350 ret = regnode(ANYBUT + extra);
2351 regparse++;
2352 }
2353 else
2354 ret = regnode(ANYOF + extra);
2355
2356 /* At the start ']' and '-' mean the literal character. */
2357 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002358 {
2359 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002360 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002361 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002362
2363 while (*regparse != NUL && *regparse != ']')
2364 {
2365 if (*regparse == '-')
2366 {
2367 ++regparse;
2368 /* The '-' is not used for a range at the end and
2369 * after or before a '\n'. */
2370 if (*regparse == ']' || *regparse == NUL
2371 || startc == -1
2372 || (regparse[0] == '\\' && regparse[1] == 'n'))
2373 {
2374 regc('-');
2375 startc = '-'; /* [--x] is a range */
2376 }
2377 else
2378 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002379 /* Also accept "a-[.z.]" */
2380 endc = 0;
2381 if (*regparse == '[')
2382 endc = get_coll_element(&regparse);
2383 if (endc == 0)
2384 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002385#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002386 if (has_mbyte)
2387 endc = mb_ptr2char_adv(&regparse);
2388 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002389#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002390 endc = *regparse++;
2391 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002392
2393 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002394 if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002395 endc = coll_get_char();
2396
Bram Moolenaar071d4272004-06-13 20:20:40 +00002397 if (startc > endc)
2398 EMSG_RET_NULL(_(e_invrange));
2399#ifdef FEAT_MBYTE
2400 if (has_mbyte && ((*mb_char2len)(startc) > 1
2401 || (*mb_char2len)(endc) > 1))
2402 {
2403 /* Limit to a range of 256 chars */
2404 if (endc > startc + 256)
2405 EMSG_RET_NULL(_(e_invrange));
2406 while (++startc <= endc)
2407 regmbc(startc);
2408 }
2409 else
2410#endif
2411 {
2412#ifdef EBCDIC
2413 int alpha_only = FALSE;
2414
2415 /* for alphabetical range skip the gaps
2416 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2417 if (isalpha(startc) && isalpha(endc))
2418 alpha_only = TRUE;
2419#endif
2420 while (++startc <= endc)
2421#ifdef EBCDIC
2422 if (!alpha_only || isalpha(startc))
2423#endif
2424 regc(startc);
2425 }
2426 startc = -1;
2427 }
2428 }
2429 /*
2430 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2431 * accepts "\t", "\e", etc., but only when the 'l' flag in
2432 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002433 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002434 */
2435 else if (*regparse == '\\'
Bram Moolenaardf177f62005-02-22 08:39:57 +00002436 && !cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002437 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2438 || (!cpo_lit
2439 && vim_strchr(REGEXP_ABBR,
2440 regparse[1]) != NULL)))
2441 {
2442 regparse++;
2443 if (*regparse == 'n')
2444 {
2445 /* '\n' in range: also match NL */
2446 if (ret != JUST_CALC_SIZE)
2447 {
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002448 /* Using \n inside [^] does not change what
2449 * matches. "[^\n]" is the same as ".". */
2450 if (*ret == ANYOF)
2451 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002452 *ret = ANYOF + ADD_NL;
Bram Moolenaare337e5f2013-01-30 18:21:51 +01002453 *flagp |= HASNL;
2454 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002455 /* else: must have had a \n already */
2456 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002457 regparse++;
2458 startc = -1;
2459 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002460 else if (*regparse == 'd'
2461 || *regparse == 'o'
2462 || *regparse == 'x'
2463 || *regparse == 'u'
2464 || *regparse == 'U')
2465 {
2466 startc = coll_get_char();
2467 if (startc == 0)
2468 regc(0x0a);
2469 else
2470#ifdef FEAT_MBYTE
2471 regmbc(startc);
2472#else
2473 regc(startc);
2474#endif
2475 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002476 else
2477 {
2478 startc = backslash_trans(*regparse++);
2479 regc(startc);
2480 }
2481 }
2482 else if (*regparse == '[')
2483 {
2484 int c_class;
2485 int cu;
2486
Bram Moolenaardf177f62005-02-22 08:39:57 +00002487 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002488 startc = -1;
2489 /* Characters assumed to be 8 bits! */
2490 switch (c_class)
2491 {
2492 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002493 c_class = get_equi_class(&regparse);
2494 if (c_class != 0)
2495 {
2496 /* produce equivalence class */
2497 reg_equi_class(c_class);
2498 }
2499 else if ((c_class =
2500 get_coll_element(&regparse)) != 0)
2501 {
2502 /* produce a collating element */
2503 regmbc(c_class);
2504 }
2505 else
2506 {
2507 /* literal '[', allow [[-x] as a range */
2508 startc = *regparse++;
2509 regc(startc);
2510 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002511 break;
2512 case CLASS_ALNUM:
2513 for (cu = 1; cu <= 255; cu++)
2514 if (isalnum(cu))
2515 regc(cu);
2516 break;
2517 case CLASS_ALPHA:
2518 for (cu = 1; cu <= 255; cu++)
2519 if (isalpha(cu))
2520 regc(cu);
2521 break;
2522 case CLASS_BLANK:
2523 regc(' ');
2524 regc('\t');
2525 break;
2526 case CLASS_CNTRL:
2527 for (cu = 1; cu <= 255; cu++)
2528 if (iscntrl(cu))
2529 regc(cu);
2530 break;
2531 case CLASS_DIGIT:
2532 for (cu = 1; cu <= 255; cu++)
2533 if (VIM_ISDIGIT(cu))
2534 regc(cu);
2535 break;
2536 case CLASS_GRAPH:
2537 for (cu = 1; cu <= 255; cu++)
2538 if (isgraph(cu))
2539 regc(cu);
2540 break;
2541 case CLASS_LOWER:
2542 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002543 if (MB_ISLOWER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002544 regc(cu);
2545 break;
2546 case CLASS_PRINT:
2547 for (cu = 1; cu <= 255; cu++)
2548 if (vim_isprintc(cu))
2549 regc(cu);
2550 break;
2551 case CLASS_PUNCT:
2552 for (cu = 1; cu <= 255; cu++)
2553 if (ispunct(cu))
2554 regc(cu);
2555 break;
2556 case CLASS_SPACE:
2557 for (cu = 9; cu <= 13; cu++)
2558 regc(cu);
2559 regc(' ');
2560 break;
2561 case CLASS_UPPER:
2562 for (cu = 1; cu <= 255; cu++)
Bram Moolenaara245a5b2007-08-11 11:58:23 +00002563 if (MB_ISUPPER(cu))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002564 regc(cu);
2565 break;
2566 case CLASS_XDIGIT:
2567 for (cu = 1; cu <= 255; cu++)
2568 if (vim_isxdigit(cu))
2569 regc(cu);
2570 break;
2571 case CLASS_TAB:
2572 regc('\t');
2573 break;
2574 case CLASS_RETURN:
2575 regc('\r');
2576 break;
2577 case CLASS_BACKSPACE:
2578 regc('\b');
2579 break;
2580 case CLASS_ESCAPE:
2581 regc('\033');
2582 break;
2583 }
2584 }
2585 else
2586 {
2587#ifdef FEAT_MBYTE
2588 if (has_mbyte)
2589 {
2590 int len;
2591
2592 /* produce a multibyte character, including any
2593 * following composing characters */
2594 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002595 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002596 if (enc_utf8 && utf_char2len(startc) != len)
2597 startc = -1; /* composing chars */
2598 while (--len >= 0)
2599 regc(*regparse++);
2600 }
2601 else
2602#endif
2603 {
2604 startc = *regparse++;
2605 regc(startc);
2606 }
2607 }
2608 }
2609 regc(NUL);
2610 prevchr_len = 1; /* last char was the ']' */
2611 if (*regparse != ']')
2612 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2613 skipchr(); /* let's be friends with the lexer again */
2614 *flagp |= HASWIDTH | SIMPLE;
2615 break;
2616 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002617 else if (reg_strict)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002618 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002619 }
2620 /* FALLTHROUGH */
2621
2622 default:
2623 {
2624 int len;
2625
2626#ifdef FEAT_MBYTE
2627 /* A multi-byte character is handled as a separate atom if it's
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002628 * before a multi and when it's a composing char. */
2629 if (use_multibytecode(c))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002630 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002631do_multibyte:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002632 ret = regnode(MULTIBYTECODE);
2633 regmbc(c);
2634 *flagp |= HASWIDTH | SIMPLE;
2635 break;
2636 }
2637#endif
2638
2639 ret = regnode(EXACTLY);
2640
2641 /*
2642 * Append characters as long as:
2643 * - there is no following multi, we then need the character in
2644 * front of it as a single character operand
2645 * - not running into a Magic character
2646 * - "one_exactly" is not set
2647 * But always emit at least one character. Might be a Multi,
2648 * e.g., a "[" without matching "]".
2649 */
2650 for (len = 0; c != NUL && (len == 0
2651 || (re_multi_type(peekchr()) == NOT_MULTI
2652 && !one_exactly
2653 && !is_Magic(c))); ++len)
2654 {
2655 c = no_Magic(c);
2656#ifdef FEAT_MBYTE
2657 if (has_mbyte)
2658 {
2659 regmbc(c);
2660 if (enc_utf8)
2661 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002662 int l;
2663
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002664 /* Need to get composing character too. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002665 for (;;)
2666 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002667 l = utf_ptr2len(regparse);
2668 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002669 break;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002670 regmbc(utf_ptr2char(regparse));
2671 skipchr();
Bram Moolenaar071d4272004-06-13 20:20:40 +00002672 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002673 }
2674 }
2675 else
2676#endif
2677 regc(c);
2678 c = getchr();
2679 }
2680 ungetchr();
2681
2682 regc(NUL);
2683 *flagp |= HASWIDTH;
2684 if (len == 1)
2685 *flagp |= SIMPLE;
2686 }
2687 break;
2688 }
2689
2690 return ret;
2691}
2692
#ifdef FEAT_MBYTE
/*
 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
 * character "c".
 *
 * That is the case when "has_mbyte" is set, "c" takes more than one byte
 * and either a multi follows in the pattern or, with "enc_utf8" set, "c" is
 * a composing character.
 */
    static int
use_multibytecode(c)
    int c;
{
    return has_mbyte && (*mb_char2len)(c) > 1
                     && (re_multi_type(peekchr()) != NOT_MULTI
                             || (enc_utf8 && utf_iscomposing(c)));
}
#endif
2707
Bram Moolenaar071d4272004-06-13 20:20:40 +00002708/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002709 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002710 * Return pointer to generated code.
2711 */
2712 static char_u *
2713regnode(op)
2714 int op;
2715{
2716 char_u *ret;
2717
2718 ret = regcode;
2719 if (ret == JUST_CALC_SIZE)
2720 regsize += 3;
2721 else
2722 {
2723 *regcode++ = op;
2724 *regcode++ = NUL; /* Null "next" pointer. */
2725 *regcode++ = NUL;
2726 }
2727 return ret;
2728}
2729
2730/*
2731 * Emit (if appropriate) a byte of code
2732 */
2733 static void
2734regc(b)
2735 int b;
2736{
2737 if (regcode == JUST_CALC_SIZE)
2738 regsize++;
2739 else
2740 *regcode++ = b;
2741}
2742
#ifdef FEAT_MBYTE
/*
 * Emit (if appropriate) a multi-byte character of code
 *
 * Nothing is emitted when "has_mbyte" is not set and "c" is above 0xff.
 * When "regcode" is JUST_CALC_SIZE only "regsize" is increased, by the
 * length reported by mb_char2len(); otherwise mb_char2bytes() stores the
 * character at "regcode", which is advanced by the value it returns.
 */
    static void
regmbc(c)
    int         c;
{
    if (!has_mbyte && c > 0xff)
        return;
    if (regcode == JUST_CALC_SIZE)
        regsize += (*mb_char2len)(c);
    else
        regcode += (*mb_char2bytes)(c, regcode);
}
#endif
2759
2760/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002761 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002762 *
2763 * Means relocating the operand.
2764 */
2765 static void
2766reginsert(op, opnd)
2767 int op;
2768 char_u *opnd;
2769{
2770 char_u *src;
2771 char_u *dst;
2772 char_u *place;
2773
2774 if (regcode == JUST_CALC_SIZE)
2775 {
2776 regsize += 3;
2777 return;
2778 }
2779 src = regcode;
2780 regcode += 3;
2781 dst = regcode;
2782 while (src > opnd)
2783 *--dst = *--src;
2784
2785 place = opnd; /* Op node, where operand used to be. */
2786 *place++ = op;
2787 *place++ = NUL;
2788 *place = NUL;
2789}
2790
2791/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002792 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002793 * Add a number to the operator.
2794 */
2795 static void
2796reginsert_nr(op, val, opnd)
2797 int op;
2798 long val;
2799 char_u *opnd;
2800{
2801 char_u *src;
2802 char_u *dst;
2803 char_u *place;
2804
2805 if (regcode == JUST_CALC_SIZE)
2806 {
2807 regsize += 7;
2808 return;
2809 }
2810 src = regcode;
2811 regcode += 7;
2812 dst = regcode;
2813 while (src > opnd)
2814 *--dst = *--src;
2815
2816 place = opnd; /* Op node, where operand used to be. */
2817 *place++ = op;
2818 *place++ = NUL;
2819 *place++ = NUL;
2820 place = re_put_long(place, (long_u)val);
2821}
2822
2823/*
2824 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002825 * The operator has the given limit values as operands. Also set next pointer.
2826 *
2827 * Means relocating the operand.
2828 */
2829 static void
2830reginsert_limits(op, minval, maxval, opnd)
2831 int op;
2832 long minval;
2833 long maxval;
2834 char_u *opnd;
2835{
2836 char_u *src;
2837 char_u *dst;
2838 char_u *place;
2839
2840 if (regcode == JUST_CALC_SIZE)
2841 {
2842 regsize += 11;
2843 return;
2844 }
2845 src = regcode;
2846 regcode += 11;
2847 dst = regcode;
2848 while (src > opnd)
2849 *--dst = *--src;
2850
2851 place = opnd; /* Op node, where operand used to be. */
2852 *place++ = op;
2853 *place++ = NUL;
2854 *place++ = NUL;
2855 place = re_put_long(place, (long_u)minval);
2856 place = re_put_long(place, (long_u)maxval);
2857 regtail(opnd, place);
2858}
2859
2860/*
2861 * Write a long as four bytes at "p" and return pointer to the next char.
2862 */
2863 static char_u *
2864re_put_long(p, val)
2865 char_u *p;
2866 long_u val;
2867{
2868 *p++ = (char_u) ((val >> 24) & 0377);
2869 *p++ = (char_u) ((val >> 16) & 0377);
2870 *p++ = (char_u) ((val >> 8) & 0377);
2871 *p++ = (char_u) (val & 0377);
2872 return p;
2873}
2874
2875/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002876 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002877 */
2878 static void
2879regtail(p, val)
2880 char_u *p;
2881 char_u *val;
2882{
2883 char_u *scan;
2884 char_u *temp;
2885 int offset;
2886
2887 if (p == JUST_CALC_SIZE)
2888 return;
2889
2890 /* Find last node. */
2891 scan = p;
2892 for (;;)
2893 {
2894 temp = regnext(scan);
2895 if (temp == NULL)
2896 break;
2897 scan = temp;
2898 }
2899
Bram Moolenaar582fd852005-03-28 20:58:01 +00002900 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002901 offset = (int)(scan - val);
2902 else
2903 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002904 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002905 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002906 * values in too many places. */
2907 if (offset > 0xffff)
2908 reg_toolong = TRUE;
2909 else
2910 {
2911 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2912 *(scan + 2) = (char_u) (offset & 0377);
2913 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002914}
2915
2916/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002917 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002918 */
2919 static void
2920regoptail(p, val)
2921 char_u *p;
2922 char_u *val;
2923{
2924 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2925 if (p == NULL || p == JUST_CALC_SIZE
2926 || (OP(p) != BRANCH
2927 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2928 return;
2929 regtail(OPERAND(p), val);
2930}
2931
/*
 * Functions for getting characters from the regexp input.
 */

/* Position flags shared by initchr(), peekchr(), skipchr() and ungetchr(). */
static int      prev_at_start;  /* True when on the second character */
static int      at_start;       /* True when on the first character */
2938
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002939/*
2940 * Start parsing at "str".
2941 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002942 static void
2943initchr(str)
2944 char_u *str;
2945{
2946 regparse = str;
2947 prevchr_len = 0;
2948 curchr = prevprevchr = prevchr = nextchr = -1;
2949 at_start = TRUE;
2950 prev_at_start = FALSE;
2951}
2952
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002953/*
2954 * Get the next character without advancing.
2955 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002956 static int
2957peekchr()
2958{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002959 static int after_slash = FALSE;
2960
Bram Moolenaar071d4272004-06-13 20:20:40 +00002961 if (curchr == -1)
2962 {
2963 switch (curchr = regparse[0])
2964 {
2965 case '.':
2966 case '[':
2967 case '~':
2968 /* magic when 'magic' is on */
2969 if (reg_magic >= MAGIC_ON)
2970 curchr = Magic(curchr);
2971 break;
2972 case '(':
2973 case ')':
2974 case '{':
2975 case '%':
2976 case '+':
2977 case '=':
2978 case '?':
2979 case '@':
2980 case '!':
2981 case '&':
2982 case '|':
2983 case '<':
2984 case '>':
2985 case '#': /* future ext. */
2986 case '"': /* future ext. */
2987 case '\'': /* future ext. */
2988 case ',': /* future ext. */
2989 case '-': /* future ext. */
2990 case ':': /* future ext. */
2991 case ';': /* future ext. */
2992 case '`': /* future ext. */
2993 case '/': /* Can't be used in / command */
2994 /* magic only after "\v" */
2995 if (reg_magic == MAGIC_ALL)
2996 curchr = Magic(curchr);
2997 break;
2998 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002999 /* * is not magic as the very first character, eg "?*ptr", when
3000 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3001 * "\(\*" is not magic, thus must be magic if "after_slash" */
3002 if (reg_magic >= MAGIC_ON
3003 && !at_start
3004 && !(prev_at_start && prevchr == Magic('^'))
3005 && (after_slash
3006 || (prevchr != Magic('(')
3007 && prevchr != Magic('&')
3008 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003009 curchr = Magic('*');
3010 break;
3011 case '^':
3012 /* '^' is only magic as the very first character and if it's after
3013 * "\(", "\|", "\&' or "\n" */
3014 if (reg_magic >= MAGIC_OFF
3015 && (at_start
3016 || reg_magic == MAGIC_ALL
3017 || prevchr == Magic('(')
3018 || prevchr == Magic('|')
3019 || prevchr == Magic('&')
3020 || prevchr == Magic('n')
3021 || (no_Magic(prevchr) == '('
3022 && prevprevchr == Magic('%'))))
3023 {
3024 curchr = Magic('^');
3025 at_start = TRUE;
3026 prev_at_start = FALSE;
3027 }
3028 break;
3029 case '$':
3030 /* '$' is only magic as the very last char and if it's in front of
3031 * either "\|", "\)", "\&", or "\n" */
3032 if (reg_magic >= MAGIC_OFF)
3033 {
3034 char_u *p = regparse + 1;
3035
3036 /* ignore \c \C \m and \M after '$' */
3037 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
3038 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
3039 p += 2;
3040 if (p[0] == NUL
3041 || (p[0] == '\\'
3042 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3043 || p[1] == 'n'))
3044 || reg_magic == MAGIC_ALL)
3045 curchr = Magic('$');
3046 }
3047 break;
3048 case '\\':
3049 {
3050 int c = regparse[1];
3051
3052 if (c == NUL)
3053 curchr = '\\'; /* trailing '\' */
3054 else if (
3055#ifdef EBCDIC
3056 vim_strchr(META, c)
3057#else
3058 c <= '~' && META_flags[c]
3059#endif
3060 )
3061 {
3062 /*
3063 * META contains everything that may be magic sometimes,
3064 * except ^ and $ ("\^" and "\$" are only magic after
3065 * "\v"). We now fetch the next character and toggle its
3066 * magicness. Therefore, \ is so meta-magic that it is
3067 * not in META.
3068 */
3069 curchr = -1;
3070 prev_at_start = at_start;
3071 at_start = FALSE; /* be able to say "/\*ptr" */
3072 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003073 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003074 peekchr();
3075 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003076 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003077 curchr = toggle_Magic(curchr);
3078 }
3079 else if (vim_strchr(REGEXP_ABBR, c))
3080 {
3081 /*
3082 * Handle abbreviations, like "\t" for TAB -- webb
3083 */
3084 curchr = backslash_trans(c);
3085 }
3086 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3087 curchr = toggle_Magic(c);
3088 else
3089 {
3090 /*
3091 * Next character can never be (made) magic?
3092 * Then backslashing it won't do anything.
3093 */
3094#ifdef FEAT_MBYTE
3095 if (has_mbyte)
3096 curchr = (*mb_ptr2char)(regparse + 1);
3097 else
3098#endif
3099 curchr = c;
3100 }
3101 break;
3102 }
3103
3104#ifdef FEAT_MBYTE
3105 default:
3106 if (has_mbyte)
3107 curchr = (*mb_ptr2char)(regparse);
3108#endif
3109 }
3110 }
3111
3112 return curchr;
3113}
3114
3115/*
3116 * Eat one lexed character. Do this in a way that we can undo it.
3117 */
3118 static void
3119skipchr()
3120{
3121 /* peekchr() eats a backslash, do the same here */
3122 if (*regparse == '\\')
3123 prevchr_len = 1;
3124 else
3125 prevchr_len = 0;
3126 if (regparse[prevchr_len] != NUL)
3127 {
3128#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003129 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003130 /* exclude composing chars that mb_ptr2len does include */
3131 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003132 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003133 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003134 else
3135#endif
3136 ++prevchr_len;
3137 }
3138 regparse += prevchr_len;
3139 prev_at_start = at_start;
3140 at_start = FALSE;
3141 prevprevchr = prevchr;
3142 prevchr = curchr;
3143 curchr = nextchr; /* use previously unget char, or -1 */
3144 nextchr = -1;
3145}
3146
3147/*
3148 * Skip a character while keeping the value of prev_at_start for at_start.
3149 * prevchr and prevprevchr are also kept.
3150 */
3151 static void
3152skipchr_keepstart()
3153{
3154 int as = prev_at_start;
3155 int pr = prevchr;
3156 int prpr = prevprevchr;
3157
3158 skipchr();
3159 at_start = as;
3160 prevchr = pr;
3161 prevprevchr = prpr;
3162}
3163
/*
 * Get the next character from the pattern and advance past it.  We know about
 * magic and such, so therefore we need a lexical analyzer: the value comes
 * from peekchr(), the advancing is done by skipchr().
 */
    static int
getchr()
{
    int         c;

    c = peekchr();
    skipchr();
    return c;
}
3176
3177/*
3178 * put character back. Works only once!
3179 */
3180 static void
3181ungetchr()
3182{
3183 nextchr = curchr;
3184 curchr = prevchr;
3185 prevchr = prevprevchr;
3186 at_start = prev_at_start;
3187 prev_at_start = FALSE;
3188
3189 /* Backup regparse, so that it's at the same position as before the
3190 * getchr(). */
3191 regparse -= prevchr_len;
3192}
3193
3194/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003195 * Get and return the value of the hex string at the current position.
3196 * Return -1 if there is no valid hex number.
3197 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003198 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003199 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003200 * The parameter controls the maximum number of input characters. This will be
3201 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3202 */
3203 static int
3204gethexchrs(maxinputlen)
3205 int maxinputlen;
3206{
3207 int nr = 0;
3208 int c;
3209 int i;
3210
3211 for (i = 0; i < maxinputlen; ++i)
3212 {
3213 c = regparse[0];
3214 if (!vim_isxdigit(c))
3215 break;
3216 nr <<= 4;
3217 nr |= hex2nr(c);
3218 ++regparse;
3219 }
3220
3221 if (i == 0)
3222 return -1;
3223 return nr;
3224}
3225
3226/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003227 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003228 * current position. Return -1 for invalid. Consumes all digits.
3229 */
3230 static int
3231getdecchrs()
3232{
3233 int nr = 0;
3234 int c;
3235 int i;
3236
3237 for (i = 0; ; ++i)
3238 {
3239 c = regparse[0];
3240 if (c < '0' || c > '9')
3241 break;
3242 nr *= 10;
3243 nr += c - '0';
3244 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003245 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003246 }
3247
3248 if (i == 0)
3249 return -1;
3250 return nr;
3251}
3252
3253/*
3254 * get and return the value of the octal string immediately after the current
3255 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3256 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3257 * treat 8 or 9 as recognised characters. Position is updated:
3258 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003259 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003260 */
3261 static int
3262getoctchrs()
3263{
3264 int nr = 0;
3265 int c;
3266 int i;
3267
3268 for (i = 0; i < 3 && nr < 040; ++i)
3269 {
3270 c = regparse[0];
3271 if (c < '0' || c > '7')
3272 break;
3273 nr <<= 3;
3274 nr |= hex2nr(c);
3275 ++regparse;
3276 }
3277
3278 if (i == 0)
3279 return -1;
3280 return nr;
3281}
3282
3283/*
3284 * Get a number after a backslash that is inside [].
3285 * When nothing is recognized return a backslash.
3286 */
3287 static int
3288coll_get_char()
3289{
3290 int nr = -1;
3291
3292 switch (*regparse++)
3293 {
3294 case 'd': nr = getdecchrs(); break;
3295 case 'o': nr = getoctchrs(); break;
3296 case 'x': nr = gethexchrs(2); break;
3297 case 'u': nr = gethexchrs(4); break;
3298 case 'U': nr = gethexchrs(8); break;
3299 }
3300 if (nr < 0)
3301 {
3302 /* If getting the number fails be backwards compatible: the character
3303 * is a backslash. */
3304 --regparse;
3305 nr = '\\';
3306 }
3307 return nr;
3308}
3309
3310/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003311 * read_limits - Read two integers to be taken as a minimum and maximum.
3312 * If the first character is '-', then the range is reversed.
3313 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3314 * missing, a very big number is the default.
3315 */
3316 static int
3317read_limits(minval, maxval)
3318 long *minval;
3319 long *maxval;
3320{
3321 int reverse = FALSE;
3322 char_u *first_char;
3323 long tmp;
3324
3325 if (*regparse == '-')
3326 {
3327 /* Starts with '-', so reverse the range later */
3328 regparse++;
3329 reverse = TRUE;
3330 }
3331 first_char = regparse;
3332 *minval = getdigits(&regparse);
3333 if (*regparse == ',') /* There is a comma */
3334 {
3335 if (vim_isdigit(*++regparse))
3336 *maxval = getdigits(&regparse);
3337 else
3338 *maxval = MAX_LIMIT;
3339 }
3340 else if (VIM_ISDIGIT(*first_char))
3341 *maxval = *minval; /* It was \{n} or \{-n} */
3342 else
3343 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3344 if (*regparse == '\\')
3345 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003346 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003347 {
3348 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3349 reg_magic == MAGIC_ALL ? "" : "\\");
3350 EMSG_RET_FAIL(IObuff);
3351 }
3352
3353 /*
3354 * Reverse the range if there was a '-', or make sure it is in the right
3355 * order otherwise.
3356 */
3357 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3358 {
3359 tmp = *minval;
3360 *minval = *maxval;
3361 *maxval = tmp;
3362 }
3363 skipchr(); /* let's be friends with the lexer again */
3364 return OK;
3365}
3366
3367/*
3368 * vim_regexec and friends
3369 */
3370
3371/*
3372 * Global work variables for vim_regexec().
3373 */
3374
3375/* The current match-position is remembered with these variables: */
3376static linenr_T reglnum; /* line number, relative to first line */
3377static char_u *regline; /* start of current line */
3378static char_u *reginput; /* current input, points into "regline" */
3379
3380static int need_clear_subexpr; /* subexpressions still need to be
3381 * cleared */
3382#ifdef FEAT_SYN_HL
3383static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
3384 * still need to be cleared */
3385#endif
3386
Bram Moolenaar071d4272004-06-13 20:20:40 +00003387/*
3388 * Structure used to save the current input state, when it needs to be
3389 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00003390 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00003391 */
3392typedef struct
3393{
3394 union
3395 {
3396 char_u *ptr; /* reginput pointer, for single-line regexp */
3397 lpos_T pos; /* reginput pos, for multi-line regexp */
3398 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003399 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003400} regsave_T;
3401
3402/* struct to save start/end pointer/position in for \(\) */
3403typedef struct
3404{
3405 union
3406 {
3407 char_u *ptr;
3408 lpos_T pos;
3409 } se_u;
3410} save_se_T;
3411
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003412/* used for BEHIND and NOBEHIND matching */
3413typedef struct regbehind_S
3414{
3415 regsave_T save_after;
3416 regsave_T save_behind;
Bram Moolenaarfde483c2008-06-15 12:21:50 +00003417 int save_need_clear_subexpr;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003418 save_se_T save_start[NSUBEXP];
3419 save_se_T save_end[NSUBEXP];
3420} regbehind_T;
3421
Bram Moolenaar071d4272004-06-13 20:20:40 +00003422static char_u *reg_getline __ARGS((linenr_T lnum));
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003423static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3424static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003425static void cleanup_subexpr __ARGS((void));
3426#ifdef FEAT_SYN_HL
3427static void cleanup_zsubexpr __ARGS((void));
3428#endif
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003429static void save_subexpr __ARGS((regbehind_T *bp));
3430static void restore_subexpr __ARGS((regbehind_T *bp));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003431static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003432static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3433static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003434static int reg_save_equal __ARGS((regsave_T *save));
3435static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3436static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3437
/*
 * Save the sub-expressions before attempting a match: the position "posp"
 * for a multi-line match, the pointer "pp" otherwise.
 * The expansion is wrapped in parentheses so the conditional expression
 * cannot combine with whatever surrounds the macro call.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/*
 * After a failed match restore the sub-expressions.
 * Expands to a brace block, so it can only be used as a statement.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3448
3449static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003450static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003451static int regrepeat __ARGS((char_u *p, long maxcount));
3452
3453#ifdef DEBUG
3454int regnarrate = 0;
3455#endif
3456
3457/*
3458 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3459 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3460 * contains '\c' or '\C' the value is overruled.
3461 */
3462static int ireg_ic;
3463
3464#ifdef FEAT_MBYTE
3465/*
3466 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3467 * in the regexp. Defaults to false, always.
3468 */
3469static int ireg_icombine;
3470#endif
3471
3472/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003473 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3474 * there is no maximum.
3475 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003476static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003477
3478/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003479 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3480 * slow, we keep one allocated piece of memory and only re-allocate it when
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003481 * it's too small. It's freed in bt_regexec_both() when finished.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003482 */
Bram Moolenaard4210772008-01-02 14:35:30 +00003483static char_u *reg_tofree = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003484static unsigned reg_tofreelen;
3485
3486/*
3487 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003488 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003489 * done:
3490 * single-line multi-line
3491 * reg_match &regmatch_T NULL
3492 * reg_mmatch NULL &regmmatch_T
3493 * reg_startp reg_match->startp <invalid>
3494 * reg_endp reg_match->endp <invalid>
3495 * reg_startpos <invalid> reg_mmatch->startpos
3496 * reg_endpos <invalid> reg_mmatch->endpos
3497 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003498 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003499 * reg_firstlnum <invalid> first line in which to search
3500 * reg_maxline 0 last line nr
3501 * reg_line_lbr FALSE or TRUE FALSE
3502 */
3503static regmatch_T *reg_match;
3504static regmmatch_T *reg_mmatch;
3505static char_u **reg_startp = NULL;
3506static char_u **reg_endp = NULL;
3507static lpos_T *reg_startpos = NULL;
3508static lpos_T *reg_endpos = NULL;
3509static win_T *reg_win;
3510static buf_T *reg_buf;
3511static linenr_T reg_firstlnum;
3512static linenr_T reg_maxline;
3513static int reg_line_lbr; /* "\n" in string is line break */
3514
/*
 * Values for rs_state in regitem_T.
 * The numbering starts at zero and depends on FEAT_SYN_HL, because the
 * RS_Z* values are only present with that feature.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3535
3536/*
3537 * When there are alternatives a regstate_T is put on the regstack to remember
3538 * what we are doing.
3539 * Before it may be another type of item, depending on rs_state, to remember
3540 * more things.
3541 */
3542typedef struct regitem_S
3543{
3544 regstate_T rs_state; /* what we are doing, one of RS_ above */
3545 char_u *rs_scan; /* current node in program */
3546 union
3547 {
3548 save_se_T sesave;
3549 regsave_T regsave;
3550 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003551 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003552} regitem_T;
3553
3554static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3555static void regstack_pop __ARGS((char_u **scan));
3556
/*
 * State kept for STAR, PLUS and BRACE_SIMPLE matching.
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3566
3567/* used to store input position when a BACK was encountered, so that we now if
3568 * we made any progress since the last time. */
3569typedef struct backpos_S
3570{
3571 char_u *bp_scan; /* "scan" where BACK was encountered */
3572 regsave_T bp_pos; /* last input position */
3573} backpos_T;
3574
3575/*
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003576 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
3577 * to avoid invoking malloc() and free() often.
3578 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3579 * or regbehind_T.
3580 * "backpos_T" is a table with backpos_T for BACK
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003581 */
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003582static garray_T regstack = {0, 0, 0, 0, NULL};
3583static garray_T backpos = {0, 0, 0, 0, NULL};
3584
3585/*
3586 * Both for regstack and backpos tables we use the following strategy of
3587 * allocation (to reduce malloc/free calls):
3588 * - Initial size is fairly small.
3589 * - When needed, the tables are grown bigger (8 times at first, double after
3590 * that).
3591 * - After executing the match we free the memory only if the array has grown.
3592 * Thus the memory is kept allocated when it's at the initial size.
3593 * This makes it fast while not keeping a lot of memory allocated.
3594 * A three times speed increase was observed when using many simple patterns.
3595 */
3596#define REGSTACK_INITIAL 2048
3597#define BACKPOS_INITIAL 64
3598
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between calls:
 * the line copy buffer, "reg_prev_sub", and the "regstack" and "backpos"
 * arrays.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003609
Bram Moolenaar071d4272004-06-13 20:20:40 +00003610/*
3611 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3612 */
3613 static char_u *
3614reg_getline(lnum)
3615 linenr_T lnum;
3616{
3617 /* when looking behind for a match/no-match lnum is negative. But we
3618 * can't go before line 1 */
3619 if (reg_firstlnum + lnum < 1)
3620 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003621 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003622 /* Must have matched the "\n" in the last line. */
3623 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003624 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3625}
3626
3627static regsave_T behind_pos;
3628
3629#ifdef FEAT_SYN_HL
3630static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3631static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3632static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3633static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3634#endif
3635
3636/* TRUE if using multi-line regexp. */
3637#define REG_MULTI (reg_match == NULL)
3638
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003639static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
3640
Bram Moolenaar071d4272004-06-13 20:20:40 +00003641/*
3642 * Match a regexp against a string.
3643 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3644 * Uses curbuf for line count and 'iskeyword'.
3645 *
3646 * Return TRUE if there is a match, FALSE if not.
3647 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003648 static int
3649bt_regexec(rmp, line, col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003650 regmatch_T *rmp;
3651 char_u *line; /* string to match against */
3652 colnr_T col; /* column to start looking for match */
3653{
3654 reg_match = rmp;
3655 reg_mmatch = NULL;
3656 reg_maxline = 0;
3657 reg_line_lbr = FALSE;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003658 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003659 reg_win = NULL;
3660 ireg_ic = rmp->rm_ic;
3661#ifdef FEAT_MBYTE
3662 ireg_icombine = FALSE;
3663#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003664 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003665 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003666}
3667
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)

static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));

/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The only difference in the setup is that "reg_line_lbr" is TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    static int
bt_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    /* Single-line match: "reg_match" is set, "reg_mmatch" is not. */
    reg_mmatch = NULL;
    reg_match = rmp;

    reg_win = NULL;
    reg_buf = curbuf;
    reg_maxline = 0;
    reg_line_lbr = TRUE;

    ireg_maxcol = 0;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    ireg_ic = rmp->rm_ic;

    return (bt_regexec_both(line, col, NULL) != 0);
}
#endif
3696
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003697static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3698
Bram Moolenaar071d4272004-06-13 20:20:40 +00003699/*
3700 * Match a regexp against multiple lines.
3701 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3702 * Uses curbuf for line count and 'iskeyword'.
3703 *
3704 * Return zero if there is no match. Return number of lines contained in the
3705 * match otherwise.
3706 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003707 static long
3708bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003709 regmmatch_T *rmp;
3710 win_T *win; /* window in which to search or NULL */
3711 buf_T *buf; /* buffer in which to search */
3712 linenr_T lnum; /* nr of line to start looking for match */
3713 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003714 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003715{
3716 long r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003717
3718 reg_match = NULL;
3719 reg_mmatch = rmp;
3720 reg_buf = buf;
3721 reg_win = win;
3722 reg_firstlnum = lnum;
3723 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3724 reg_line_lbr = FALSE;
3725 ireg_ic = rmp->rmm_ic;
3726#ifdef FEAT_MBYTE
3727 ireg_icombine = FALSE;
3728#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003729 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003730
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003731 r = bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003732
3733 return r;
3734}
3735
3736/*
3737 * Match a regexp against a string ("line" points to the string) or multiple
3738 * lines ("line" is NULL, use reg_getline()).
3739 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003740 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003741bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003742 char_u *line;
3743 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003744 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003745{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003746 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003747 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003748 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003749
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003750 /* Create "regstack" and "backpos" if they are not allocated yet.
3751 * We allocate *_INITIAL amount of bytes first and then set the grow size
3752 * to much bigger value to avoid many malloc calls in case of deep regular
3753 * expressions. */
3754 if (regstack.ga_data == NULL)
3755 {
3756 /* Use an item size of 1 byte, since we push different things
3757 * onto the regstack. */
3758 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3759 ga_grow(&regstack, REGSTACK_INITIAL);
3760 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3761 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003762
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003763 if (backpos.ga_data == NULL)
3764 {
3765 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3766 ga_grow(&backpos, BACKPOS_INITIAL);
3767 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3768 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003769
Bram Moolenaar071d4272004-06-13 20:20:40 +00003770 if (REG_MULTI)
3771 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003772 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003773 line = reg_getline((linenr_T)0);
3774 reg_startpos = reg_mmatch->startpos;
3775 reg_endpos = reg_mmatch->endpos;
3776 }
3777 else
3778 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003779 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003780 reg_startp = reg_match->startp;
3781 reg_endp = reg_match->endp;
3782 }
3783
3784 /* Be paranoid... */
3785 if (prog == NULL || line == NULL)
3786 {
3787 EMSG(_(e_null));
3788 goto theend;
3789 }
3790
3791 /* Check validity of program. */
3792 if (prog_magic_wrong())
3793 goto theend;
3794
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003795 /* If the start column is past the maximum column: no need to try. */
3796 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3797 goto theend;
3798
Bram Moolenaar071d4272004-06-13 20:20:40 +00003799 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3800 if (prog->regflags & RF_ICASE)
3801 ireg_ic = TRUE;
3802 else if (prog->regflags & RF_NOICASE)
3803 ireg_ic = FALSE;
3804
3805#ifdef FEAT_MBYTE
3806 /* If pattern contains "\Z" overrule value of ireg_icombine */
3807 if (prog->regflags & RF_ICOMBINE)
3808 ireg_icombine = TRUE;
3809#endif
3810
3811 /* If there is a "must appear" string, look for it. */
3812 if (prog->regmust != NULL)
3813 {
3814 int c;
3815
3816#ifdef FEAT_MBYTE
3817 if (has_mbyte)
3818 c = (*mb_ptr2char)(prog->regmust);
3819 else
3820#endif
3821 c = *prog->regmust;
3822 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003823
3824 /*
3825 * This is used very often, esp. for ":global". Use three versions of
3826 * the loop to avoid overhead of conditions.
3827 */
3828 if (!ireg_ic
3829#ifdef FEAT_MBYTE
3830 && !has_mbyte
3831#endif
3832 )
3833 while ((s = vim_strbyte(s, c)) != NULL)
3834 {
3835 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3836 break; /* Found it. */
3837 ++s;
3838 }
3839#ifdef FEAT_MBYTE
3840 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3841 while ((s = vim_strchr(s, c)) != NULL)
3842 {
3843 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3844 break; /* Found it. */
3845 mb_ptr_adv(s);
3846 }
3847#endif
3848 else
3849 while ((s = cstrchr(s, c)) != NULL)
3850 {
3851 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3852 break; /* Found it. */
3853 mb_ptr_adv(s);
3854 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003855 if (s == NULL) /* Not present. */
3856 goto theend;
3857 }
3858
3859 regline = line;
3860 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003861 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003862
3863 /* Simplest case: Anchored match need be tried only once. */
3864 if (prog->reganch)
3865 {
3866 int c;
3867
3868#ifdef FEAT_MBYTE
3869 if (has_mbyte)
3870 c = (*mb_ptr2char)(regline + col);
3871 else
3872#endif
3873 c = regline[col];
3874 if (prog->regstart == NUL
3875 || prog->regstart == c
3876 || (ireg_ic && ((
3877#ifdef FEAT_MBYTE
3878 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3879 || (c < 255 && prog->regstart < 255 &&
3880#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003881 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003882 retval = regtry(prog, col);
3883 else
3884 retval = 0;
3885 }
3886 else
3887 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003888#ifdef FEAT_RELTIME
3889 int tm_count = 0;
3890#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003891 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003892 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003893 {
3894 if (prog->regstart != NUL)
3895 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003896 /* Skip until the char we know it must start with.
3897 * Used often, do some work to avoid call overhead. */
3898 if (!ireg_ic
3899#ifdef FEAT_MBYTE
3900 && !has_mbyte
3901#endif
3902 )
3903 s = vim_strbyte(regline + col, prog->regstart);
3904 else
3905 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003906 if (s == NULL)
3907 {
3908 retval = 0;
3909 break;
3910 }
3911 col = (int)(s - regline);
3912 }
3913
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003914 /* Check for maximum column to try. */
3915 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3916 {
3917 retval = 0;
3918 break;
3919 }
3920
Bram Moolenaar071d4272004-06-13 20:20:40 +00003921 retval = regtry(prog, col);
3922 if (retval > 0)
3923 break;
3924
3925 /* if not currently on the first line, get it again */
3926 if (reglnum != 0)
3927 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003928 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003929 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003930 }
3931 if (regline[col] == NUL)
3932 break;
3933#ifdef FEAT_MBYTE
3934 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003935 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003936 else
3937#endif
3938 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003939#ifdef FEAT_RELTIME
3940 /* Check for timeout once in a twenty times to avoid overhead. */
3941 if (tm != NULL && ++tm_count == 20)
3942 {
3943 tm_count = 0;
3944 if (profile_passed_limit(tm))
3945 break;
3946 }
3947#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003948 }
3949 }
3950
Bram Moolenaar071d4272004-06-13 20:20:40 +00003951theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003952 /* Free "reg_tofree" when it's a bit big.
3953 * Free regstack and backpos if they are bigger than their initial size. */
3954 if (reg_tofreelen > 400)
3955 {
3956 vim_free(reg_tofree);
3957 reg_tofree = NULL;
3958 }
3959 if (regstack.ga_maxlen > REGSTACK_INITIAL)
3960 ga_clear(&regstack);
3961 if (backpos.ga_maxlen > BACKPOS_INITIAL)
3962 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003963
Bram Moolenaar071d4272004-06-13 20:20:40 +00003964 return retval;
3965}
3966
3967#ifdef FEAT_SYN_HL
3968static reg_extmatch_T *make_extmatch __ARGS((void));
3969
3970/*
3971 * Create a new extmatch and mark it as referenced once.
3972 */
3973 static reg_extmatch_T *
3974make_extmatch()
3975{
3976 reg_extmatch_T *em;
3977
3978 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3979 if (em != NULL)
3980 em->refcnt = 1;
3981 return em;
3982}
3983
3984/*
3985 * Add a reference to an extmatch.
3986 */
3987 reg_extmatch_T *
3988ref_extmatch(em)
3989 reg_extmatch_T *em;
3990{
3991 if (em != NULL)
3992 em->refcnt++;
3993 return em;
3994}
3995
3996/*
3997 * Remove a reference to an extmatch. If there are no references left, free
3998 * the info.
3999 */
4000 void
4001unref_extmatch(em)
4002 reg_extmatch_T *em;
4003{
4004 int i;
4005
4006 if (em != NULL && --em->refcnt <= 0)
4007 {
4008 for (i = 0; i < NSUBEXP; ++i)
4009 vim_free(em->matches[i]);
4010 vim_free(em);
4011 }
4012}
4013#endif
4014
4015/*
4016 * regtry - try match of "prog" with at regline["col"].
4017 * Returns 0 for failure, number of lines contained in the match otherwise.
4018 */
4019 static long
4020regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004021 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004022 colnr_T col;
4023{
4024 reginput = regline + col;
4025 need_clear_subexpr = TRUE;
4026#ifdef FEAT_SYN_HL
4027 /* Clear the external match subpointers if necessary. */
4028 if (prog->reghasz == REX_SET)
4029 need_clear_zsubexpr = TRUE;
4030#endif
4031
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004032 if (regmatch(prog->program + 1) == 0)
4033 return 0;
4034
4035 cleanup_subexpr();
4036 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004037 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004038 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004039 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004040 reg_startpos[0].lnum = 0;
4041 reg_startpos[0].col = col;
4042 }
4043 if (reg_endpos[0].lnum < 0)
4044 {
4045 reg_endpos[0].lnum = reglnum;
4046 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004047 }
4048 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004049 /* Use line number of "\ze". */
4050 reglnum = reg_endpos[0].lnum;
4051 }
4052 else
4053 {
4054 if (reg_startp[0] == NULL)
4055 reg_startp[0] = regline + col;
4056 if (reg_endp[0] == NULL)
4057 reg_endp[0] = reginput;
4058 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004059#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004060 /* Package any found \z(...\) matches for export. Default is none. */
4061 unref_extmatch(re_extmatch_out);
4062 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004063
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004064 if (prog->reghasz == REX_SET)
4065 {
4066 int i;
4067
4068 cleanup_zsubexpr();
4069 re_extmatch_out = make_extmatch();
4070 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004071 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004072 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004073 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004074 /* Only accept single line matches. */
4075 if (reg_startzpos[i].lnum >= 0
4076 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
4077 re_extmatch_out->matches[i] =
4078 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004079 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004080 reg_endzpos[i].col - reg_startzpos[i].col);
4081 }
4082 else
4083 {
4084 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4085 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004086 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004087 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004088 }
4089 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004090 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004091#endif
4092 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004093}
4094
4095#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00004096static int reg_prev_class __ARGS((void));
4097
Bram Moolenaar071d4272004-06-13 20:20:40 +00004098/*
4099 * Get class of previous character.
4100 */
4101 static int
4102reg_prev_class()
4103{
4104 if (reginput > regline)
Bram Moolenaarf813a182013-01-30 13:59:37 +01004105 return mb_get_class_buf(reginput - 1
4106 - (*mb_head_off)(regline, reginput - 1), reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004107 return -1;
4108}
4109
Bram Moolenaar071d4272004-06-13 20:20:40 +00004110#endif
/* Advance "reginput" with mb_ptr_adv() (presumably by one character, which
 * may be multi-byte; see the definition of mb_ptr_adv). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  Logically these belong to
 * regmatch(), but keeping them at file level reduces the amount of stack
 * space used (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4120
4121/*
4122 * regmatch - main matching routine
4123 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004124 * Conceptually the strategy is simple: Check to see whether the current node
4125 * matches, push an item onto the regstack and loop to see whether the rest
4126 * matches, and then act accordingly. In practice we make some effort to
4127 * avoid using the regstack, in particular by going through "ordinary" nodes
4128 * (that don't need to know whether the rest of the match failed) by a nested
4129 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004130 *
4131 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4132 * the last matched character.
4133 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4134 * undefined state!
4135 */
4136 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004137regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004138 char_u *scan; /* Current node. */
4139{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004140 char_u *next; /* Next node. */
4141 int op;
4142 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004143 regitem_T *rp;
4144 int no;
4145 int status; /* one of the RA_ values: */
4146#define RA_FAIL 1 /* something failed, abort */
4147#define RA_CONT 2 /* continue in inner loop */
4148#define RA_BREAK 3 /* break inner loop */
4149#define RA_MATCH 4 /* successful match */
4150#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004151
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004152 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004153 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004154 regstack.ga_len = 0;
4155 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004156
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004157 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004158 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004159 */
4160 for (;;)
4161 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004162 /* Some patterns may cause a long time to match, even though they are not
Bram Moolenaar071d4272004-06-13 20:20:40 +00004163 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
4164 fast_breakcheck();
4165
4166#ifdef DEBUG
4167 if (scan != NULL && regnarrate)
4168 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004169 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004170 mch_errmsg("(\n");
4171 }
4172#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004173
4174 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004175 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004176 * regstack.
4177 */
4178 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004179 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004180 if (got_int || scan == NULL)
4181 {
4182 status = RA_FAIL;
4183 break;
4184 }
4185 status = RA_CONT;
4186
Bram Moolenaar071d4272004-06-13 20:20:40 +00004187#ifdef DEBUG
4188 if (regnarrate)
4189 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004190 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004191 mch_errmsg("...\n");
4192# ifdef FEAT_SYN_HL
4193 if (re_extmatch_in != NULL)
4194 {
4195 int i;
4196
4197 mch_errmsg(_("External submatches:\n"));
4198 for (i = 0; i < NSUBEXP; i++)
4199 {
4200 mch_errmsg(" \"");
4201 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004202 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004203 mch_errmsg("\"\n");
4204 }
4205 }
4206# endif
4207 }
4208#endif
4209 next = regnext(scan);
4210
4211 op = OP(scan);
4212 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004213 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4214 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004215 {
4216 reg_nextline();
4217 }
4218 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4219 {
4220 ADVANCE_REGINPUT();
4221 }
4222 else
4223 {
4224 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004225 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004226#ifdef FEAT_MBYTE
4227 if (has_mbyte)
4228 c = (*mb_ptr2char)(reginput);
4229 else
4230#endif
4231 c = *reginput;
4232 switch (op)
4233 {
4234 case BOL:
4235 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004236 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004237 break;
4238
4239 case EOL:
4240 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004241 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004242 break;
4243
4244 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004245 /* We're not at the beginning of the file when below the first
4246 * line where we started, not at the start of the line or we
4247 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004248 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004249 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004250 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004251 break;
4252
4253 case RE_EOF:
4254 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004255 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004256 break;
4257
4258 case CURSOR:
4259 /* Check if the buffer is in a window and compare the
4260 * reg_win->w_cursor position to the match position. */
4261 if (reg_win == NULL
4262 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4263 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004264 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004265 break;
4266
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004267 case RE_MARK:
4268 /* Compare the mark position to the match position. NOTE: Always
4269 * uses the current buffer. */
4270 {
4271 int mark = OPERAND(scan)[0];
4272 int cmp = OPERAND(scan)[1];
4273 pos_T *pos;
4274
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004275 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004276 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004277 || pos->lnum <= 0 /* mark isn't set (in curbuf) */
4278 || (pos->lnum == reglnum + reg_firstlnum
4279 ? (pos->col == (colnr_T)(reginput - regline)
4280 ? (cmp == '<' || cmp == '>')
4281 : (pos->col < (colnr_T)(reginput - regline)
4282 ? cmp != '>'
4283 : cmp != '<'))
4284 : (pos->lnum < reglnum + reg_firstlnum
4285 ? cmp != '>'
4286 : cmp != '<')))
4287 status = RA_NOMATCH;
4288 }
4289 break;
4290
4291 case RE_VISUAL:
4292#ifdef FEAT_VISUAL
4293 /* Check if the buffer is the current buffer. and whether the
4294 * position is inside the Visual area. */
4295 if (reg_buf != curbuf || VIsual.lnum == 0)
4296 status = RA_NOMATCH;
4297 else
4298 {
4299 pos_T top, bot;
4300 linenr_T lnum;
4301 colnr_T col;
4302 win_T *wp = reg_win == NULL ? curwin : reg_win;
4303 int mode;
4304
4305 if (VIsual_active)
4306 {
4307 if (lt(VIsual, wp->w_cursor))
4308 {
4309 top = VIsual;
4310 bot = wp->w_cursor;
4311 }
4312 else
4313 {
4314 top = wp->w_cursor;
4315 bot = VIsual;
4316 }
4317 mode = VIsual_mode;
4318 }
4319 else
4320 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004321 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004322 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004323 top = curbuf->b_visual.vi_start;
4324 bot = curbuf->b_visual.vi_end;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004325 }
4326 else
4327 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004328 top = curbuf->b_visual.vi_end;
4329 bot = curbuf->b_visual.vi_start;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004330 }
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004331 mode = curbuf->b_visual.vi_mode;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004332 }
4333 lnum = reglnum + reg_firstlnum;
4334 col = (colnr_T)(reginput - regline);
4335 if (lnum < top.lnum || lnum > bot.lnum)
4336 status = RA_NOMATCH;
4337 else if (mode == 'v')
4338 {
4339 if ((lnum == top.lnum && col < top.col)
4340 || (lnum == bot.lnum
4341 && col >= bot.col + (*p_sel != 'e')))
4342 status = RA_NOMATCH;
4343 }
4344 else if (mode == Ctrl_V)
4345 {
4346 colnr_T start, end;
4347 colnr_T start2, end2;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004348 colnr_T cols;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004349
4350 getvvcol(wp, &top, &start, NULL, &end);
4351 getvvcol(wp, &bot, &start2, NULL, &end2);
4352 if (start2 < start)
4353 start = start2;
4354 if (end2 > end)
4355 end = end2;
4356 if (top.col == MAXCOL || bot.col == MAXCOL)
4357 end = MAXCOL;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004358 cols = win_linetabsize(wp,
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004359 regline, (colnr_T)(reginput - regline));
Bram Moolenaar89d40322006-08-29 15:30:07 +00004360 if (cols < start || cols > end - (*p_sel == 'e'))
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004361 status = RA_NOMATCH;
4362 }
4363 }
4364#else
4365 status = RA_NOMATCH;
4366#endif
4367 break;
4368
Bram Moolenaar071d4272004-06-13 20:20:40 +00004369 case RE_LNUM:
4370 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4371 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004372 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004373 break;
4374
4375 case RE_COL:
4376 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004377 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004378 break;
4379
4380 case RE_VCOL:
4381 if (!re_num_cmp((long_u)win_linetabsize(
4382 reg_win == NULL ? curwin : reg_win,
4383 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004384 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004385 break;
4386
4387 case BOW: /* \<word; reginput points to w */
4388 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004389 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004390#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004391 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004392 {
4393 int this_class;
4394
4395 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004396 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004397 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004398 status = RA_NOMATCH; /* not on a word at all */
4399 else if (reg_prev_class() == this_class)
4400 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004401 }
4402#endif
4403 else
4404 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004405 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4406 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004407 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004408 }
4409 break;
4410
4411 case EOW: /* word\>; reginput points after d */
4412 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004413 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004414#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004415 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004416 {
4417 int this_class, prev_class;
4418
4419 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004420 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004421 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004422 if (this_class == prev_class
4423 || prev_class == 0 || prev_class == 1)
4424 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004425 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004426#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004427 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004428 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004429 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4430 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004431 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004432 }
4433 break; /* Matched with EOW */
4434
4435 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004436 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004437 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004438 status = RA_NOMATCH;
4439 else
4440 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004441 break;
4442
4443 case IDENT:
4444 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004445 status = RA_NOMATCH;
4446 else
4447 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004448 break;
4449
4450 case SIDENT:
4451 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004452 status = RA_NOMATCH;
4453 else
4454 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004455 break;
4456
4457 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004458 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004459 status = RA_NOMATCH;
4460 else
4461 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004462 break;
4463
4464 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004465 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004466 status = RA_NOMATCH;
4467 else
4468 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004469 break;
4470
4471 case FNAME:
4472 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004473 status = RA_NOMATCH;
4474 else
4475 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004476 break;
4477
4478 case SFNAME:
4479 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004480 status = RA_NOMATCH;
4481 else
4482 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004483 break;
4484
4485 case PRINT:
4486 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004487 status = RA_NOMATCH;
4488 else
4489 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004490 break;
4491
4492 case SPRINT:
4493 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004494 status = RA_NOMATCH;
4495 else
4496 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004497 break;
4498
4499 case WHITE:
4500 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004501 status = RA_NOMATCH;
4502 else
4503 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004504 break;
4505
4506 case NWHITE:
4507 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004508 status = RA_NOMATCH;
4509 else
4510 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004511 break;
4512
4513 case DIGIT:
4514 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004515 status = RA_NOMATCH;
4516 else
4517 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004518 break;
4519
4520 case NDIGIT:
4521 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004522 status = RA_NOMATCH;
4523 else
4524 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004525 break;
4526
4527 case HEX:
4528 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004529 status = RA_NOMATCH;
4530 else
4531 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004532 break;
4533
4534 case NHEX:
4535 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004536 status = RA_NOMATCH;
4537 else
4538 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004539 break;
4540
4541 case OCTAL:
4542 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004543 status = RA_NOMATCH;
4544 else
4545 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004546 break;
4547
4548 case NOCTAL:
4549 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004550 status = RA_NOMATCH;
4551 else
4552 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004553 break;
4554
4555 case WORD:
4556 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004557 status = RA_NOMATCH;
4558 else
4559 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 break;
4561
4562 case NWORD:
4563 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004564 status = RA_NOMATCH;
4565 else
4566 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004567 break;
4568
4569 case HEAD:
4570 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004571 status = RA_NOMATCH;
4572 else
4573 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004574 break;
4575
4576 case NHEAD:
4577 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004578 status = RA_NOMATCH;
4579 else
4580 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004581 break;
4582
4583 case ALPHA:
4584 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004585 status = RA_NOMATCH;
4586 else
4587 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004588 break;
4589
4590 case NALPHA:
4591 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004592 status = RA_NOMATCH;
4593 else
4594 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004595 break;
4596
4597 case LOWER:
4598 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004599 status = RA_NOMATCH;
4600 else
4601 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004602 break;
4603
4604 case NLOWER:
4605 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004606 status = RA_NOMATCH;
4607 else
4608 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 break;
4610
4611 case UPPER:
4612 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 status = RA_NOMATCH;
4614 else
4615 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004616 break;
4617
4618 case NUPPER:
4619 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004620 status = RA_NOMATCH;
4621 else
4622 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 break;
4624
4625 case EXACTLY:
4626 {
4627 int len;
4628 char_u *opnd;
4629
4630 opnd = OPERAND(scan);
4631 /* Inline the first byte, for speed. */
4632 if (*opnd != *reginput
4633 && (!ireg_ic || (
4634#ifdef FEAT_MBYTE
4635 !enc_utf8 &&
4636#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004637 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004638 status = RA_NOMATCH;
4639 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004640 {
4641 /* match empty string always works; happens when "~" is
4642 * empty. */
4643 }
4644 else if (opnd[1] == NUL
4645#ifdef FEAT_MBYTE
4646 && !(enc_utf8 && ireg_ic)
4647#endif
4648 )
4649 ++reginput; /* matched a single char */
4650 else
4651 {
4652 len = (int)STRLEN(opnd);
4653 /* Need to match first byte again for multi-byte. */
4654 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004655 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004656#ifdef FEAT_MBYTE
4657 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004658 else if (enc_utf8
4659 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004660 {
4661 /* raaron: This code makes a composing character get
4662 * ignored, which is the correct behavior (sometimes)
4663 * for voweled Hebrew texts. */
4664 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004665 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004666 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004667#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004668 else
4669 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004670 }
4671 }
4672 break;
4673
4674 case ANYOF:
4675 case ANYBUT:
4676 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004677 status = RA_NOMATCH;
4678 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4679 status = RA_NOMATCH;
4680 else
4681 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004682 break;
4683
4684#ifdef FEAT_MBYTE
4685 case MULTIBYTECODE:
4686 if (has_mbyte)
4687 {
4688 int i, len;
4689 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004690 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004691
4692 opnd = OPERAND(scan);
4693 /* Safety check (just in case 'encoding' was changed since
4694 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004695 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004696 {
4697 status = RA_NOMATCH;
4698 break;
4699 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004700 if (enc_utf8)
4701 opndc = mb_ptr2char(opnd);
4702 if (enc_utf8 && utf_iscomposing(opndc))
4703 {
4704 /* When only a composing char is given match at any
4705 * position where that composing char appears. */
4706 status = RA_NOMATCH;
4707 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004708 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004709 inpc = mb_ptr2char(reginput + i);
4710 if (!utf_iscomposing(inpc))
4711 {
4712 if (i > 0)
4713 break;
4714 }
4715 else if (opndc == inpc)
4716 {
4717 /* Include all following composing chars. */
4718 len = i + mb_ptr2len(reginput + i);
4719 status = RA_MATCH;
4720 break;
4721 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004722 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004723 }
4724 else
4725 for (i = 0; i < len; ++i)
4726 if (opnd[i] != reginput[i])
4727 {
4728 status = RA_NOMATCH;
4729 break;
4730 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004731 reginput += len;
4732 }
4733 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004734 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004735 break;
4736#endif
4737
4738 case NOTHING:
4739 break;
4740
4741 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004742 {
4743 int i;
4744 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004745
Bram Moolenaar582fd852005-03-28 20:58:01 +00004746 /*
4747 * When we run into BACK we need to check if we don't keep
4748 * looping without matching any input. The second and later
4749 * times a BACK is encountered it fails if the input is still
4750 * at the same position as the previous time.
4751 * The positions are stored in "backpos" and found by the
4752 * current value of "scan", the position in the RE program.
4753 */
4754 bp = (backpos_T *)backpos.ga_data;
4755 for (i = 0; i < backpos.ga_len; ++i)
4756 if (bp[i].bp_scan == scan)
4757 break;
4758 if (i == backpos.ga_len)
4759 {
4760 /* First time at this BACK, make room to store the pos. */
4761 if (ga_grow(&backpos, 1) == FAIL)
4762 status = RA_FAIL;
4763 else
4764 {
4765 /* get "ga_data" again, it may have changed */
4766 bp = (backpos_T *)backpos.ga_data;
4767 bp[i].bp_scan = scan;
4768 ++backpos.ga_len;
4769 }
4770 }
4771 else if (reg_save_equal(&bp[i].bp_pos))
4772 /* Still at same position as last time, fail. */
4773 status = RA_NOMATCH;
4774
4775 if (status != RA_FAIL && status != RA_NOMATCH)
4776 reg_save(&bp[i].bp_pos, &backpos);
4777 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004778 break;
4779
Bram Moolenaar071d4272004-06-13 20:20:40 +00004780 case MOPEN + 0: /* Match start: \zs */
4781 case MOPEN + 1: /* \( */
4782 case MOPEN + 2:
4783 case MOPEN + 3:
4784 case MOPEN + 4:
4785 case MOPEN + 5:
4786 case MOPEN + 6:
4787 case MOPEN + 7:
4788 case MOPEN + 8:
4789 case MOPEN + 9:
4790 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004791 no = op - MOPEN;
4792 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004793 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004794 if (rp == NULL)
4795 status = RA_FAIL;
4796 else
4797 {
4798 rp->rs_no = no;
4799 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4800 &reg_startp[no]);
4801 /* We simply continue and handle the result when done. */
4802 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004803 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004804 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004805
4806 case NOPEN: /* \%( */
4807 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004808 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004809 status = RA_FAIL;
4810 /* We simply continue and handle the result when done. */
4811 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004812
4813#ifdef FEAT_SYN_HL
4814 case ZOPEN + 1:
4815 case ZOPEN + 2:
4816 case ZOPEN + 3:
4817 case ZOPEN + 4:
4818 case ZOPEN + 5:
4819 case ZOPEN + 6:
4820 case ZOPEN + 7:
4821 case ZOPEN + 8:
4822 case ZOPEN + 9:
4823 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004824 no = op - ZOPEN;
4825 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004826 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004827 if (rp == NULL)
4828 status = RA_FAIL;
4829 else
4830 {
4831 rp->rs_no = no;
4832 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4833 &reg_startzp[no]);
4834 /* We simply continue and handle the result when done. */
4835 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004836 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004837 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004838#endif
4839
4840 case MCLOSE + 0: /* Match end: \ze */
4841 case MCLOSE + 1: /* \) */
4842 case MCLOSE + 2:
4843 case MCLOSE + 3:
4844 case MCLOSE + 4:
4845 case MCLOSE + 5:
4846 case MCLOSE + 6:
4847 case MCLOSE + 7:
4848 case MCLOSE + 8:
4849 case MCLOSE + 9:
4850 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004851 no = op - MCLOSE;
4852 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004853 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004854 if (rp == NULL)
4855 status = RA_FAIL;
4856 else
4857 {
4858 rp->rs_no = no;
4859 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4860 /* We simply continue and handle the result when done. */
4861 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004862 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004863 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004864
4865#ifdef FEAT_SYN_HL
4866 case ZCLOSE + 1: /* \) after \z( */
4867 case ZCLOSE + 2:
4868 case ZCLOSE + 3:
4869 case ZCLOSE + 4:
4870 case ZCLOSE + 5:
4871 case ZCLOSE + 6:
4872 case ZCLOSE + 7:
4873 case ZCLOSE + 8:
4874 case ZCLOSE + 9:
4875 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004876 no = op - ZCLOSE;
4877 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004878 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004879 if (rp == NULL)
4880 status = RA_FAIL;
4881 else
4882 {
4883 rp->rs_no = no;
4884 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4885 &reg_endzp[no]);
4886 /* We simply continue and handle the result when done. */
4887 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004888 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004889 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004890#endif
4891
4892 case BACKREF + 1:
4893 case BACKREF + 2:
4894 case BACKREF + 3:
4895 case BACKREF + 4:
4896 case BACKREF + 5:
4897 case BACKREF + 6:
4898 case BACKREF + 7:
4899 case BACKREF + 8:
4900 case BACKREF + 9:
4901 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004902 int len;
4903 linenr_T clnum;
4904 colnr_T ccol;
4905 char_u *p;
4906
4907 no = op - BACKREF;
4908 cleanup_subexpr();
4909 if (!REG_MULTI) /* Single-line regexp */
4910 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004911 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004912 {
4913 /* Backref was not set: Match an empty string. */
4914 len = 0;
4915 }
4916 else
4917 {
4918 /* Compare current input with back-ref in the same
4919 * line. */
4920 len = (int)(reg_endp[no] - reg_startp[no]);
4921 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004922 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004923 }
4924 }
4925 else /* Multi-line regexp */
4926 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004927 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004928 {
4929 /* Backref was not set: Match an empty string. */
4930 len = 0;
4931 }
4932 else
4933 {
4934 if (reg_startpos[no].lnum == reglnum
4935 && reg_endpos[no].lnum == reglnum)
4936 {
4937 /* Compare back-ref within the current line. */
4938 len = reg_endpos[no].col - reg_startpos[no].col;
4939 if (cstrncmp(regline + reg_startpos[no].col,
4940 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004941 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004942 }
4943 else
4944 {
4945 /* Messy situation: Need to compare between two
4946 * lines. */
4947 ccol = reg_startpos[no].col;
4948 clnum = reg_startpos[no].lnum;
4949 for (;;)
4950 {
4951 /* Since getting one line may invalidate
4952 * the other, need to make copy. Slow! */
4953 if (regline != reg_tofree)
4954 {
4955 len = (int)STRLEN(regline);
4956 if (reg_tofree == NULL
4957 || len >= (int)reg_tofreelen)
4958 {
4959 len += 50; /* get some extra */
4960 vim_free(reg_tofree);
4961 reg_tofree = alloc(len);
4962 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004963 {
4964 status = RA_FAIL; /* outof memory!*/
4965 break;
4966 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004967 reg_tofreelen = len;
4968 }
4969 STRCPY(reg_tofree, regline);
4970 reginput = reg_tofree
4971 + (reginput - regline);
4972 regline = reg_tofree;
4973 }
4974
4975 /* Get the line to compare with. */
4976 p = reg_getline(clnum);
4977 if (clnum == reg_endpos[no].lnum)
4978 len = reg_endpos[no].col - ccol;
4979 else
4980 len = (int)STRLEN(p + ccol);
4981
4982 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004983 {
4984 status = RA_NOMATCH; /* doesn't match */
4985 break;
4986 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004987 if (clnum == reg_endpos[no].lnum)
4988 break; /* match and at end! */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004989 if (reglnum >= reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004990 {
4991 status = RA_NOMATCH; /* text too short */
4992 break;
4993 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004994
4995 /* Advance to next line. */
4996 reg_nextline();
4997 ++clnum;
4998 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004999 if (got_int)
5000 {
5001 status = RA_FAIL;
5002 break;
5003 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005004 }
5005
5006 /* found a match! Note that regline may now point
5007 * to a copy of the line, that should not matter. */
5008 }
5009 }
5010 }
5011
5012 /* Matched the backref, skip over it. */
5013 reginput += len;
5014 }
5015 break;
5016
5017#ifdef FEAT_SYN_HL
5018 case ZREF + 1:
5019 case ZREF + 2:
5020 case ZREF + 3:
5021 case ZREF + 4:
5022 case ZREF + 5:
5023 case ZREF + 6:
5024 case ZREF + 7:
5025 case ZREF + 8:
5026 case ZREF + 9:
5027 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005028 int len;
5029
5030 cleanup_zsubexpr();
5031 no = op - ZREF;
5032 if (re_extmatch_in != NULL
5033 && re_extmatch_in->matches[no] != NULL)
5034 {
5035 len = (int)STRLEN(re_extmatch_in->matches[no]);
5036 if (cstrncmp(re_extmatch_in->matches[no],
5037 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005038 status = RA_NOMATCH;
5039 else
5040 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005041 }
5042 else
5043 {
5044 /* Backref was not set: Match an empty string. */
5045 }
5046 }
5047 break;
5048#endif
5049
5050 case BRANCH:
5051 {
5052 if (OP(next) != BRANCH) /* No choice. */
5053 next = OPERAND(scan); /* Avoid recursion. */
5054 else
5055 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005056 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005057 if (rp == NULL)
5058 status = RA_FAIL;
5059 else
5060 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005061 }
5062 }
5063 break;
5064
5065 case BRACE_LIMITS:
5066 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005067 if (OP(next) == BRACE_SIMPLE)
5068 {
5069 bl_minval = OPERAND_MIN(scan);
5070 bl_maxval = OPERAND_MAX(scan);
5071 }
5072 else if (OP(next) >= BRACE_COMPLEX
5073 && OP(next) < BRACE_COMPLEX + 10)
5074 {
5075 no = OP(next) - BRACE_COMPLEX;
5076 brace_min[no] = OPERAND_MIN(scan);
5077 brace_max[no] = OPERAND_MAX(scan);
5078 brace_count[no] = 0;
5079 }
5080 else
5081 {
5082 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005083 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005084 }
5085 }
5086 break;
5087
5088 case BRACE_COMPLEX + 0:
5089 case BRACE_COMPLEX + 1:
5090 case BRACE_COMPLEX + 2:
5091 case BRACE_COMPLEX + 3:
5092 case BRACE_COMPLEX + 4:
5093 case BRACE_COMPLEX + 5:
5094 case BRACE_COMPLEX + 6:
5095 case BRACE_COMPLEX + 7:
5096 case BRACE_COMPLEX + 8:
5097 case BRACE_COMPLEX + 9:
5098 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005099 no = op - BRACE_COMPLEX;
5100 ++brace_count[no];
5101
5102 /* If not matched enough times yet, try one more */
5103 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005104 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005105 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005106 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005107 if (rp == NULL)
5108 status = RA_FAIL;
5109 else
5110 {
5111 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005112 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005113 next = OPERAND(scan);
5114 /* We continue and handle the result when done. */
5115 }
5116 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005117 }
5118
5119 /* If matched enough times, may try matching some more */
5120 if (brace_min[no] <= brace_max[no])
5121 {
5122 /* Range is the normal way around, use longest match */
5123 if (brace_count[no] <= brace_max[no])
5124 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005125 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005126 if (rp == NULL)
5127 status = RA_FAIL;
5128 else
5129 {
5130 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005131 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005132 next = OPERAND(scan);
5133 /* We continue and handle the result when done. */
5134 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005135 }
5136 }
5137 else
5138 {
5139 /* Range is backwards, use shortest match first */
5140 if (brace_count[no] <= brace_min[no])
5141 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005142 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005143 if (rp == NULL)
5144 status = RA_FAIL;
5145 else
5146 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005147 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005148 /* We continue and handle the result when done. */
5149 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005150 }
5151 }
5152 }
5153 break;
5154
5155 case BRACE_SIMPLE:
5156 case STAR:
5157 case PLUS:
5158 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005159 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005160
5161 /*
5162 * Lookahead to avoid useless match attempts when we know
5163 * what character comes next.
5164 */
5165 if (OP(next) == EXACTLY)
5166 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005167 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005168 if (ireg_ic)
5169 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005170 if (MB_ISUPPER(rst.nextb))
5171 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005172 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005173 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005174 }
5175 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005176 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005177 }
5178 else
5179 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005180 rst.nextb = NUL;
5181 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005182 }
5183 if (op != BRACE_SIMPLE)
5184 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005185 rst.minval = (op == STAR) ? 0 : 1;
5186 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005187 }
5188 else
5189 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005190 rst.minval = bl_minval;
5191 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005192 }
5193
5194 /*
5195 * When maxval > minval, try matching as much as possible, up
5196 * to maxval. When maxval < minval, try matching at least the
5197 * minimal number (since the range is backwards, that's also
5198 * maxval!).
5199 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005200 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005201 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005202 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005203 status = RA_FAIL;
5204 break;
5205 }
5206 if (rst.minval <= rst.maxval
5207 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5208 {
5209 /* It could match. Prepare for trying to match what
5210 * follows. The code is below. Parameters are stored in
5211 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005212 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005213 {
5214 EMSG(_(e_maxmempat));
5215 status = RA_FAIL;
5216 }
5217 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005218 status = RA_FAIL;
5219 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005220 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005221 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005222 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005223 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005224 if (rp == NULL)
5225 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005226 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005227 {
5228 *(((regstar_T *)rp) - 1) = rst;
5229 status = RA_BREAK; /* skip the restore bits */
5230 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005231 }
5232 }
5233 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005234 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005235
Bram Moolenaar071d4272004-06-13 20:20:40 +00005236 }
5237 break;
5238
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005239 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005240 case MATCH:
5241 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005242 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005243 if (rp == NULL)
5244 status = RA_FAIL;
5245 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005246 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005247 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005248 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005249 next = OPERAND(scan);
5250 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005251 }
5252 break;
5253
5254 case BEHIND:
5255 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005256 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005257 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005258 {
5259 EMSG(_(e_maxmempat));
5260 status = RA_FAIL;
5261 }
5262 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005263 status = RA_FAIL;
5264 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005265 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005266 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005267 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005268 if (rp == NULL)
5269 status = RA_FAIL;
5270 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005271 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005272 /* Need to save the subexpr to be able to restore them
5273 * when there is a match but we don't use it. */
5274 save_subexpr(((regbehind_T *)rp) - 1);
5275
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005276 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005277 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005278 /* First try if what follows matches. If it does then we
5279 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005280 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005281 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005282 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005283
5284 case BHPOS:
5285 if (REG_MULTI)
5286 {
5287 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5288 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005289 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005290 }
5291 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005293 break;
5294
5295 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005296 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5297 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005298 status = RA_NOMATCH;
5299 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005300 ADVANCE_REGINPUT();
5301 else
5302 reg_nextline();
5303 break;
5304
5305 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005306 status = RA_MATCH; /* Success! */
5307 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005308
5309 default:
5310 EMSG(_(e_re_corr));
5311#ifdef DEBUG
5312 printf("Illegal op code %d\n", op);
5313#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005314 status = RA_FAIL;
5315 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005316 }
5317 }
5318
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005319 /* If we can't continue sequentially, break the inner loop. */
5320 if (status != RA_CONT)
5321 break;
5322
5323 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005324 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005325
5326 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005327
5328 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005329 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005330 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005331 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005332 while (regstack.ga_len > 0 && status != RA_FAIL)
5333 {
5334 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5335 switch (rp->rs_state)
5336 {
5337 case RS_NOPEN:
5338 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005339 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005340 break;
5341
5342 case RS_MOPEN:
5343 /* Pop the state. Restore pointers when there is no match. */
5344 if (status == RA_NOMATCH)
5345 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5346 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005347 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005348 break;
5349
5350#ifdef FEAT_SYN_HL
5351 case RS_ZOPEN:
5352 /* Pop the state. Restore pointers when there is no match. */
5353 if (status == RA_NOMATCH)
5354 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5355 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005356 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005357 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005358#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005359
5360 case RS_MCLOSE:
5361 /* Pop the state. Restore pointers when there is no match. */
5362 if (status == RA_NOMATCH)
5363 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5364 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005365 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005366 break;
5367
5368#ifdef FEAT_SYN_HL
5369 case RS_ZCLOSE:
5370 /* Pop the state. Restore pointers when there is no match. */
5371 if (status == RA_NOMATCH)
5372 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5373 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005374 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005375 break;
5376#endif
5377
5378 case RS_BRANCH:
5379 if (status == RA_MATCH)
5380 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005381 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005382 else
5383 {
5384 if (status != RA_BREAK)
5385 {
5386 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005387 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005388 scan = rp->rs_scan;
5389 }
5390 if (scan == NULL || OP(scan) != BRANCH)
5391 {
5392 /* no more branches, didn't find a match */
5393 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005394 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005395 }
5396 else
5397 {
5398 /* Prepare to try a branch. */
5399 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005400 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005401 scan = OPERAND(scan);
5402 }
5403 }
5404 break;
5405
5406 case RS_BRCPLX_MORE:
5407 /* Pop the state. Restore pointers when there is no match. */
5408 if (status == RA_NOMATCH)
5409 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005410 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005411 --brace_count[rp->rs_no]; /* decrement match count */
5412 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005413 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005414 break;
5415
5416 case RS_BRCPLX_LONG:
5417 /* Pop the state. Restore pointers when there is no match. */
5418 if (status == RA_NOMATCH)
5419 {
5420 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005421 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005422 --brace_count[rp->rs_no];
5423 /* continue with the items after "\{}" */
5424 status = RA_CONT;
5425 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005426 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005427 if (status == RA_CONT)
5428 scan = regnext(scan);
5429 break;
5430
5431 case RS_BRCPLX_SHORT:
5432 /* Pop the state. Restore pointers when there is no match. */
5433 if (status == RA_NOMATCH)
5434 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005435 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005436 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005437 if (status == RA_NOMATCH)
5438 {
5439 scan = OPERAND(scan);
5440 status = RA_CONT;
5441 }
5442 break;
5443
5444 case RS_NOMATCH:
5445 /* Pop the state. If the operand matches for NOMATCH or
5446 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5447 * except for SUBPAT, and continue with the next item. */
5448 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5449 status = RA_NOMATCH;
5450 else
5451 {
5452 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005453 if (rp->rs_no != SUBPAT) /* zero-width */
5454 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005455 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005456 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005457 if (status == RA_CONT)
5458 scan = regnext(scan);
5459 break;
5460
5461 case RS_BEHIND1:
5462 if (status == RA_NOMATCH)
5463 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005464 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005465 regstack.ga_len -= sizeof(regbehind_T);
5466 }
5467 else
5468 {
5469 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5470 * the behind part does (not) match before the current
5471 * position in the input. This must be done at every
5472 * position in the input and checking if the match ends at
5473 * the current position. */
5474
5475 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005476 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005477
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005478 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005479 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005480 * result, hitting the start of the line or the previous
5481 * line (for multi-line matching).
5482 * Set behind_pos to where the match should end, BHPOS
5483 * will match it. Save the current value. */
5484 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5485 behind_pos = rp->rs_un.regsave;
5486
5487 rp->rs_state = RS_BEHIND2;
5488
Bram Moolenaar582fd852005-03-28 20:58:01 +00005489 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005490 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005491 }
5492 break;
5493
5494 case RS_BEHIND2:
5495 /*
5496 * Looping for BEHIND / NOBEHIND match.
5497 */
5498 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5499 {
5500 /* found a match that ends where "next" started */
5501 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5502 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005503 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5504 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005505 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005506 {
5507 /* But we didn't want a match. Need to restore the
5508 * subexpr, because what follows matched, so they have
5509 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005510 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005511 restore_subexpr(((regbehind_T *)rp) - 1);
5512 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005513 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005514 regstack.ga_len -= sizeof(regbehind_T);
5515 }
5516 else
5517 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005518 long limit;
5519
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005520 /* No match or a match that doesn't end where we want it: Go
5521 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005522 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005523 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005524 if (REG_MULTI)
5525 {
5526 if (rp->rs_un.regsave.rs_u.pos.col == 0)
5527 {
5528 if (rp->rs_un.regsave.rs_u.pos.lnum
5529 < behind_pos.rs_u.pos.lnum
5530 || reg_getline(
5531 --rp->rs_un.regsave.rs_u.pos.lnum)
5532 == NULL)
5533 no = FAIL;
5534 else
5535 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005536 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005537 rp->rs_un.regsave.rs_u.pos.col =
5538 (colnr_T)STRLEN(regline);
5539 }
5540 }
5541 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005542 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005543#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005544 if (has_mbyte)
5545 rp->rs_un.regsave.rs_u.pos.col -=
5546 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005547 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005548 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005549#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005550 --rp->rs_un.regsave.rs_u.pos.col;
5551 if (limit > 0
5552 && ((rp->rs_un.regsave.rs_u.pos.lnum
5553 < behind_pos.rs_u.pos.lnum
5554 ? (colnr_T)STRLEN(regline)
5555 : behind_pos.rs_u.pos.col)
5556 - rp->rs_un.regsave.rs_u.pos.col > limit))
5557 no = FAIL;
5558 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005559 }
5560 else
5561 {
5562 if (rp->rs_un.regsave.rs_u.ptr == regline)
5563 no = FAIL;
5564 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005565 {
5566 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5567 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5568 - rp->rs_un.regsave.rs_u.ptr) > limit)
5569 no = FAIL;
5570 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005571 }
5572 if (no == OK)
5573 {
5574 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005575 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005576 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005577 if (status == RA_MATCH)
5578 {
5579 /* We did match, so subexpr may have been changed,
5580 * need to restore them for the next try. */
5581 status = RA_NOMATCH;
5582 restore_subexpr(((regbehind_T *)rp) - 1);
5583 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005584 }
5585 else
5586 {
5587 /* Can't advance. For NOBEHIND that's a match. */
5588 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5589 if (rp->rs_no == NOBEHIND)
5590 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005591 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5592 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005593 status = RA_MATCH;
5594 }
5595 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005596 {
5597 /* We do want a proper match. Need to restore the
5598 * subexpr if we had a match, because they may have
5599 * been set. */
5600 if (status == RA_MATCH)
5601 {
5602 status = RA_NOMATCH;
5603 restore_subexpr(((regbehind_T *)rp) - 1);
5604 }
5605 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005606 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005607 regstack.ga_len -= sizeof(regbehind_T);
5608 }
5609 }
5610 break;
5611
5612 case RS_STAR_LONG:
5613 case RS_STAR_SHORT:
5614 {
5615 regstar_T *rst = ((regstar_T *)rp) - 1;
5616
5617 if (status == RA_MATCH)
5618 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005619 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005620 regstack.ga_len -= sizeof(regstar_T);
5621 break;
5622 }
5623
5624 /* Tried once already, restore input pointers. */
5625 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005626 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005627
5628 /* Repeat until we found a position where it could match. */
5629 for (;;)
5630 {
5631 if (status != RA_BREAK)
5632 {
5633 /* Tried first position already, advance. */
5634 if (rp->rs_state == RS_STAR_LONG)
5635 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005636 /* Trying for longest match, but couldn't or
5637 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005638 if (--rst->count < rst->minval)
5639 break;
5640 if (reginput == regline)
5641 {
5642 /* backup to last char of previous line */
5643 --reglnum;
5644 regline = reg_getline(reglnum);
5645 /* Just in case regrepeat() didn't count
5646 * right. */
5647 if (regline == NULL)
5648 break;
5649 reginput = regline + STRLEN(regline);
5650 fast_breakcheck();
5651 }
5652 else
5653 mb_ptr_back(regline, reginput);
5654 }
5655 else
5656 {
5657 /* Range is backwards, use shortest match first.
5658 * Careful: maxval and minval are exchanged!
5659 * Couldn't or didn't match: try advancing one
5660 * char. */
5661 if (rst->count == rst->minval
5662 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5663 break;
5664 ++rst->count;
5665 }
5666 if (got_int)
5667 break;
5668 }
5669 else
5670 status = RA_NOMATCH;
5671
5672 /* If it could match, try it. */
5673 if (rst->nextb == NUL || *reginput == rst->nextb
5674 || *reginput == rst->nextb_ic)
5675 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005676 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005677 scan = regnext(rp->rs_scan);
5678 status = RA_CONT;
5679 break;
5680 }
5681 }
5682 if (status != RA_CONT)
5683 {
5684 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005685 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005686 regstack.ga_len -= sizeof(regstar_T);
5687 status = RA_NOMATCH;
5688 }
5689 }
5690 break;
5691 }
5692
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005693 /* If we want to continue the inner loop or didn't pop a state
5694 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005695 if (status == RA_CONT || rp == (regitem_T *)
5696 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5697 break;
5698 }
5699
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005700 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005701 if (status == RA_CONT)
5702 continue;
5703
5704 /*
5705 * If the regstack is empty or something failed we are done.
5706 */
5707 if (regstack.ga_len == 0 || status == RA_FAIL)
5708 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005709 if (scan == NULL)
5710 {
5711 /*
5712 * We get here only if there's trouble -- normally "case END" is
5713 * the terminating point.
5714 */
5715 EMSG(_(e_re_corr));
5716#ifdef DEBUG
5717 printf("Premature EOL\n");
5718#endif
5719 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005720 if (status == RA_FAIL)
5721 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005722 return (status == RA_MATCH);
5723 }
5724
5725 } /* End of loop until the regstack is empty. */
5726
5727 /* NOTREACHED */
5728}
5729
5730/*
5731 * Push an item onto the regstack.
5732 * Returns pointer to new item. Returns NULL when out of memory.
5733 */
5734 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005735regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005736 regstate_T state;
5737 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005738{
5739 regitem_T *rp;
5740
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005741 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005742 {
5743 EMSG(_(e_maxmempat));
5744 return NULL;
5745 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005746 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005747 return NULL;
5748
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005749 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005750 rp->rs_state = state;
5751 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005752
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005753 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005754 return rp;
5755}
5756
5757/*
5758 * Pop an item from the regstack.
5759 */
5760 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005761regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005762 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005763{
5764 regitem_T *rp;
5765
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005766 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005767 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005768
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005769 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005770}
5771
Bram Moolenaar071d4272004-06-13 20:20:40 +00005772/*
5773 * regrepeat - repeatedly match something simple, return how many.
5774 * Advances reginput (and reglnum) to just after the matched chars.
5775 */
5776 static int
5777regrepeat(p, maxcount)
5778 char_u *p;
5779 long maxcount; /* maximum number of matches allowed */
5780{
5781 long count = 0;
5782 char_u *scan;
5783 char_u *opnd;
5784 int mask;
5785 int testval = 0;
5786
5787 scan = reginput; /* Make local copy of reginput for speed. */
5788 opnd = OPERAND(p);
5789 switch (OP(p))
5790 {
5791 case ANY:
5792 case ANY + ADD_NL:
5793 while (count < maxcount)
5794 {
5795 /* Matching anything means we continue until end-of-line (or
5796 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5797 while (*scan != NUL && count < maxcount)
5798 {
5799 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005800 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005801 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005802 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5803 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005804 break;
5805 ++count; /* count the line-break */
5806 reg_nextline();
5807 scan = reginput;
5808 if (got_int)
5809 break;
5810 }
5811 break;
5812
5813 case IDENT:
5814 case IDENT + ADD_NL:
5815 testval = TRUE;
5816 /*FALLTHROUGH*/
5817 case SIDENT:
5818 case SIDENT + ADD_NL:
5819 while (count < maxcount)
5820 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005821 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005822 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005823 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005824 }
5825 else if (*scan == NUL)
5826 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005827 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5828 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005829 break;
5830 reg_nextline();
5831 scan = reginput;
5832 if (got_int)
5833 break;
5834 }
5835 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5836 ++scan;
5837 else
5838 break;
5839 ++count;
5840 }
5841 break;
5842
5843 case KWORD:
5844 case KWORD + ADD_NL:
5845 testval = TRUE;
5846 /*FALLTHROUGH*/
5847 case SKWORD:
5848 case SKWORD + ADD_NL:
5849 while (count < maxcount)
5850 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005851 if (vim_iswordp_buf(scan, reg_buf)
5852 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005853 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005854 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005855 }
5856 else if (*scan == NUL)
5857 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005858 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5859 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005860 break;
5861 reg_nextline();
5862 scan = reginput;
5863 if (got_int)
5864 break;
5865 }
5866 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5867 ++scan;
5868 else
5869 break;
5870 ++count;
5871 }
5872 break;
5873
5874 case FNAME:
5875 case FNAME + ADD_NL:
5876 testval = TRUE;
5877 /*FALLTHROUGH*/
5878 case SFNAME:
5879 case SFNAME + ADD_NL:
5880 while (count < maxcount)
5881 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005882 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005883 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005884 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005885 }
5886 else if (*scan == NUL)
5887 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005888 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5889 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005890 break;
5891 reg_nextline();
5892 scan = reginput;
5893 if (got_int)
5894 break;
5895 }
5896 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5897 ++scan;
5898 else
5899 break;
5900 ++count;
5901 }
5902 break;
5903
5904 case PRINT:
5905 case PRINT + ADD_NL:
5906 testval = TRUE;
5907 /*FALLTHROUGH*/
5908 case SPRINT:
5909 case SPRINT + ADD_NL:
5910 while (count < maxcount)
5911 {
5912 if (*scan == NUL)
5913 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005914 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5915 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005916 break;
5917 reg_nextline();
5918 scan = reginput;
5919 if (got_int)
5920 break;
5921 }
5922 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5923 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005924 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005925 }
5926 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5927 ++scan;
5928 else
5929 break;
5930 ++count;
5931 }
5932 break;
5933
5934 case WHITE:
5935 case WHITE + ADD_NL:
5936 testval = mask = RI_WHITE;
5937do_class:
5938 while (count < maxcount)
5939 {
5940#ifdef FEAT_MBYTE
5941 int l;
5942#endif
5943 if (*scan == NUL)
5944 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005945 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5946 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005947 break;
5948 reg_nextline();
5949 scan = reginput;
5950 if (got_int)
5951 break;
5952 }
5953#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005954 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005955 {
5956 if (testval != 0)
5957 break;
5958 scan += l;
5959 }
5960#endif
5961 else if ((class_tab[*scan] & mask) == testval)
5962 ++scan;
5963 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5964 ++scan;
5965 else
5966 break;
5967 ++count;
5968 }
5969 break;
5970
5971 case NWHITE:
5972 case NWHITE + ADD_NL:
5973 mask = RI_WHITE;
5974 goto do_class;
5975 case DIGIT:
5976 case DIGIT + ADD_NL:
5977 testval = mask = RI_DIGIT;
5978 goto do_class;
5979 case NDIGIT:
5980 case NDIGIT + ADD_NL:
5981 mask = RI_DIGIT;
5982 goto do_class;
5983 case HEX:
5984 case HEX + ADD_NL:
5985 testval = mask = RI_HEX;
5986 goto do_class;
5987 case NHEX:
5988 case NHEX + ADD_NL:
5989 mask = RI_HEX;
5990 goto do_class;
5991 case OCTAL:
5992 case OCTAL + ADD_NL:
5993 testval = mask = RI_OCTAL;
5994 goto do_class;
5995 case NOCTAL:
5996 case NOCTAL + ADD_NL:
5997 mask = RI_OCTAL;
5998 goto do_class;
5999 case WORD:
6000 case WORD + ADD_NL:
6001 testval = mask = RI_WORD;
6002 goto do_class;
6003 case NWORD:
6004 case NWORD + ADD_NL:
6005 mask = RI_WORD;
6006 goto do_class;
6007 case HEAD:
6008 case HEAD + ADD_NL:
6009 testval = mask = RI_HEAD;
6010 goto do_class;
6011 case NHEAD:
6012 case NHEAD + ADD_NL:
6013 mask = RI_HEAD;
6014 goto do_class;
6015 case ALPHA:
6016 case ALPHA + ADD_NL:
6017 testval = mask = RI_ALPHA;
6018 goto do_class;
6019 case NALPHA:
6020 case NALPHA + ADD_NL:
6021 mask = RI_ALPHA;
6022 goto do_class;
6023 case LOWER:
6024 case LOWER + ADD_NL:
6025 testval = mask = RI_LOWER;
6026 goto do_class;
6027 case NLOWER:
6028 case NLOWER + ADD_NL:
6029 mask = RI_LOWER;
6030 goto do_class;
6031 case UPPER:
6032 case UPPER + ADD_NL:
6033 testval = mask = RI_UPPER;
6034 goto do_class;
6035 case NUPPER:
6036 case NUPPER + ADD_NL:
6037 mask = RI_UPPER;
6038 goto do_class;
6039
6040 case EXACTLY:
6041 {
6042 int cu, cl;
6043
6044 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006045 * would have been used for it. It does handle single-byte
6046 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006047 if (ireg_ic)
6048 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006049 cu = MB_TOUPPER(*opnd);
6050 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006051 while (count < maxcount && (*scan == cu || *scan == cl))
6052 {
6053 count++;
6054 scan++;
6055 }
6056 }
6057 else
6058 {
6059 cu = *opnd;
6060 while (count < maxcount && *scan == cu)
6061 {
6062 count++;
6063 scan++;
6064 }
6065 }
6066 break;
6067 }
6068
6069#ifdef FEAT_MBYTE
6070 case MULTIBYTECODE:
6071 {
6072 int i, len, cf = 0;
6073
6074 /* Safety check (just in case 'encoding' was changed since
6075 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006076 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006077 {
6078 if (ireg_ic && enc_utf8)
6079 cf = utf_fold(utf_ptr2char(opnd));
6080 while (count < maxcount)
6081 {
6082 for (i = 0; i < len; ++i)
6083 if (opnd[i] != scan[i])
6084 break;
6085 if (i < len && (!ireg_ic || !enc_utf8
6086 || utf_fold(utf_ptr2char(scan)) != cf))
6087 break;
6088 scan += len;
6089 ++count;
6090 }
6091 }
6092 }
6093 break;
6094#endif
6095
6096 case ANYOF:
6097 case ANYOF + ADD_NL:
6098 testval = TRUE;
6099 /*FALLTHROUGH*/
6100
6101 case ANYBUT:
6102 case ANYBUT + ADD_NL:
6103 while (count < maxcount)
6104 {
6105#ifdef FEAT_MBYTE
6106 int len;
6107#endif
6108 if (*scan == NUL)
6109 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006110 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6111 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006112 break;
6113 reg_nextline();
6114 scan = reginput;
6115 if (got_int)
6116 break;
6117 }
6118 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6119 ++scan;
6120#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006121 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006122 {
6123 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6124 break;
6125 scan += len;
6126 }
6127#endif
6128 else
6129 {
6130 if ((cstrchr(opnd, *scan) == NULL) == testval)
6131 break;
6132 ++scan;
6133 }
6134 ++count;
6135 }
6136 break;
6137
6138 case NEWL:
6139 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006140 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6141 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006142 {
6143 count++;
6144 if (reg_line_lbr)
6145 ADVANCE_REGINPUT();
6146 else
6147 reg_nextline();
6148 scan = reginput;
6149 if (got_int)
6150 break;
6151 }
6152 break;
6153
6154 default: /* Oh dear. Called inappropriately. */
6155 EMSG(_(e_re_corr));
6156#ifdef DEBUG
6157 printf("Called regrepeat with op code %d\n", OP(p));
6158#endif
6159 break;
6160 }
6161
6162 reginput = scan;
6163
6164 return (int)count;
6165}
6166
6167/*
6168 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006169 * Returns NULL when calculating size, when there is no next item and when
6170 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006171 */
6172 static char_u *
6173regnext(p)
6174 char_u *p;
6175{
6176 int offset;
6177
Bram Moolenaard3005802009-11-25 17:21:32 +00006178 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006179 return NULL;
6180
6181 offset = NEXT(p);
6182 if (offset == 0)
6183 return NULL;
6184
Bram Moolenaar582fd852005-03-28 20:58:01 +00006185 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006186 return p - offset;
6187 else
6188 return p + offset;
6189}
6190
6191/*
6192 * Check the regexp program for its magic number.
6193 * Return TRUE if it's wrong.
6194 */
6195 static int
6196prog_magic_wrong()
6197{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006198 regprog_T *prog;
6199
6200 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6201 if (prog->engine == &nfa_regengine)
6202 /* For NFA matcher we don't check the magic */
6203 return FALSE;
6204
6205 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006206 {
6207 EMSG(_(e_re_corr));
6208 return TRUE;
6209 }
6210 return FALSE;
6211}
6212
6213/*
6214 * Cleanup the subexpressions, if this wasn't done yet.
6215 * This construction is used to clear the subexpressions only when they are
6216 * used (to increase speed).
6217 */
6218 static void
6219cleanup_subexpr()
6220{
6221 if (need_clear_subexpr)
6222 {
6223 if (REG_MULTI)
6224 {
6225 /* Use 0xff to set lnum to -1 */
6226 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6227 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6228 }
6229 else
6230 {
6231 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6232 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6233 }
6234 need_clear_subexpr = FALSE;
6235 }
6236}
6237
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpressions, if this wasn't done yet.
 * Nothing happens unless "need_clear_zsubexpr" is set; the flag is reset
 * once the clearing was done.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets every lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6259
6260/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006261 * Save the current subexpr to "bp", so that they can be restored
6262 * later by restore_subexpr().
6263 */
6264 static void
6265save_subexpr(bp)
6266 regbehind_T *bp;
6267{
6268 int i;
6269
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006270 /* When "need_clear_subexpr" is set we don't need to save the values, only
6271 * remember that this flag needs to be set again when restoring. */
6272 bp->save_need_clear_subexpr = need_clear_subexpr;
6273 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006274 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006275 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006276 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006277 if (REG_MULTI)
6278 {
6279 bp->save_start[i].se_u.pos = reg_startpos[i];
6280 bp->save_end[i].se_u.pos = reg_endpos[i];
6281 }
6282 else
6283 {
6284 bp->save_start[i].se_u.ptr = reg_startp[i];
6285 bp->save_end[i].se_u.ptr = reg_endp[i];
6286 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006287 }
6288 }
6289}
6290
6291/*
6292 * Restore the subexpr from "bp".
6293 */
6294 static void
6295restore_subexpr(bp)
6296 regbehind_T *bp;
6297{
6298 int i;
6299
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006300 /* Only need to restore saved values when they are not to be cleared. */
6301 need_clear_subexpr = bp->save_need_clear_subexpr;
6302 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006303 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006304 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006305 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006306 if (REG_MULTI)
6307 {
6308 reg_startpos[i] = bp->save_start[i].se_u.pos;
6309 reg_endpos[i] = bp->save_end[i].se_u.pos;
6310 }
6311 else
6312 {
6313 reg_startp[i] = bp->save_start[i].se_u.ptr;
6314 reg_endp[i] = bp->save_end[i].se_u.ptr;
6315 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006316 }
6317 }
6318}
6319
6320/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006321 * Advance reglnum, regline and reginput to the next line.
6322 */
6323 static void
6324reg_nextline()
6325{
6326 regline = reg_getline(++reglnum);
6327 reginput = regline;
6328 fast_breakcheck();
6329}
6330
6331/*
6332 * Save the input line and position in a regsave_T.
6333 */
6334 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006335reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006336 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006337 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006338{
6339 if (REG_MULTI)
6340 {
6341 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6342 save->rs_u.pos.lnum = reglnum;
6343 }
6344 else
6345 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006346 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006347}
6348
6349/*
6350 * Restore the input line and position from a regsave_T.
6351 */
6352 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006353reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006354 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006355 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006356{
6357 if (REG_MULTI)
6358 {
6359 if (reglnum != save->rs_u.pos.lnum)
6360 {
6361 /* only call reg_getline() when the line number changed to save
6362 * a bit of time */
6363 reglnum = save->rs_u.pos.lnum;
6364 regline = reg_getline(reglnum);
6365 }
6366 reginput = regline + save->rs_u.pos.col;
6367 }
6368 else
6369 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006370 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006371}
6372
6373/*
6374 * Return TRUE if current position is equal to saved position.
6375 */
6376 static int
6377reg_save_equal(save)
6378 regsave_T *save;
6379{
6380 if (REG_MULTI)
6381 return reglnum == save->rs_u.pos.lnum
6382 && reginput == regline + save->rs_u.pos.col;
6383 return reginput == save->rs_u.ptr;
6384}
6385
6386/*
6387 * Tentatively set the sub-expression start to the current position (after
6388 * calling regmatch() they will have changed). Need to save the existing
6389 * values for when there is no match.
6390 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6391 * depending on REG_MULTI.
6392 */
6393 static void
6394save_se_multi(savep, posp)
6395 save_se_T *savep;
6396 lpos_T *posp;
6397{
6398 savep->se_u.pos = *posp;
6399 posp->lnum = reglnum;
6400 posp->col = (colnr_T)(reginput - regline);
6401}
6402
6403 static void
6404save_se_one(savep, pp)
6405 save_se_T *savep;
6406 char_u **pp;
6407{
6408 savep->se_u.ptr = *pp;
6409 *pp = reginput;
6410}
6411
6412/*
6413 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6414 */
6415 static int
6416re_num_cmp(val, scan)
6417 long_u val;
6418 char_u *scan;
6419{
6420 long_u n = OPERAND_MIN(scan);
6421
6422 if (OPERAND_CMP(scan) == '>')
6423 return val > n;
6424 if (OPERAND_CMP(scan) == '<')
6425 return val < n;
6426 return val == n;
6427}
6428
6429
#ifdef BT_REGEXP_DUMP

/*
 * regdump - dump a regexp onto stdout in vaguely comprehensible form
 * When BT_REGEXP_LOG is defined the dump is appended to the file
 * "bt_regexp_log.log" instead; nothing is written when that file cannot be
 * opened.
 * "pattern" is only used for the heading, "r" is the compiled program.
 */
    static void
regdump(pattern, r)
    char_u      *pattern;
    bt_regprog_T *r;
{
    char_u      *s;
    int         op = EXACTLY;   /* Arbitrary non-END op. */
    char_u      *next;
    char_u      *end = NULL;
    FILE        *f;

#ifdef BT_REGEXP_LOG
    f = fopen("bt_regexp_log.log", "a");
#else
    f = stdout;
#endif
    if (f == NULL)
        return;
    fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * Loop until we find the END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    for (s = r->program + 1; op != END || s <= end; )
    {
        op = OP(s);
        fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */

        /* Next ptr, printed as an offset in the program. */
        next = regnext(s);
        if (next != NULL)
            fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
        else
            fprintf(f, "(0)");

        /* Remember the furthest node that was referred to. */
        if (end < next)
            end = next;

        if (op == BRACE_LIMITS)
        {
            /* Two short ints */
            fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
            s += 8;
        }
        s += 3;

        switch (op)
        {
            case ANYOF:
            case ANYOF + ADD_NL:
            case ANYBUT:
            case ANYBUT + ADD_NL:
            case EXACTLY:
                /* Literal string, where present. */
                fprintf(f, "\nxxxxxxxxx\n");
                for ( ; *s != NUL; ++s)
                    fprintf(f, "%c", *s);
                fprintf(f, "\nxxxxxxxxx\n");
                s++;            /* skip over the NUL */
                break;
            default:
                break;
        }
        fprintf(f, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
        fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        fprintf(f, "anchored; ");
    if (r->regmust != NULL)
        fprintf(f, "must have \"%s\"", r->regmust);
    fprintf(f, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(f);
#endif
}
#endif /* BT_REGEXP_DUMP */
6508
6509#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006510/*
6511 * regprop - printable representation of opcode
6512 */
6513 static char_u *
6514regprop(op)
6515 char_u *op;
6516{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006517 char *p;
6518 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006519
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006520 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006521
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006522 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006523 {
6524 case BOL:
6525 p = "BOL";
6526 break;
6527 case EOL:
6528 p = "EOL";
6529 break;
6530 case RE_BOF:
6531 p = "BOF";
6532 break;
6533 case RE_EOF:
6534 p = "EOF";
6535 break;
6536 case CURSOR:
6537 p = "CURSOR";
6538 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006539 case RE_VISUAL:
6540 p = "RE_VISUAL";
6541 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006542 case RE_LNUM:
6543 p = "RE_LNUM";
6544 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006545 case RE_MARK:
6546 p = "RE_MARK";
6547 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006548 case RE_COL:
6549 p = "RE_COL";
6550 break;
6551 case RE_VCOL:
6552 p = "RE_VCOL";
6553 break;
6554 case BOW:
6555 p = "BOW";
6556 break;
6557 case EOW:
6558 p = "EOW";
6559 break;
6560 case ANY:
6561 p = "ANY";
6562 break;
6563 case ANY + ADD_NL:
6564 p = "ANY+NL";
6565 break;
6566 case ANYOF:
6567 p = "ANYOF";
6568 break;
6569 case ANYOF + ADD_NL:
6570 p = "ANYOF+NL";
6571 break;
6572 case ANYBUT:
6573 p = "ANYBUT";
6574 break;
6575 case ANYBUT + ADD_NL:
6576 p = "ANYBUT+NL";
6577 break;
6578 case IDENT:
6579 p = "IDENT";
6580 break;
6581 case IDENT + ADD_NL:
6582 p = "IDENT+NL";
6583 break;
6584 case SIDENT:
6585 p = "SIDENT";
6586 break;
6587 case SIDENT + ADD_NL:
6588 p = "SIDENT+NL";
6589 break;
6590 case KWORD:
6591 p = "KWORD";
6592 break;
6593 case KWORD + ADD_NL:
6594 p = "KWORD+NL";
6595 break;
6596 case SKWORD:
6597 p = "SKWORD";
6598 break;
6599 case SKWORD + ADD_NL:
6600 p = "SKWORD+NL";
6601 break;
6602 case FNAME:
6603 p = "FNAME";
6604 break;
6605 case FNAME + ADD_NL:
6606 p = "FNAME+NL";
6607 break;
6608 case SFNAME:
6609 p = "SFNAME";
6610 break;
6611 case SFNAME + ADD_NL:
6612 p = "SFNAME+NL";
6613 break;
6614 case PRINT:
6615 p = "PRINT";
6616 break;
6617 case PRINT + ADD_NL:
6618 p = "PRINT+NL";
6619 break;
6620 case SPRINT:
6621 p = "SPRINT";
6622 break;
6623 case SPRINT + ADD_NL:
6624 p = "SPRINT+NL";
6625 break;
6626 case WHITE:
6627 p = "WHITE";
6628 break;
6629 case WHITE + ADD_NL:
6630 p = "WHITE+NL";
6631 break;
6632 case NWHITE:
6633 p = "NWHITE";
6634 break;
6635 case NWHITE + ADD_NL:
6636 p = "NWHITE+NL";
6637 break;
6638 case DIGIT:
6639 p = "DIGIT";
6640 break;
6641 case DIGIT + ADD_NL:
6642 p = "DIGIT+NL";
6643 break;
6644 case NDIGIT:
6645 p = "NDIGIT";
6646 break;
6647 case NDIGIT + ADD_NL:
6648 p = "NDIGIT+NL";
6649 break;
6650 case HEX:
6651 p = "HEX";
6652 break;
6653 case HEX + ADD_NL:
6654 p = "HEX+NL";
6655 break;
6656 case NHEX:
6657 p = "NHEX";
6658 break;
6659 case NHEX + ADD_NL:
6660 p = "NHEX+NL";
6661 break;
6662 case OCTAL:
6663 p = "OCTAL";
6664 break;
6665 case OCTAL + ADD_NL:
6666 p = "OCTAL+NL";
6667 break;
6668 case NOCTAL:
6669 p = "NOCTAL";
6670 break;
6671 case NOCTAL + ADD_NL:
6672 p = "NOCTAL+NL";
6673 break;
6674 case WORD:
6675 p = "WORD";
6676 break;
6677 case WORD + ADD_NL:
6678 p = "WORD+NL";
6679 break;
6680 case NWORD:
6681 p = "NWORD";
6682 break;
6683 case NWORD + ADD_NL:
6684 p = "NWORD+NL";
6685 break;
6686 case HEAD:
6687 p = "HEAD";
6688 break;
6689 case HEAD + ADD_NL:
6690 p = "HEAD+NL";
6691 break;
6692 case NHEAD:
6693 p = "NHEAD";
6694 break;
6695 case NHEAD + ADD_NL:
6696 p = "NHEAD+NL";
6697 break;
6698 case ALPHA:
6699 p = "ALPHA";
6700 break;
6701 case ALPHA + ADD_NL:
6702 p = "ALPHA+NL";
6703 break;
6704 case NALPHA:
6705 p = "NALPHA";
6706 break;
6707 case NALPHA + ADD_NL:
6708 p = "NALPHA+NL";
6709 break;
6710 case LOWER:
6711 p = "LOWER";
6712 break;
6713 case LOWER + ADD_NL:
6714 p = "LOWER+NL";
6715 break;
6716 case NLOWER:
6717 p = "NLOWER";
6718 break;
6719 case NLOWER + ADD_NL:
6720 p = "NLOWER+NL";
6721 break;
6722 case UPPER:
6723 p = "UPPER";
6724 break;
6725 case UPPER + ADD_NL:
6726 p = "UPPER+NL";
6727 break;
6728 case NUPPER:
6729 p = "NUPPER";
6730 break;
6731 case NUPPER + ADD_NL:
6732 p = "NUPPER+NL";
6733 break;
6734 case BRANCH:
6735 p = "BRANCH";
6736 break;
6737 case EXACTLY:
6738 p = "EXACTLY";
6739 break;
6740 case NOTHING:
6741 p = "NOTHING";
6742 break;
6743 case BACK:
6744 p = "BACK";
6745 break;
6746 case END:
6747 p = "END";
6748 break;
6749 case MOPEN + 0:
6750 p = "MATCH START";
6751 break;
6752 case MOPEN + 1:
6753 case MOPEN + 2:
6754 case MOPEN + 3:
6755 case MOPEN + 4:
6756 case MOPEN + 5:
6757 case MOPEN + 6:
6758 case MOPEN + 7:
6759 case MOPEN + 8:
6760 case MOPEN + 9:
6761 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6762 p = NULL;
6763 break;
6764 case MCLOSE + 0:
6765 p = "MATCH END";
6766 break;
6767 case MCLOSE + 1:
6768 case MCLOSE + 2:
6769 case MCLOSE + 3:
6770 case MCLOSE + 4:
6771 case MCLOSE + 5:
6772 case MCLOSE + 6:
6773 case MCLOSE + 7:
6774 case MCLOSE + 8:
6775 case MCLOSE + 9:
6776 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6777 p = NULL;
6778 break;
6779 case BACKREF + 1:
6780 case BACKREF + 2:
6781 case BACKREF + 3:
6782 case BACKREF + 4:
6783 case BACKREF + 5:
6784 case BACKREF + 6:
6785 case BACKREF + 7:
6786 case BACKREF + 8:
6787 case BACKREF + 9:
6788 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6789 p = NULL;
6790 break;
6791 case NOPEN:
6792 p = "NOPEN";
6793 break;
6794 case NCLOSE:
6795 p = "NCLOSE";
6796 break;
6797#ifdef FEAT_SYN_HL
6798 case ZOPEN + 1:
6799 case ZOPEN + 2:
6800 case ZOPEN + 3:
6801 case ZOPEN + 4:
6802 case ZOPEN + 5:
6803 case ZOPEN + 6:
6804 case ZOPEN + 7:
6805 case ZOPEN + 8:
6806 case ZOPEN + 9:
6807 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6808 p = NULL;
6809 break;
6810 case ZCLOSE + 1:
6811 case ZCLOSE + 2:
6812 case ZCLOSE + 3:
6813 case ZCLOSE + 4:
6814 case ZCLOSE + 5:
6815 case ZCLOSE + 6:
6816 case ZCLOSE + 7:
6817 case ZCLOSE + 8:
6818 case ZCLOSE + 9:
6819 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6820 p = NULL;
6821 break;
6822 case ZREF + 1:
6823 case ZREF + 2:
6824 case ZREF + 3:
6825 case ZREF + 4:
6826 case ZREF + 5:
6827 case ZREF + 6:
6828 case ZREF + 7:
6829 case ZREF + 8:
6830 case ZREF + 9:
6831 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6832 p = NULL;
6833 break;
6834#endif
6835 case STAR:
6836 p = "STAR";
6837 break;
6838 case PLUS:
6839 p = "PLUS";
6840 break;
6841 case NOMATCH:
6842 p = "NOMATCH";
6843 break;
6844 case MATCH:
6845 p = "MATCH";
6846 break;
6847 case BEHIND:
6848 p = "BEHIND";
6849 break;
6850 case NOBEHIND:
6851 p = "NOBEHIND";
6852 break;
6853 case SUBPAT:
6854 p = "SUBPAT";
6855 break;
6856 case BRACE_LIMITS:
6857 p = "BRACE_LIMITS";
6858 break;
6859 case BRACE_SIMPLE:
6860 p = "BRACE_SIMPLE";
6861 break;
6862 case BRACE_COMPLEX + 0:
6863 case BRACE_COMPLEX + 1:
6864 case BRACE_COMPLEX + 2:
6865 case BRACE_COMPLEX + 3:
6866 case BRACE_COMPLEX + 4:
6867 case BRACE_COMPLEX + 5:
6868 case BRACE_COMPLEX + 6:
6869 case BRACE_COMPLEX + 7:
6870 case BRACE_COMPLEX + 8:
6871 case BRACE_COMPLEX + 9:
6872 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6873 p = NULL;
6874 break;
6875#ifdef FEAT_MBYTE
6876 case MULTIBYTECODE:
6877 p = "MULTIBYTECODE";
6878 break;
6879#endif
6880 case NEWL:
6881 p = "NEWL";
6882 break;
6883 default:
6884 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6885 p = NULL;
6886 break;
6887 }
6888 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006889 STRCAT(buf, p);
6890 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006891}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006892#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006893
6894#ifdef FEAT_MBYTE
6895static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6896
/*
 * One row of the decomposition table: the base character followed by up to
 * two more code points; slots that are not needed hold zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the code points 0xfb20 - 0xfb4f, indexed by
 * (code point - 0xfb20).  Code points marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] =
{
    {0x5e2, 0, 0},		/* 0xfb20	alt ayin */
    {0x5d0, 0, 0},		/* 0xfb21	alt alef */
    {0x5d3, 0, 0},		/* 0xfb22	alt dalet */
    {0x5d4, 0, 0},		/* 0xfb23	alt he */
    {0x5db, 0, 0},		/* 0xfb24	alt kaf */
    {0x5dc, 0, 0},		/* 0xfb25	alt lamed */
    {0x5dd, 0, 0},		/* 0xfb26	alt mem-sofit */
    {0x5e8, 0, 0},		/* 0xfb27	alt resh */
    {0x5ea, 0, 0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc, 0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};

/*
 * Decompose character "c" into "*c1", "*c2" and "*c3".
 * For a code point covered by decomp_table the three values come from its
 * table row.  Any other character is returned unchanged in "*c1" with "*c2"
 * and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T	*entry;

    if (c < 0xfb20 || c > 0xfb4f)
    {
	/* Not in the table: the character is its own base. */
	*c1 = c;
	*c2 = 0;
	*c3 = 0;
	return;
    }

    entry = &decomp_table[c - 0xfb20];
    *c1 = entry->a;
    *c2 = entry->b;
    *c3 = entry->c;
}
6975#endif
6976
6977/*
6978 * Compare two strings, ignore case if ireg_ic set.
6979 * Return 0 if strings match, non-zero otherwise.
6980 * Correct the length "*n" when composing characters are ignored.
6981 */
6982 static int
6983cstrncmp(s1, s2, n)
6984 char_u *s1, *s2;
6985 int *n;
6986{
6987 int result;
6988
6989 if (!ireg_ic)
6990 result = STRNCMP(s1, s2, *n);
6991 else
6992 result = MB_STRNICMP(s1, s2, *n);
6993
6994#ifdef FEAT_MBYTE
6995 /* if it failed and it's utf8 and we want to combineignore: */
6996 if (result != 0 && enc_utf8 && ireg_icombine)
6997 {
6998 char_u *str1, *str2;
6999 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007000 int junk;
7001
7002 /* we have to handle the strcmp ourselves, since it is necessary to
7003 * deal with the composing characters by ignoring them: */
7004 str1 = s1;
7005 str2 = s2;
7006 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007007 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007008 {
7009 c1 = mb_ptr2char_adv(&str1);
7010 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007011
7012 /* decompose the character if necessary, into 'base' characters
7013 * because I don't care about Arabic, I will hard-code the Hebrew
7014 * which I *do* care about! So sue me... */
7015 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7016 {
7017 /* decomposition necessary? */
7018 mb_decompose(c1, &c11, &junk, &junk);
7019 mb_decompose(c2, &c12, &junk, &junk);
7020 c1 = c11;
7021 c2 = c12;
7022 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7023 break;
7024 }
7025 }
7026 result = c2 - c1;
7027 if (result == 0)
7028 *n = (int)(str2 - s2);
7029 }
7030#endif
7031
7032 return result;
7033}
7034
7035/*
7036 * cstrchr: This function is used a lot for simple searches, keep it fast!
7037 */
7038 static char_u *
7039cstrchr(s, c)
7040 char_u *s;
7041 int c;
7042{
7043 char_u *p;
7044 int cc;
7045
7046 if (!ireg_ic
7047#ifdef FEAT_MBYTE
7048 || (!enc_utf8 && mb_char2len(c) > 1)
7049#endif
7050 )
7051 return vim_strchr(s, c);
7052
7053 /* tolower() and toupper() can be slow, comparing twice should be a lot
7054 * faster (esp. when using MS Visual C++!).
7055 * For UTF-8 need to use folded case. */
7056#ifdef FEAT_MBYTE
7057 if (enc_utf8 && c > 0x80)
7058 cc = utf_fold(c);
7059 else
7060#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007061 if (MB_ISUPPER(c))
7062 cc = MB_TOLOWER(c);
7063 else if (MB_ISLOWER(c))
7064 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007065 else
7066 return vim_strchr(s, c);
7067
7068#ifdef FEAT_MBYTE
7069 if (has_mbyte)
7070 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007071 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007072 {
7073 if (enc_utf8 && c > 0x80)
7074 {
7075 if (utf_fold(utf_ptr2char(p)) == cc)
7076 return p;
7077 }
7078 else if (*p == c || *p == cc)
7079 return p;
7080 }
7081 }
7082 else
7083#endif
7084 /* Faster version for when there are no multi-byte characters. */
7085 for (p = s; *p != NUL; ++p)
7086 if (*p == c || *p == cc)
7087 return p;
7088
7089 return NULL;
7090}
7091
7092/***************************************************************
7093 * regsub stuff *
7094 ***************************************************************/
7095
7096/* This stuff below really confuses cc on an SGI -- webb */
7097#ifdef __sgi
7098# undef __ARGS
7099# define __ARGS(x) ()
7100#endif
7101
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007108typedef void (*(*fptr_T) __ARGS((int *, int)))();
Bram Moolenaar071d4272004-06-13 20:20:40 +00007109
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007110static fptr_T do_upper __ARGS((int *, int));
7111static fptr_T do_Upper __ARGS((int *, int));
7112static fptr_T do_lower __ARGS((int *, int));
7113static fptr_T do_Lower __ARGS((int *, int));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007114
7115static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7116
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007117 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007118do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007119 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007120 int c;
7121{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007122 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007123
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007124 return (fptr_T)NULL;
7125}
7126
7127 static fptr_T
7128do_Upper(d, c)
7129 int *d;
7130 int c;
7131{
7132 *d = MB_TOUPPER(c);
7133
7134 return (fptr_T)do_Upper;
7135}
7136
7137 static fptr_T
7138do_lower(d, c)
7139 int *d;
7140 int c;
7141{
7142 *d = MB_TOLOWER(c);
7143
7144 return (fptr_T)NULL;
7145}
7146
7147 static fptr_T
7148do_Lower(d, c)
7149 int *d;
7150 int c;
7151{
7152 *d = MB_TOLOWER(c);
7153
7154 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007155}
7156
7157/*
7158 * regtilde(): Replace tildes in the pattern by the old pattern.
7159 *
7160 * Short explanation of the tilde: It stands for the previous replacement
7161 * pattern. If that previous pattern also contains a ~ we should go back a
7162 * step further... But we insert the previous pattern into the current one
7163 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007164 * This still does not handle the case where "magic" changes. So require the
7165 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007166 *
7167 * The tildes are parsed once before the first call to vim_regsub().
7168 */
7169 char_u *
7170regtilde(source, magic)
7171 char_u *source;
7172 int magic;
7173{
7174 char_u *newsub = source;
7175 char_u *tmpsub;
7176 char_u *p;
7177 int len;
7178 int prevlen;
7179
7180 for (p = newsub; *p; ++p)
7181 {
7182 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7183 {
7184 if (reg_prev_sub != NULL)
7185 {
7186 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7187 prevlen = (int)STRLEN(reg_prev_sub);
7188 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7189 if (tmpsub != NULL)
7190 {
7191 /* copy prefix */
7192 len = (int)(p - newsub); /* not including ~ */
7193 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007194 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007195 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7196 /* copy postfix */
7197 if (!magic)
7198 ++p; /* back off \ */
7199 STRCPY(tmpsub + len + prevlen, p + 1);
7200
7201 if (newsub != source) /* already allocated newsub */
7202 vim_free(newsub);
7203 newsub = tmpsub;
7204 p = newsub + len + prevlen;
7205 }
7206 }
7207 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007208 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007209 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007210 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007211 --p;
7212 }
7213 else
7214 {
7215 if (*p == '\\' && p[1]) /* skip escaped characters */
7216 ++p;
7217#ifdef FEAT_MBYTE
7218 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007219 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007220#endif
7221 }
7222 }
7223
7224 vim_free(reg_prev_sub);
7225 if (newsub != source) /* newsub was allocated, just keep it */
7226 reg_prev_sub = newsub;
7227 else /* no ~ found, need to save newsub */
7228 reg_prev_sub = vim_strsave(newsub);
7229 return newsub;
7230}
7231
7232#ifdef FEAT_EVAL
7233static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
7234
7235/* These pointers are used instead of reg_match and reg_mmatch for
7236 * reg_submatch(). Needed for when the substitution string is an expression
7237 * that contains a call to substitute() and submatch(). */
7238static regmatch_T *submatch_match;
7239static regmmatch_T *submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007240static linenr_T submatch_firstlnum;
7241static linenr_T submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007242static int submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007243#endif
7244
7245#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7246/*
7247 * vim_regsub() - perform substitutions after a vim_regexec() or
7248 * vim_regexec_multi() match.
7249 *
7250 * If "copy" is TRUE really copy into "dest".
7251 * If "copy" is FALSE nothing is copied, this is just to find out the length
7252 * of the result.
7253 *
7254 * If "backslash" is TRUE, a backslash will be removed later, need to double
7255 * them to keep them, and insert a backslash before a CR to avoid it being
7256 * replaced with a line break later.
7257 *
7258 * Note: The matched text must not change between the call of
7259 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
7260 * references invalid!
7261 *
7262 * Returns the size of the replacement, including terminating NUL.
7263 */
7264 int
7265vim_regsub(rmp, source, dest, copy, magic, backslash)
7266 regmatch_T *rmp;
7267 char_u *source;
7268 char_u *dest;
7269 int copy;
7270 int magic;
7271 int backslash;
7272{
7273 reg_match = rmp;
7274 reg_mmatch = NULL;
7275 reg_maxline = 0;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01007276 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007277 return vim_regsub_both(source, dest, copy, magic, backslash);
7278}
7279#endif
7280
7281 int
7282vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7283 regmmatch_T *rmp;
7284 linenr_T lnum;
7285 char_u *source;
7286 char_u *dest;
7287 int copy;
7288 int magic;
7289 int backslash;
7290{
7291 reg_match = NULL;
7292 reg_mmatch = rmp;
7293 reg_buf = curbuf; /* always works on the current buffer! */
7294 reg_firstlnum = lnum;
7295 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7296 return vim_regsub_both(source, dest, copy, magic, backslash);
7297}
7298
7299 static int
7300vim_regsub_both(source, dest, copy, magic, backslash)
7301 char_u *source;
7302 char_u *dest;
7303 int copy;
7304 int magic;
7305 int backslash;
7306{
7307 char_u *src;
7308 char_u *dst;
7309 char_u *s;
7310 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007311 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007312 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007313 fptr_T func_all = (fptr_T)NULL;
7314 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007315 linenr_T clnum = 0; /* init for GCC */
7316 int len = 0; /* init for GCC */
7317#ifdef FEAT_EVAL
7318 static char_u *eval_result = NULL;
7319#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007320
7321 /* Be paranoid... */
7322 if (source == NULL || dest == NULL)
7323 {
7324 EMSG(_(e_null));
7325 return 0;
7326 }
7327 if (prog_magic_wrong())
7328 return 0;
7329 src = source;
7330 dst = dest;
7331
7332 /*
7333 * When the substitute part starts with "\=" evaluate it as an expression.
7334 */
7335 if (source[0] == '\\' && source[1] == '='
7336#ifdef FEAT_EVAL
7337 && !can_f_submatch /* can't do this recursively */
7338#endif
7339 )
7340 {
7341#ifdef FEAT_EVAL
7342 /* To make sure that the length doesn't change between checking the
7343 * length and copying the string, and to speed up things, the
7344 * resulting string is saved from the call with "copy" == FALSE to the
7345 * call with "copy" == TRUE. */
7346 if (copy)
7347 {
7348 if (eval_result != NULL)
7349 {
7350 STRCPY(dest, eval_result);
7351 dst += STRLEN(eval_result);
7352 vim_free(eval_result);
7353 eval_result = NULL;
7354 }
7355 }
7356 else
7357 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007358 win_T *save_reg_win;
7359 int save_ireg_ic;
7360
7361 vim_free(eval_result);
7362
7363 /* The expression may contain substitute(), which calls us
7364 * recursively. Make sure submatch() gets the text from the first
7365 * level. Don't need to save "reg_buf", because
7366 * vim_regexec_multi() can't be called recursively. */
7367 submatch_match = reg_match;
7368 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007369 submatch_firstlnum = reg_firstlnum;
7370 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007371 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007372 save_reg_win = reg_win;
7373 save_ireg_ic = ireg_ic;
7374 can_f_submatch = TRUE;
7375
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007376 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007377 if (eval_result != NULL)
7378 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007379 int had_backslash = FALSE;
7380
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007381 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007382 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007383 /* Change NL to CR, so that it becomes a line break,
7384 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007385 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007386 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007387 *s = CAR;
7388 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007389 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007390 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007391 /* Change NL to CR here too, so that this works:
7392 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7393 * abc\
7394 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007395 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007396 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007397 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007398 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007399 had_backslash = TRUE;
7400 }
7401 }
7402 if (had_backslash && backslash)
7403 {
7404 /* Backslashes will be consumed, need to double them. */
7405 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7406 if (s != NULL)
7407 {
7408 vim_free(eval_result);
7409 eval_result = s;
7410 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007411 }
7412
7413 dst += STRLEN(eval_result);
7414 }
7415
7416 reg_match = submatch_match;
7417 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007418 reg_firstlnum = submatch_firstlnum;
7419 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007420 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007421 reg_win = save_reg_win;
7422 ireg_ic = save_ireg_ic;
7423 can_f_submatch = FALSE;
7424 }
7425#endif
7426 }
7427 else
7428 while ((c = *src++) != NUL)
7429 {
7430 if (c == '&' && magic)
7431 no = 0;
7432 else if (c == '\\' && *src != NUL)
7433 {
7434 if (*src == '&' && !magic)
7435 {
7436 ++src;
7437 no = 0;
7438 }
7439 else if ('0' <= *src && *src <= '9')
7440 {
7441 no = *src++ - '0';
7442 }
7443 else if (vim_strchr((char_u *)"uUlLeE", *src))
7444 {
7445 switch (*src++)
7446 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007447 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007448 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007449 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007450 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007451 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007452 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007453 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007454 continue;
7455 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007456 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007457 continue;
7458 }
7459 }
7460 }
7461 if (no < 0) /* Ordinary character. */
7462 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007463 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7464 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007465 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007466 if (copy)
7467 {
7468 *dst++ = c;
7469 *dst++ = *src++;
7470 *dst++ = *src++;
7471 }
7472 else
7473 {
7474 dst += 3;
7475 src += 2;
7476 }
7477 continue;
7478 }
7479
Bram Moolenaar071d4272004-06-13 20:20:40 +00007480 if (c == '\\' && *src != NUL)
7481 {
7482 /* Check for abbreviations -- webb */
7483 switch (*src)
7484 {
7485 case 'r': c = CAR; ++src; break;
7486 case 'n': c = NL; ++src; break;
7487 case 't': c = TAB; ++src; break;
7488 /* Oh no! \e already has meaning in subst pat :-( */
7489 /* case 'e': c = ESC; ++src; break; */
7490 case 'b': c = Ctrl_H; ++src; break;
7491
7492 /* If "backslash" is TRUE the backslash will be removed
7493 * later. Used to insert a literal CR. */
7494 default: if (backslash)
7495 {
7496 if (copy)
7497 *dst = '\\';
7498 ++dst;
7499 }
7500 c = *src++;
7501 }
7502 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007503#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007504 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007505 c = mb_ptr2char(src - 1);
7506#endif
7507
Bram Moolenaardb552d602006-03-23 22:59:57 +00007508 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007509 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007510 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007511 func_one = (fptr_T)(func_one(&cc, c));
7512 else if (func_all != (fptr_T)NULL)
7513 /* Turbo C complains without the typecast */
7514 func_all = (fptr_T)(func_all(&cc, c));
7515 else /* just copy */
7516 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007517
7518#ifdef FEAT_MBYTE
7519 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007520 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007521 int totlen = mb_ptr2len(src - 1);
7522
Bram Moolenaar071d4272004-06-13 20:20:40 +00007523 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007524 mb_char2bytes(cc, dst);
7525 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007526 if (enc_utf8)
7527 {
7528 int clen = utf_ptr2len(src - 1);
7529
7530 /* If the character length is shorter than "totlen", there
7531 * are composing characters; copy them as-is. */
7532 if (clen < totlen)
7533 {
7534 if (copy)
7535 mch_memmove(dst + 1, src - 1 + clen,
7536 (size_t)(totlen - clen));
7537 dst += totlen - clen;
7538 }
7539 }
7540 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007541 }
7542 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007543#endif
7544 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007545 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007546 dst++;
7547 }
7548 else
7549 {
7550 if (REG_MULTI)
7551 {
7552 clnum = reg_mmatch->startpos[no].lnum;
7553 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7554 s = NULL;
7555 else
7556 {
7557 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7558 if (reg_mmatch->endpos[no].lnum == clnum)
7559 len = reg_mmatch->endpos[no].col
7560 - reg_mmatch->startpos[no].col;
7561 else
7562 len = (int)STRLEN(s);
7563 }
7564 }
7565 else
7566 {
7567 s = reg_match->startp[no];
7568 if (reg_match->endp[no] == NULL)
7569 s = NULL;
7570 else
7571 len = (int)(reg_match->endp[no] - s);
7572 }
7573 if (s != NULL)
7574 {
7575 for (;;)
7576 {
7577 if (len == 0)
7578 {
7579 if (REG_MULTI)
7580 {
7581 if (reg_mmatch->endpos[no].lnum == clnum)
7582 break;
7583 if (copy)
7584 *dst = CAR;
7585 ++dst;
7586 s = reg_getline(++clnum);
7587 if (reg_mmatch->endpos[no].lnum == clnum)
7588 len = reg_mmatch->endpos[no].col;
7589 else
7590 len = (int)STRLEN(s);
7591 }
7592 else
7593 break;
7594 }
7595 else if (*s == NUL) /* we hit NUL. */
7596 {
7597 if (copy)
7598 EMSG(_(e_re_damg));
7599 goto exit;
7600 }
7601 else
7602 {
7603 if (backslash && (*s == CAR || *s == '\\'))
7604 {
7605 /*
7606 * Insert a backslash in front of a CR, otherwise
7607 * it will be replaced by a line break.
7608 * Number of backslashes will be halved later,
7609 * double them here.
7610 */
7611 if (copy)
7612 {
7613 dst[0] = '\\';
7614 dst[1] = *s;
7615 }
7616 dst += 2;
7617 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007618 else
7619 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007620#ifdef FEAT_MBYTE
7621 if (has_mbyte)
7622 c = mb_ptr2char(s);
7623 else
7624#endif
7625 c = *s;
7626
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007627 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007628 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007629 func_one = (fptr_T)(func_one(&cc, c));
7630 else if (func_all != (fptr_T)NULL)
7631 /* Turbo C complains without the typecast */
7632 func_all = (fptr_T)(func_all(&cc, c));
7633 else /* just copy */
7634 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007635
7636#ifdef FEAT_MBYTE
7637 if (has_mbyte)
7638 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007639 int l;
7640
7641 /* Copy composing characters separately, one
7642 * at a time. */
7643 if (enc_utf8)
7644 l = utf_ptr2len(s) - 1;
7645 else
7646 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007647
7648 s += l;
7649 len -= l;
7650 if (copy)
7651 mb_char2bytes(cc, dst);
7652 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007653 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007654 else
7655#endif
7656 if (copy)
7657 *dst = cc;
7658 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007659 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007660
Bram Moolenaar071d4272004-06-13 20:20:40 +00007661 ++s;
7662 --len;
7663 }
7664 }
7665 }
7666 no = -1;
7667 }
7668 }
7669 if (copy)
7670 *dst = NUL;
7671
7672exit:
7673 return (int)((dst - dest) + 1);
7674}
7675
7676#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007677static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7678
Bram Moolenaar071d4272004-06-13 20:20:40 +00007679/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007680 * Call reg_getline() with the line numbers from the submatch. If a
7681 * substitute() was used the reg_maxline and other values have been
7682 * overwritten.
7683 */
7684 static char_u *
7685reg_getline_submatch(lnum)
7686 linenr_T lnum;
7687{
7688 char_u *s;
7689 linenr_T save_first = reg_firstlnum;
7690 linenr_T save_max = reg_maxline;
7691
7692 reg_firstlnum = submatch_firstlnum;
7693 reg_maxline = submatch_maxline;
7694
7695 s = reg_getline(lnum);
7696
7697 reg_firstlnum = save_first;
7698 reg_maxline = save_max;
7699 return s;
7700}
7701
7702/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007703 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007704 * allocated memory.
7705 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7706 */
7707 char_u *
7708reg_submatch(no)
7709 int no;
7710{
7711 char_u *retval = NULL;
7712 char_u *s;
7713 int len;
7714 int round;
7715 linenr_T lnum;
7716
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007717 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007718 return NULL;
7719
7720 if (submatch_match == NULL)
7721 {
7722 /*
7723 * First round: compute the length and allocate memory.
7724 * Second round: copy the text.
7725 */
7726 for (round = 1; round <= 2; ++round)
7727 {
7728 lnum = submatch_mmatch->startpos[no].lnum;
7729 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7730 return NULL;
7731
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007732 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007733 if (s == NULL) /* anti-crash check, cannot happen? */
7734 break;
7735 if (submatch_mmatch->endpos[no].lnum == lnum)
7736 {
7737 /* Within one line: take form start to end col. */
7738 len = submatch_mmatch->endpos[no].col
7739 - submatch_mmatch->startpos[no].col;
7740 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007741 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007742 ++len;
7743 }
7744 else
7745 {
7746 /* Multiple lines: take start line from start col, middle
7747 * lines completely and end line up to end col. */
7748 len = (int)STRLEN(s);
7749 if (round == 2)
7750 {
7751 STRCPY(retval, s);
7752 retval[len] = '\n';
7753 }
7754 ++len;
7755 ++lnum;
7756 while (lnum < submatch_mmatch->endpos[no].lnum)
7757 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007758 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007759 if (round == 2)
7760 STRCPY(retval + len, s);
7761 len += (int)STRLEN(s);
7762 if (round == 2)
7763 retval[len] = '\n';
7764 ++len;
7765 }
7766 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007767 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007768 submatch_mmatch->endpos[no].col);
7769 len += submatch_mmatch->endpos[no].col;
7770 if (round == 2)
7771 retval[len] = NUL;
7772 ++len;
7773 }
7774
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007775 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007776 {
7777 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007778 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007779 return NULL;
7780 }
7781 }
7782 }
7783 else
7784 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007785 s = submatch_match->startp[no];
7786 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007787 retval = NULL;
7788 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007789 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007790 }
7791
7792 return retval;
7793}
7794#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007795
/*
 * Table with the entry points of the backtracking regexp engine:
 * bt_regcomp, bt_regexec, bt_regexec_nl (only with the features listed in
 * the #if) and bt_regexec_multi.  vim_regcomp() calls the compile function
 * through bt_regengine.regcomp().
 * When DEBUG is defined one more member is initialized to an empty string;
 * presumably this is the "expr" member that vim_regcomp() fills in.
 */
7796static regengine_T bt_regengine =
7797{
7798 bt_regcomp,
7799 bt_regexec,
7800#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7801 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7802 bt_regexec_nl,
7803#endif
7804 bt_regexec_multi
7805#ifdef DEBUG
7806 ,(char_u *)""
7807#endif
7808};
7809
7810
7811#include "regexp_nfa.c"
7812
/*
 * Table with the entry points of the NFA regexp engine: nfa_regcomp,
 * nfa_regexec, nfa_regexec_nl (only with the features listed in the #if)
 * and nfa_regexec_multi.  vim_regcomp() calls the compile function through
 * nfa_regengine.regcomp().
 * When DEBUG is defined one more member is initialized to an empty string;
 * presumably this is the "expr" member that vim_regcomp() fills in.
 */
7813static regengine_T nfa_regengine =
7814{
7815 nfa_regcomp,
7816 nfa_regexec,
7817#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7818 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7819 nfa_regexec_nl,
7820#endif
7821 nfa_regexec_multi
7822#ifdef DEBUG
7823 ,(char_u *)""
7824#endif
7825};
7826
/*
 * Engine selection state.  vim_regcomp() sets regexp_engine from p_re and
 * lets a "\%#=N" prefix of the pattern overrule it; N must be one of the
 * three engine numbers defined below.
 */
7827/* Which regexp engine to use? Needed for vim_regcomp().
7828 * Must match with 'regexpengine'. */
7829static int regexp_engine = 0;
7830#define AUTOMATIC_ENGINE 0
7831#define BACKTRACKING_ENGINE 1
7832#define NFA_ENGINE 2
/* Names of the engines, indexed by engine number.  Only used for the debug
 * message in vim_regcomp(). */
7833#ifdef DEBUG
7834static char_u regname[][30] = {
7835 "AUTOMATIC Regexp Engine",
Bram Moolenaar75eb1612013-05-29 18:45:11 +02007836 "BACKTRACKING Regexp Engine",
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007837 "NFA Regexp Engine"
7838 };
7839#endif
7840
7841/*
7842 * Compile a regular expression into internal code.
7843 * Returns the program in allocated memory. Returns NULL for an error.
7844 */
7845 regprog_T *
7846vim_regcomp(expr_arg, re_flags)
7847 char_u *expr_arg;
7848 int re_flags;
7849{
7850 regprog_T *prog = NULL;
7851 char_u *expr = expr_arg;
7852
7853 syntax_error = FALSE;
7854 regexp_engine = p_re;
7855
7856 /* Check for prefix "\%#=", that sets the regexp engine */
7857 if (STRNCMP(expr, "\\%#=", 4) == 0)
7858 {
7859 int newengine = expr[4] - '0';
7860
7861 if (newengine == AUTOMATIC_ENGINE
7862 || newengine == BACKTRACKING_ENGINE
7863 || newengine == NFA_ENGINE)
7864 {
7865 regexp_engine = expr[4] - '0';
7866 expr += 5;
7867#ifdef DEBUG
7868 EMSG3("New regexp mode selected (%d): %s", regexp_engine,
7869 regname[newengine]);
7870#endif
7871 }
7872 else
7873 {
7874 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
7875 regexp_engine = AUTOMATIC_ENGINE;
7876 }
7877 }
7878#ifdef DEBUG
7879 bt_regengine.expr = expr;
7880 nfa_regengine.expr = expr;
7881#endif
7882
7883 /*
7884 * First try the NFA engine, unless backtracking was requested.
7885 */
7886 if (regexp_engine != BACKTRACKING_ENGINE)
7887 prog = nfa_regengine.regcomp(expr, re_flags);
7888 else
7889 prog = bt_regengine.regcomp(expr, re_flags);
7890
7891 if (prog == NULL) /* error compiling regexp with initial engine */
7892 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007893#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007894 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
7895 {
7896 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007897 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007898 if (f)
7899 {
7900 if (!syntax_error)
7901 fprintf(f, "NFA engine could not handle \"%s\"\n", expr);
7902 else
7903 fprintf(f, "Syntax error in \"%s\"\n", expr);
7904 fclose(f);
7905 }
7906 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007907 EMSG2("(NFA) Could not open \"%s\" to write !!!",
7908 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007909 /*
7910 if (syntax_error)
7911 EMSG("NFA Regexp: Syntax Error !");
7912 */
7913 }
7914#endif
7915 /*
7916 * If NFA engine failed, then revert to the backtracking engine.
7917 * Except when there was a syntax error, which was properly handled by
7918 * NFA engine.
7919 */
7920 if (regexp_engine == AUTOMATIC_ENGINE)
7921 if (!syntax_error)
7922 prog = bt_regengine.regcomp(expr, re_flags);
7923
7924 } /* endif prog==NULL */
7925
7926
7927 return prog;
7928}
7929
7930/*
7931 * Match a regexp against a string.
7932 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7933 * Uses curbuf for line count and 'iskeyword'.
7934 *
7935 * Return TRUE if there is a match, FALSE if not.
7936 */
7937 int
7938vim_regexec(rmp, line, col)
7939 regmatch_T *rmp;
7940 char_u *line; /* string to match against */
7941 colnr_T col; /* column to start looking for match */
7942{
7943 return rmp->regprog->engine->regexec(rmp, line, col);
7944}
7945
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Variant of vim_regexec() in which a "\n" in "line" is considered to be a
 * line break.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;
    colnr_T     col;
{
    regprog_T   *prog = rmp->regprog;

    /* Hand over to the engine stored in the program. */
    return prog->engine->regexec_nl(rmp, line, col);
}
#endif
7960
7961/*
7962 * Match a regexp against multiple lines.
7963 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7964 * Uses curbuf for line count and 'iskeyword'.
7965 *
7966 * Return zero if there is no match. Return number of lines contained in the
7967 * match otherwise.
7968 */
7969 long
7970vim_regexec_multi(rmp, win, buf, lnum, col, tm)
7971 regmmatch_T *rmp;
7972 win_T *win; /* window in which to search or NULL */
7973 buf_T *buf; /* buffer in which to search */
7974 linenr_T lnum; /* nr of line to start looking for match */
7975 colnr_T col; /* column to start looking for match */
7976 proftime_T *tm; /* timeout limit or NULL */
7977{
7978 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
7979}