blob: 3eaf74dd93b2daac89dcc0d0caa30b2b36a9ded1 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020041/* Uncomment the first if you do not want to see debugging logs or files
42 * related to regular expressions, even when compiling with -DDEBUG.
43 * Uncomment the second to get the regexp debugging. */
44/* #undef DEBUG */
45/* #define DEBUG */
46
Bram Moolenaar071d4272004-06-13 20:20:40 +000047#include "vim.h"
48
#if defined(DEBUG)
/* Show or save debugging data when the backtracking (BT) engine is used. */
# define BT_REGEXP_DUMP
/* Write the debugging data to a file instead of displaying it. */
# define BT_REGEXP_LOG
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME       "bt_regexp_debug.log"
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +000057
58/*
59 * The "internal use only" fields in regexp.h are present to pass info from
60 * compile to execute that permits the execute phase to run lots faster on
61 * simple cases. They are:
62 *
 * regstart     char that must begin a match; NUL if none obvious; Can be a
 *              multi-byte character.
 * reganch      is the match anchored (at beginning-of-line only)?
 * regmust      string (pointer into program) that match must include, or NULL
 * regmlen      length of regmust string
 * regflags     RF_ values or'ed together
69 *
70 * Regstart and reganch permit very fast decisions on suitable starting points
71 * for a match, cutting down the work a lot. Regmust permits fast rejection
72 * of lines that cannot possibly match. The regmust tests are costly enough
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something
74 * potentially expensive (at present, the only such thing detected is * or +
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77 * computing it anyway.
78 */
79
80/*
81 * Structure for regexp "program". This is essentially a linear encoding
82 * of a nondeterministic finite-state machine (aka syntax charts or
83 * "railroad normal form" in parsing technology). Each node is an opcode
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
85 * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
86 * pointer with a BRANCH on both ends of it is connecting two alternatives.
87 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
88 * (as opposed to a collection of them) is never concatenated with anything
89 * because of operator precedence). The "next" pointer of a BRACES_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000090 * node points to the node after the stuff to be repeated.
91 * The operand of some types of node is a literal string; for others, it is a
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
93 * is the first node of the branch.
94 * (NB this is *not* a tree structure: the tail of the branch connects to the
95 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000096 *
 * pattern      is coded like:
 *
 *                        +-----------------+
 *                        |                 V
 * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
 *                   |      ^    |          ^
 *                   +------+    +----------+
 *
 *
 *                     +------------------+
 *                     V                  |
 * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
 *                   |      |               ^                      ^
 *                   |      +---------------+                      |
 *                   +---------------------------------------------+
 *
 *
 *                     +----------------------+
 *                     V                      |
 * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 *                   |               |           ^                      ^
 *                   |               +-----------+                      |
 *                   +--------------------------------------------------+
 *
 *
 *                                      +-------------------------+
 *                                      V                         |
 * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 *                   |                              |                ^
 *                   |                              +----------------+
 *                   +-----------------------------------------------+
 *
 *
 * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 *                   |       |                ^       ^
 *                   |       +----------------+       |
 *                   +--------------------------------+
 *
 *                                                    +---------+
 *                                                    |         V
 * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 *                   |      |          |          |     ^                   ^
 *                   |      |          |          +-----+                   |
 *                   |      |          +----------------+                   |
 *                   |      +---------------------------+                   |
 *                   +------------------------------------------------------+
143 *
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +0000144 * They all start with a BRANCH for "\|" alternatives, even when there is only
Bram Moolenaar071d4272004-06-13 20:20:40 +0000145 * one alternative.
146 */
147
148/*
149 * The opcodes are:
150 */
151
/*
 * Each entry: name, opcode number, then in the comment the operand kind
 * (if the node has one) followed by the meaning.
 */
#define END             0   /* End of program or NOMATCH operand. */
#define BOL             1   /* Match "" at beginning of line. */
#define EOL             2   /* Match "" at end of line. */
#define BRANCH          3   /* node: Match this alternative, or the next... */
#define BACK            4   /* Match "", "next" ptr points backward. */
#define EXACTLY         5   /* str: Match this string. */
#define NOTHING         6   /* Match empty string. */
#define STAR            7   /* node: Match this (simple) thing 0 or more
                             * times. */
#define PLUS            8   /* node: Match this (simple) thing 1 or more
                             * times. */
#define MATCH           9   /* node: match the operand zero-width */
#define NOMATCH         10  /* node: check for no match with operand */
#define BEHIND          11  /* node: look behind for a match with operand */
#define NOBEHIND        12  /* node: look behind for no match with operand */
#define SUBPAT          13  /* node: match the operand here */
#define BRACE_SIMPLE    14  /* node: Match this (simple) thing between m and
                             * n times (\{m,n\}). */
#define BOW             15  /* Match "" after [^a-zA-Z0-9_] */
#define EOW             16  /* Match "" at [^a-zA-Z0-9_] */
#define BRACE_LIMITS    17  /* nr nr: define the min & max for BRACE_SIMPLE
                             * and BRACE_COMPLEX. */
#define NEWL            18  /* Match line-break */
#define BHPOS           19  /* End position for BEHIND or NOBEHIND */
178
179
/*
 * Character classes: 20-48 normal, 50-78 include a line-break.
 * The line-break variant of a class is the normal opcode plus ADD_NL.
 *
 * FIRST_NL and LAST_NL are fully parenthesized so that they expand to a
 * single value wherever they are used (e.g. "op - FIRST_NL" or
 * "2 * LAST_NL"), not only in comparisons.
 */
#define ADD_NL          30
#define FIRST_NL        (ANY + ADD_NL)
#define ANY             20  /* Match any one character. */
#define ANYOF           21  /* str: Match any character in this string. */
#define ANYBUT          22  /* str: Match any character not in this
                             * string. */
#define IDENT           23  /* Match identifier char */
#define SIDENT          24  /* Match identifier char but no digit */
#define KWORD           25  /* Match keyword char */
#define SKWORD          26  /* Match word char but no digit */
#define FNAME           27  /* Match file name char */
#define SFNAME          28  /* Match file name char but no digit */
#define PRINT           29  /* Match printable char */
#define SPRINT          30  /* Match printable char but no digit */
#define WHITE           31  /* Match whitespace char */
#define NWHITE          32  /* Match non-whitespace char */
#define DIGIT           33  /* Match digit char */
#define NDIGIT          34  /* Match non-digit char */
#define HEX             35  /* Match hex char */
#define NHEX            36  /* Match non-hex char */
#define OCTAL           37  /* Match octal char */
#define NOCTAL          38  /* Match non-octal char */
#define WORD            39  /* Match word char */
#define NWORD           40  /* Match non-word char */
#define HEAD            41  /* Match head char */
#define NHEAD           42  /* Match non-head char */
#define ALPHA           43  /* Match alpha char */
#define NALPHA          44  /* Match non-alpha char */
#define LOWER           45  /* Match lowercase char */
#define NLOWER          46  /* Match non-lowercase char */
#define UPPER           47  /* Match uppercase char */
#define NUPPER          48  /* Match non-uppercase char */
#define LAST_NL         (NUPPER + ADD_NL)
/* TRUE when "op" is one of the character classes that include a line-break. */
#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
215
#define MOPEN           80  /* -89: Mark this point in input as start of
                             * \( subexpr.  MOPEN + 0 marks start of
                             * match. */
#define MCLOSE          90  /* -99: Analogous to MOPEN.  MCLOSE + 0 marks
                             * end of match. */
#define BACKREF         100 /* -109 node: Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN          110 /* -119: Mark this point in input as start of
                             * \z( subexpr. */
# define ZCLOSE         120 /* -129: Analogous to ZOPEN. */
# define ZREF           130 /* -139 node: Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX   140 /* -149 node: Match nodes between m & n times */

#define NOPEN           150 /* Mark this point in input as start of
                             * \%( subexpr. */
#define NCLOSE          151 /* Analogous to NOPEN. */

#define MULTIBYTECODE   200 /* mbc: Match one multi-byte character */
#define RE_BOF          201 /* Match "" at beginning of file. */
#define RE_EOF          202 /* Match "" at end of file. */
#define CURSOR          203 /* Match location of cursor. */

#define RE_LNUM         204 /* nr cmp: Match line number */
#define RE_COL          205 /* nr cmp: Match column number */
#define RE_VCOL         206 /* nr cmp: Match virtual column number */

#define RE_MARK         207 /* mark cmp: Match mark position */
#define RE_VISUAL       208 /* Match Visual area */
247
Bram Moolenaar071d4272004-06-13 20:20:40 +0000248/*
249 * Magic characters have a special meaning, they don't match literally.
250 * Magic characters are negative. This separates them from literal characters
251 * (possibly multi-byte). Only ASCII characters can be Magic.
252 */
253#define Magic(x) ((int)(x) - 256)
254#define un_Magic(x) ((x) + 256)
255#define is_Magic(x) ((x) < 0)
256
257static int no_Magic __ARGS((int x));
258static int toggle_Magic __ARGS((int x));
259
260 static int
261no_Magic(x)
262 int x;
263{
264 if (is_Magic(x))
265 return un_Magic(x);
266 return x;
267}
268
269 static int
270toggle_Magic(x)
271 int x;
272{
273 if (is_Magic(x))
274 return un_Magic(x);
275 return Magic(x);
276}
277
278/*
279 * The first byte of the regexp internal "program" is actually this magic
280 * number; the start node begins in the second byte. It's used to catch the
281 * most severe mutilation of the program by the caller.
282 */
283
284#define REGMAGIC 0234
285
286/*
287 * Opcode notes:
288 *
289 * BRANCH The set of branches constituting a single choice are hooked
290 * together with their "next" pointers, since precedence prevents
291 * anything being concatenated to any individual branch. The
292 * "next" pointer of the last BRANCH in a choice points to the
293 * thing following the whole choice. This is also where the
294 * final "next" pointer of each individual branch points; each
295 * branch starts with the operand node of a BRANCH node.
296 *
297 * BACK Normal "next" pointers all implicitly point forward; BACK
298 * exists to make loop structures possible.
299 *
300 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
301 * BRANCH structures using BACK. Simple cases (one character
302 * per match) are implemented with STAR and PLUS for speed
303 * and to minimize recursive plunges.
304 *
305 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
306 * node, and defines the min and max limits to be used for that
307 * node.
308 *
309 * MOPEN,MCLOSE ...are numbered at compile time.
310 * ZOPEN,ZCLOSE ...ditto
311 */
312
313/*
314 * A node is one char of opcode followed by two chars of "next" pointer.
315 * "Next" pointers are stored as two 8-bit bytes, high order first. The
316 * value is a positive offset from the opcode of the node containing it.
317 * An operand, if any, simply follows the node. (Note that much of the
318 * code generation knows about this implicit relationship.)
319 *
320 * Using two bytes for the "next" pointer is vast overkill for most things,
321 * but allows patterns to get big without disasters.
322 */
/* The opcode is the first byte of the node. */
#define OP(p)           ((int)*(p))
/* The "next" offset: bytes 1 and 2 of the node, high order byte first. */
#define NEXT(p)         ((((p)[1] & 0xff) << 8) + ((p)[2] & 0xff))
/* An operand, if any, starts right after the three bytes of the node. */
#define OPERAND(p)      ((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)  (((long)(p)[3] << 24) \
                        + ((long)(p)[4] << 16) \
                        + ((long)(p)[5] << 8) \
                        + (long)(p)[6])
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)  (p)[7]
333
334/*
335 * Utility definitions.
336 */
/* The byte at "p", read as an unsigned char (char_u) and widened to int. */
#define UCHARAT(p)      ((int)*(char_u *)(p))

/*
 * Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL or FAIL.
 * The EMSG2 variants pass "" to the message when "c" is non-zero and "\\"
 * otherwise.
 */
#define EMSG_RET_NULL(m) \
        return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) \
        return (EMSG(m), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) \
        return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) \
        return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL \
        EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)

#define MAX_LIMIT       (32767L << 16L)
348
349static int re_multi_type __ARGS((int));
350static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
351static char_u *cstrchr __ARGS((char_u *, int));
352
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200353#ifdef BT_REGEXP_DUMP
354static void regdump __ARGS((char_u *, bt_regprog_T *));
355#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000356#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +0000357static char_u *regprop __ARGS((char_u *));
358#endif
359
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200360static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
361static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
362static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
363static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
364
Bram Moolenaar071d4272004-06-13 20:20:40 +0000365#define NOT_MULTI 0
366#define MULTI_ONE 1
367#define MULTI_MULT 2
368/*
369 * Return NOT_MULTI if c is not a "multi" operator.
370 * Return MULTI_ONE if c is a single "multi" operator.
371 * Return MULTI_MULT if c is a multi "multi" operator.
372 */
373 static int
374re_multi_type(c)
375 int c;
376{
377 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
378 return MULTI_ONE;
379 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
380 return MULTI_MULT;
381 return NOT_MULTI;
382}
383
384/*
385 * Flags to be passed up and down.
386 */
387#define HASWIDTH 0x1 /* Known never to match null string. */
388#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
389#define SPSTART 0x4 /* Starts with * or +. */
390#define HASNL 0x8 /* Contains some \n. */
391#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
392#define WORST 0 /* Worst case. */
393
394/*
395 * When regcode is set to this value, code is not emitted and size is computed
396 * instead.
397 */
398#define JUST_CALC_SIZE ((char_u *) -1)
399
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000400static char_u *reg_prev_sub = NULL;
401
Bram Moolenaar071d4272004-06-13 20:20:40 +0000402/*
403 * REGEXP_INRANGE contains all characters which are always special in a []
404 * range after '\'.
405 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
406 * These are:
407 * \n - New line (NL).
408 * \r - Carriage Return (CR).
409 * \t - Tab (TAB).
410 * \e - Escape (ESC).
411 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000412 * \d - Character code in decimal, eg \d123
413 * \o - Character code in octal, eg \o80
414 * \x - Character code in hex, eg \x4a
415 * \u - Multibyte character code, eg \u20ac
416 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000417 */
418static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000419static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000420
421static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000422static int get_char_class __ARGS((char_u **pp));
423static int get_equi_class __ARGS((char_u **pp));
424static void reg_equi_class __ARGS((int c));
425static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000426static char_u *skip_anyof __ARGS((char_u *p));
427static void init_class_tab __ARGS((void));
428
429/*
430 * Translate '\x' to its control character, except "\n", which is Magic.
431 */
432 static int
433backslash_trans(c)
434 int c;
435{
436 switch (c)
437 {
438 case 'r': return CAR;
439 case 't': return TAB;
440 case 'e': return ESC;
441 case 'b': return BS;
442 }
443 return c;
444}
445
446/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000447 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000448 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
449 * recognized. Otherwise "pp" is advanced to after the item.
450 */
451 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000452get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000453 char_u **pp;
454{
455 static const char *(class_names[]) =
456 {
457 "alnum:]",
458#define CLASS_ALNUM 0
459 "alpha:]",
460#define CLASS_ALPHA 1
461 "blank:]",
462#define CLASS_BLANK 2
463 "cntrl:]",
464#define CLASS_CNTRL 3
465 "digit:]",
466#define CLASS_DIGIT 4
467 "graph:]",
468#define CLASS_GRAPH 5
469 "lower:]",
470#define CLASS_LOWER 6
471 "print:]",
472#define CLASS_PRINT 7
473 "punct:]",
474#define CLASS_PUNCT 8
475 "space:]",
476#define CLASS_SPACE 9
477 "upper:]",
478#define CLASS_UPPER 10
479 "xdigit:]",
480#define CLASS_XDIGIT 11
481 "tab:]",
482#define CLASS_TAB 12
483 "return:]",
484#define CLASS_RETURN 13
485 "backspace:]",
486#define CLASS_BACKSPACE 14
487 "escape:]",
488#define CLASS_ESCAPE 15
489 };
490#define CLASS_NONE 99
491 int i;
492
493 if ((*pp)[1] == ':')
494 {
Bram Moolenaar78a15312009-05-15 19:33:18 +0000495 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000496 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
497 {
498 *pp += STRLEN(class_names[i]) + 2;
499 return i;
500 }
501 }
502 return CLASS_NONE;
503}
504
505/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000506 * Specific version of character class functions.
507 * Using a table to keep this fast.
508 */
509static short class_tab[256];
510
511#define RI_DIGIT 0x01
512#define RI_HEX 0x02
513#define RI_OCTAL 0x04
514#define RI_WORD 0x08
515#define RI_HEAD 0x10
516#define RI_ALPHA 0x20
517#define RI_LOWER 0x40
518#define RI_UPPER 0x80
519#define RI_WHITE 0x100
520
521 static void
522init_class_tab()
523{
524 int i;
525 static int done = FALSE;
526
527 if (done)
528 return;
529
530 for (i = 0; i < 256; ++i)
531 {
532 if (i >= '0' && i <= '7')
533 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
534 else if (i >= '8' && i <= '9')
535 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
536 else if (i >= 'a' && i <= 'f')
537 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
538#ifdef EBCDIC
539 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
540 || (i >= 's' && i <= 'z'))
541#else
542 else if (i >= 'g' && i <= 'z')
543#endif
544 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
545 else if (i >= 'A' && i <= 'F')
546 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
547#ifdef EBCDIC
548 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
549 || (i >= 'S' && i <= 'Z'))
550#else
551 else if (i >= 'G' && i <= 'Z')
552#endif
553 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
554 else if (i == '_')
555 class_tab[i] = RI_WORD + RI_HEAD;
556 else
557 class_tab[i] = 0;
558 }
559 class_tab[' '] |= RI_WHITE;
560 class_tab['\t'] |= RI_WHITE;
561 done = TRUE;
562}
563
/*
 * Character class tests using class_tab[].  Each macro is non-zero when "c"
 * has the corresponding RI_ flag set.
 * With FEAT_MBYTE a value of 0x100 or above is never in a class; the
 * argument is parenthesized so that an expression such as "a ? b : c" is
 * compared as a whole.
 * NOTE: "c" is used as an index in the 256-entry class_tab[] and, with
 * FEAT_MBYTE, is evaluated twice: pass a non-negative value without side
 * effects.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
585
586/* flags for regflags */
587#define RF_ICASE 1 /* ignore case */
588#define RF_NOICASE 2 /* don't ignore case */
589#define RF_HASNL 4 /* can match a NL */
590#define RF_ICOMBINE 8 /* ignore combining characters */
591#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
592
593/*
594 * Global work variables for vim_regcomp().
595 */
596
597static char_u *regparse; /* Input-scan pointer. */
598static int prevchr_len; /* byte length of previous char */
599static int num_complex_braces; /* Complex \{...} count */
600static int regnpar; /* () count. */
601#ifdef FEAT_SYN_HL
602static int regnzpar; /* \z() count. */
603static int re_has_z; /* \z item detected */
604#endif
605static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
606static long regsize; /* Code size. */
Bram Moolenaard3005802009-11-25 17:21:32 +0000607static int reg_toolong; /* TRUE when offset out of range */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000608static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
609static unsigned regflags; /* RF_ flags for prog */
610static long brace_min[10]; /* Minimums for complex brace repeats */
611static long brace_max[10]; /* Maximums for complex brace repeats */
612static int brace_count[10]; /* Current counts for complex brace repeats */
613#if defined(FEAT_SYN_HL) || defined(PROTO)
614static int had_eol; /* TRUE when EOL found by vim_regcomp() */
615#endif
616static int one_exactly = FALSE; /* only do one char for EXACTLY */
617
618static int reg_magic; /* magicness of the pattern: */
619#define MAGIC_NONE 1 /* "\V" very unmagic */
620#define MAGIC_OFF 2 /* "\M" or 'magic' off */
621#define MAGIC_ON 3 /* "\m" or 'magic' */
622#define MAGIC_ALL 4 /* "\v" very magic */
623
624static int reg_string; /* matching with a string instead of a buffer
625 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000626static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000627
628/*
629 * META contains all characters that may be magic, except '^' and '$'.
630 */
631
632#ifdef EBCDIC
633static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
634#else
635/* META[] is used often enough to justify turning it into a table. */
636static char_u META_flags[] = {
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639/* % & ( ) * + . */
640 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
641/* 1 2 3 4 5 6 7 8 9 < = > ? */
642 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
643/* @ A C D F H I K L M O */
644 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
645/* P S U V W X Z [ _ */
646 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
647/* a c d f h i k l m n o */
648 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
649/* p s u v w x z { | ~ */
650 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
651};
652#endif
653
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200654static int curchr; /* currently parsed character */
655/* Previous character. Note: prevchr is sometimes -1 when we are not at the
656 * start, eg in /[ ^I]^ the pattern was never found even if it existed,
657 * because ^ was taken to be magic -- webb */
658static int prevchr;
659static int prevprevchr; /* previous-previous character */
660static int nextchr; /* used for ungetchr() */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000661
662/* arguments for reg() */
663#define REG_NOPAREN 0 /* toplevel reg() */
664#define REG_PAREN 1 /* \(\) */
665#define REG_ZPAREN 2 /* \z(\) */
666#define REG_NPAREN 3 /* \%(\) */
667
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200668typedef struct
669{
670 char_u *regparse;
671 int prevchr_len;
672 int curchr;
673 int prevchr;
674 int prevprevchr;
675 int nextchr;
676 int at_start;
677 int prev_at_start;
678 int regnpar;
679} parse_state_T;
680
Bram Moolenaar071d4272004-06-13 20:20:40 +0000681/*
682 * Forward declarations for vim_regcomp()'s friends.
683 */
684static void initchr __ARGS((char_u *));
Bram Moolenaar3737fc12013-06-01 14:42:56 +0200685static void save_parse_state __ARGS((parse_state_T *ps));
686static void restore_parse_state __ARGS((parse_state_T *ps));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000687static int getchr __ARGS((void));
688static void skipchr_keepstart __ARGS((void));
689static int peekchr __ARGS((void));
690static void skipchr __ARGS((void));
691static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000692static int gethexchrs __ARGS((int maxinputlen));
693static int getoctchrs __ARGS((void));
694static int getdecchrs __ARGS((void));
695static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000696static void regcomp_start __ARGS((char_u *expr, int flags));
697static char_u *reg __ARGS((int, int *));
698static char_u *regbranch __ARGS((int *flagp));
699static char_u *regconcat __ARGS((int *flagp));
700static char_u *regpiece __ARGS((int *));
701static char_u *regatom __ARGS((int *));
702static char_u *regnode __ARGS((int));
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000703#ifdef FEAT_MBYTE
704static int use_multibytecode __ARGS((int c));
705#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000706static int prog_magic_wrong __ARGS((void));
707static char_u *regnext __ARGS((char_u *));
708static void regc __ARGS((int b));
709#ifdef FEAT_MBYTE
710static void regmbc __ARGS((int c));
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200711# define REGMBC(x) regmbc(x);
712# define CASEMBC(x) case x:
Bram Moolenaardf177f62005-02-22 08:39:57 +0000713#else
714# define regmbc(c) regc(c)
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200715# define REGMBC(x)
716# define CASEMBC(x)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000717#endif
718static void reginsert __ARGS((int, char_u *));
Bram Moolenaar75eb1612013-05-29 18:45:11 +0200719static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000720static void reginsert_limits __ARGS((int, long, long, char_u *));
721static char_u *re_put_long __ARGS((char_u *pr, long_u val));
722static int read_limits __ARGS((long *, long *));
723static void regtail __ARGS((char_u *, char_u *));
724static void regoptail __ARGS((char_u *, char_u *));
725
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200726static regengine_T bt_regengine;
727static regengine_T nfa_regengine;
728
Bram Moolenaar071d4272004-06-13 20:20:40 +0000729/*
730 * Return TRUE if compiled regular expression "prog" can match a line break.
731 */
732 int
733re_multiline(prog)
734 regprog_T *prog;
735{
736 return (prog->regflags & RF_HASNL);
737}
738
739/*
740 * Return TRUE if compiled regular expression "prog" looks before the start
741 * position (pattern contains "\@<=" or "\@<!").
742 */
743 int
744re_lookbehind(prog)
745 regprog_T *prog;
746{
747 return (prog->regflags & RF_LOOKBH);
748}
749
750/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000751 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
752 * Returns a character representing the class. Zero means that no item was
753 * recognized. Otherwise "pp" is advanced to after the item.
754 */
755 static int
756get_equi_class(pp)
757 char_u **pp;
758{
759 int c;
760 int l = 1;
761 char_u *p = *pp;
762
763 if (p[1] == '=')
764 {
765#ifdef FEAT_MBYTE
766 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000767 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000768#endif
769 if (p[l + 2] == '=' && p[l + 3] == ']')
770 {
771#ifdef FEAT_MBYTE
772 if (has_mbyte)
773 c = mb_ptr2char(p + 2);
774 else
775#endif
776 c = p[2];
777 *pp += l + 4;
778 return c;
779 }
780 }
781 return 0;
782}
783
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200784#ifdef EBCDIC
785/*
786 * Table for equivalence class "c". (IBM-1047)
787 */
788char *EQUIVAL_CLASS_C[16] = {
789 "A\x62\x63\x64\x65\x66\x67",
790 "C\x68",
791 "E\x71\x72\x73\x74",
792 "I\x75\x76\x77\x78",
793 "N\x69",
794 "O\xEB\xEC\xED\xEE\xEF",
795 "U\xFB\xFC\xFD\xFE",
796 "Y\xBA",
797 "a\x42\x43\x44\x45\x46\x47",
798 "c\x48",
799 "e\x51\x52\x53\x54",
800 "i\x55\x56\x57\x58",
801 "n\x49",
802 "o\xCB\xCC\xCD\xCE\xCF",
803 "u\xDB\xDC\xDD\xDE",
804 "y\x8D\xDF",
805};
806#endif
807
Bram Moolenaardf177f62005-02-22 08:39:57 +0000808/*
809 * Produce the bytes for equivalence class "c".
810 * Currently only handles latin1, latin9 and utf-8.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200811 * NOTE: When changing this function, also change nfa_emit_equi_class()
Bram Moolenaardf177f62005-02-22 08:39:57 +0000812 */
813 static void
814reg_equi_class(c)
815 int c;
816{
817#ifdef FEAT_MBYTE
818 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
Bram Moolenaar78622822005-08-23 21:00:13 +0000819 || STRCMP(p_enc, "iso-8859-15") == 0)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000820#endif
821 {
Bram Moolenaar2c704a72010-06-03 21:17:25 +0200822#ifdef EBCDIC
823 int i;
824
825 /* This might be slower than switch/case below. */
826 for (i = 0; i < 16; i++)
827 {
828 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
829 {
830 char *p = EQUIVAL_CLASS_C[i];
831
832 while (*p != 0)
833 regmbc(*p++);
834 return;
835 }
836 }
837#else
Bram Moolenaardf177f62005-02-22 08:39:57 +0000838 switch (c)
839 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000840 case 'A': case '\300': case '\301': case '\302':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200841 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
842 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000843 case '\303': case '\304': case '\305':
844 regmbc('A'); regmbc('\300'); regmbc('\301');
845 regmbc('\302'); regmbc('\303'); regmbc('\304');
846 regmbc('\305');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200847 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
848 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
849 REGMBC(0x1ea2)
850 return;
851 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
852 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000853 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000854 case 'C': case '\307':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200855 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000856 regmbc('C'); regmbc('\307');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200857 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
858 REGMBC(0x10c)
859 return;
860 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
861 CASEMBC(0x1e0e) CASEMBC(0x1e10)
862 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
863 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000864 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000865 case 'E': case '\310': case '\311': case '\312': case '\313':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200866 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
867 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000868 regmbc('E'); regmbc('\310'); regmbc('\311');
869 regmbc('\312'); regmbc('\313');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200870 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
871 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
872 REGMBC(0x1ebc)
873 return;
874 case 'F': CASEMBC(0x1e1e)
875 regmbc('F'); REGMBC(0x1e1e)
876 return;
877 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
878 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
879 CASEMBC(0x1e20)
880 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
881 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
882 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
883 return;
884 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
885 CASEMBC(0x1e26) CASEMBC(0x1e28)
886 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
887 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000888 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000889 case 'I': case '\314': case '\315': case '\316': case '\317':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200890 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
891 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000892 regmbc('I'); regmbc('\314'); regmbc('\315');
893 regmbc('\316'); regmbc('\317');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200894 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
895 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
896 REGMBC(0x1ec8)
897 return;
898 case 'J': CASEMBC(0x134)
899 regmbc('J'); REGMBC(0x134)
900 return;
901 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
902 CASEMBC(0x1e34)
903 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
904 REGMBC(0x1e30) REGMBC(0x1e34)
905 return;
906 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
907 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
908 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
909 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
910 REGMBC(0x1e3a)
911 return;
912 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
913 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000914 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000915 case 'N': case '\321':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200916 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
917 CASEMBC(0x1e48)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000918 regmbc('N'); regmbc('\321');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200919 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
920 REGMBC(0x1e44) REGMBC(0x1e48)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000921 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000922 case 'O': case '\322': case '\323': case '\324': case '\325':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200923 case '\326': case '\330':
924 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
925 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000926 regmbc('O'); regmbc('\322'); regmbc('\323');
927 regmbc('\324'); regmbc('\325'); regmbc('\326');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200928 regmbc('\330');
929 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
930 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
931 REGMBC(0x1ec) REGMBC(0x1ece)
932 return;
933 case 'P': case 0x1e54: case 0x1e56:
934 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
935 return;
936 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
937 CASEMBC(0x1e58) CASEMBC(0x1e5e)
938 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
939 REGMBC(0x1e58) REGMBC(0x1e5e)
940 return;
941 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
942 CASEMBC(0x160) CASEMBC(0x1e60)
943 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
944 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
945 return;
946 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
947 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
948 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
949 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000950 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000951 case 'U': case '\331': case '\332': case '\333': case '\334':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200952 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
953 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
954 CASEMBC(0x1ee6)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000955 regmbc('U'); regmbc('\331'); regmbc('\332');
956 regmbc('\333'); regmbc('\334');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200957 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
958 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
959 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
960 return;
961 case 'V': CASEMBC(0x1e7c)
962 regmbc('V'); REGMBC(0x1e7c)
963 return;
964 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
965 CASEMBC(0x1e84) CASEMBC(0x1e86)
966 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
967 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
968 return;
969 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
970 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000971 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000972 case 'Y': case '\335':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200973 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
974 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000975 regmbc('Y'); regmbc('\335');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200976 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
977 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
978 return;
979 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
980 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
981 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
982 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
983 REGMBC(0x1e94)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000984 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000985 case 'a': case '\340': case '\341': case '\342':
986 case '\343': case '\344': case '\345':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200987 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
988 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000989 regmbc('a'); regmbc('\340'); regmbc('\341');
990 regmbc('\342'); regmbc('\343'); regmbc('\344');
991 regmbc('\345');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +0200992 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
993 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
994 REGMBC(0x1ea3)
995 return;
996 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
997 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000998 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000999 case 'c': case '\347':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001000 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001001 regmbc('c'); regmbc('\347');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001002 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1003 REGMBC(0x10d)
1004 return;
1005 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b)
1006 CASEMBC(0x1e11)
1007 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
1008 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001009 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001010 case 'e': case '\350': case '\351': case '\352': case '\353':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001011 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1012 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001013 regmbc('e'); regmbc('\350'); regmbc('\351');
1014 regmbc('\352'); regmbc('\353');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001015 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1016 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1017 REGMBC(0x1ebd)
1018 return;
1019 case 'f': CASEMBC(0x1e1f)
1020 regmbc('f'); REGMBC(0x1e1f)
1021 return;
1022 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1023 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1024 CASEMBC(0x1e21)
1025 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1026 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1027 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1028 return;
1029 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1030 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1031 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1032 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1033 REGMBC(0x1e96)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001034 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001035 case 'i': case '\354': case '\355': case '\356': case '\357':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001036 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1037 CASEMBC(0x1d0) CASEMBC(0x1ec9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001038 regmbc('i'); regmbc('\354'); regmbc('\355');
1039 regmbc('\356'); regmbc('\357');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001040 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1041 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1042 return;
1043 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1044 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1045 return;
1046 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1047 CASEMBC(0x1e35)
1048 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1049 REGMBC(0x1e31) REGMBC(0x1e35)
1050 return;
1051 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1052 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1053 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1054 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1055 REGMBC(0x1e3b)
1056 return;
1057 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1058 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001059 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001060 case 'n': case '\361':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001061 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1062 CASEMBC(0x1e45) CASEMBC(0x1e49)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001063 regmbc('n'); regmbc('\361');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001064 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1065 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001066 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001067 case 'o': case '\362': case '\363': case '\364': case '\365':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001068 case '\366': case '\370':
1069 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1070 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001071 regmbc('o'); regmbc('\362'); regmbc('\363');
1072 regmbc('\364'); regmbc('\365'); regmbc('\366');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001073 regmbc('\370');
1074 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1075 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1076 REGMBC(0x1ed) REGMBC(0x1ecf)
1077 return;
1078 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1079 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1080 return;
1081 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1082 CASEMBC(0x1e59) CASEMBC(0x1e5f)
1083 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1084 REGMBC(0x1e59) REGMBC(0x1e5f)
1085 return;
1086 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1087 CASEMBC(0x161) CASEMBC(0x1e61)
1088 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1089 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1090 return;
1091 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1092 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1093 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1094 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001095 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001096 case 'u': case '\371': case '\372': case '\373': case '\374':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001097 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1098 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1099 CASEMBC(0x1ee7)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001100 regmbc('u'); regmbc('\371'); regmbc('\372');
1101 regmbc('\373'); regmbc('\374');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001102 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1103 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1104 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1105 return;
1106 case 'v': CASEMBC(0x1e7d)
1107 regmbc('v'); REGMBC(0x1e7d)
1108 return;
1109 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1110 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1111 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1112 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1113 REGMBC(0x1e98)
1114 return;
1115 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1116 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001117 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001118 case 'y': case '\375': case '\377':
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001119 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1120 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001121 regmbc('y'); regmbc('\375'); regmbc('\377');
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02001122 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1123 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1124 return;
1125 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1126 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1127 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1128 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1129 REGMBC(0x1e95)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001130 return;
1131 }
Bram Moolenaar2c704a72010-06-03 21:17:25 +02001132#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00001133 }
1134 regmbc(c);
1135}
1136
1137/*
1138 * Check for a collating element "[.a.]". "pp" points to the '['.
1139 * Returns a character. Zero means that no item was recognized. Otherwise
1140 * "pp" is advanced to after the item.
1141 * Currently only single characters are recognized!
1142 */
1143 static int
1144get_coll_element(pp)
1145 char_u **pp;
1146{
1147 int c;
1148 int l = 1;
1149 char_u *p = *pp;
1150
1151 if (p[1] == '.')
1152 {
1153#ifdef FEAT_MBYTE
1154 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001155 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001156#endif
1157 if (p[l + 2] == '.' && p[l + 3] == ']')
1158 {
1159#ifdef FEAT_MBYTE
1160 if (has_mbyte)
1161 c = mb_ptr2char(p + 2);
1162 else
1163#endif
1164 c = p[2];
1165 *pp += l + 4;
1166 return c;
1167 }
1168 }
1169 return 0;
1170}
1171
1172
1173/*
1174 * Skip over a "[]" range.
1175 * "p" must point to the character after the '['.
1176 * The returned pointer is on the matching ']', or the terminating NUL.
1177 */
1178 static char_u *
1179skip_anyof(p)
1180 char_u *p;
1181{
1182 int cpo_lit; /* 'cpoptions' contains 'l' flag */
1183 int cpo_bsl; /* 'cpoptions' contains '\' flag */
1184#ifdef FEAT_MBYTE
1185 int l;
1186#endif
1187
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001188 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1189 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaardf177f62005-02-22 08:39:57 +00001190
1191 if (*p == '^') /* Complement of range. */
1192 ++p;
1193 if (*p == ']' || *p == '-')
1194 ++p;
1195 while (*p != NUL && *p != ']')
1196 {
1197#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001198 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +00001199 p += l;
1200 else
1201#endif
1202 if (*p == '-')
1203 {
1204 ++p;
1205 if (*p != ']' && *p != NUL)
1206 mb_ptr_adv(p);
1207 }
1208 else if (*p == '\\'
1209 && !cpo_bsl
1210 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
1211 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
1212 p += 2;
1213 else if (*p == '[')
1214 {
1215 if (get_char_class(&p) == CLASS_NONE
1216 && get_equi_class(&p) == 0
1217 && get_coll_element(&p) == 0)
1218 ++p; /* It was not a class name */
1219 }
1220 else
1221 ++p;
1222 }
1223
1224 return p;
1225}
1226
1227/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00001228 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +00001229 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +00001230 * Take care of characters with a backslash in front of it.
1231 * Skip strings inside [ and ].
1232 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1233 * expression and change "\?" to "?". If "*newp" is not NULL the expression
1234 * is changed in-place.
1235 */
1236 char_u *
1237skip_regexp(startp, dirc, magic, newp)
1238 char_u *startp;
1239 int dirc;
1240 int magic;
1241 char_u **newp;
1242{
1243 int mymagic;
1244 char_u *p = startp;
1245
1246 if (magic)
1247 mymagic = MAGIC_ON;
1248 else
1249 mymagic = MAGIC_OFF;
1250
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00001251 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00001252 {
1253 if (p[0] == dirc) /* found end of regexp */
1254 break;
1255 if ((p[0] == '[' && mymagic >= MAGIC_ON)
1256 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1257 {
1258 p = skip_anyof(p + 1);
1259 if (p[0] == NUL)
1260 break;
1261 }
1262 else if (p[0] == '\\' && p[1] != NUL)
1263 {
1264 if (dirc == '?' && newp != NULL && p[1] == '?')
1265 {
1266 /* change "\?" to "?", make a copy first. */
1267 if (*newp == NULL)
1268 {
1269 *newp = vim_strsave(startp);
1270 if (*newp != NULL)
1271 p = *newp + (p - startp);
1272 }
1273 if (*newp != NULL)
Bram Moolenaar446cb832008-06-24 21:56:24 +00001274 STRMOVE(p, p + 1);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001275 else
1276 ++p;
1277 }
1278 else
1279 ++p; /* skip next character */
1280 if (*p == 'v')
1281 mymagic = MAGIC_ALL;
1282 else if (*p == 'V')
1283 mymagic = MAGIC_NONE;
1284 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001285 }
1286 return p;
1287}
1288
static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));

/*
 * bt_regcomp() - compile a regular expression into internal code for the
 * traditional backtracking matcher.
 * Returns the program in allocated space.  Returns NULL for an error.
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because vim_free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 *
 * "expr": the pattern text; NULL is reported as an error.
 * "re_flags": RE_MAGIC and/or RE_STRING.
 */
    static regprog_T *
bt_regcomp(expr, re_flags)
    char_u      *expr;
    int         re_flags;
{
    bt_regprog_T    *r;
    char_u      *scan;
    char_u      *longest;
    int         len;
    int         flags;

    if (expr == NULL)
        EMSG_RET_NULL(_(e_null));

    init_class_tab();

    /*
     * First pass: determine size, legality.
     * With regcode set to JUST_CALC_SIZE no program is written; the needed
     * size ends up in "regsize", which is used for the allocation below.
     */
    regcomp_start(expr, re_flags);
    regcode = JUST_CALC_SIZE;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
        return NULL;

    /* Small enough for pointer-storage convention? */
#ifdef SMALL_MALLOC             /* 16 bit storage allocation */
    if (regsize >= 65536L - 256L)
        EMSG_RET_NULL(_("E339: Pattern too long"));
#endif

    /* Allocate space: the program is stored directly after the header. */
    r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
    if (r == NULL)
        return NULL;

    /*
     * Second pass: emit code.
     * The parser state is reset and the same pattern is parsed again, this
     * time writing into r->program.
     */
    regcomp_start(expr, re_flags);
    regcode = r->program;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
    {
        /* Failed after allocating: free the program before returning. */
        vim_free(r);
        if (reg_toolong)
            EMSG_RET_NULL(_("E339: Pattern too long"));
        return NULL;
    }

    /* Dig out information for optimizations. */
    r->regstart = NUL;          /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    r->regflags = regflags;
    if (flags & HASNL)
        r->regflags |= RF_HASNL;
    if (flags & HASLOOKBH)
        r->regflags |= RF_LOOKBH;
#ifdef FEAT_SYN_HL
    /* Remember whether this pattern has any \z specials in it. */
    r->reghasz = re_has_z;
#endif
    scan = r->program + 1;      /* First BRANCH. */
    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
    {
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == BOL || OP(scan) == RE_BOF)
        {
            r->reganch++;
            scan = regnext(scan);
        }

        /* When the first item is literal text, remember its first
         * character in "regstart". */
        if (OP(scan) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(scan));
            else
#endif
                r->regstart = *OPERAND(scan);
        }
        /* Same when a single one of the items listed here comes before
         * the literal text. */
        else if ((OP(scan) == BOW
                    || OP(scan) == EOW
                    || OP(scan) == NOTHING
                    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
                    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
                 && OP(regnext(scan)) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
            else
#endif
                r->regstart = *OPERAND(regnext(scan));
        }

        /*
         * If there's something expensive in the r.e., find the longest
         * literal string that must appear and make it the regmust.  Resolve
         * ties in favor of later strings, since the regstart check works
         * with the beginning of the r.e. and avoiding duplication
         * strengthens checking.  Not a strong reason, but sufficient in the
         * absence of others.
         */
        /*
         * When the r.e. starts with BOW, it is faster to look for a regmust
         * first. Used a lot for "#" and "*" commands. (Added by mool).
         */
        if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
                                                          && !(flags & HASNL))
        {
            longest = NULL;
            len = 0;
            /* ">=" makes a later string of equal length win. */
            for (; scan != NULL; scan = regnext(scan))
                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
                {
                    longest = OPERAND(scan);
                    len = (int)STRLEN(OPERAND(scan));
                }
            r->regmust = longest;
            r->regmlen = len;
        }
    }
#ifdef BT_REGEXP_DUMP
    regdump(expr, r);
#endif
    r->engine = &bt_regengine;
    return (regprog_T *)r;
}
1445
1446/*
1447 * Setup to parse the regexp. Used once to get the length and once to do it.
1448 */
1449 static void
1450regcomp_start(expr, re_flags)
1451 char_u *expr;
1452 int re_flags; /* see vim_regcomp() */
1453{
1454 initchr(expr);
1455 if (re_flags & RE_MAGIC)
1456 reg_magic = MAGIC_ON;
1457 else
1458 reg_magic = MAGIC_OFF;
1459 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001460 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001461
1462 num_complex_braces = 0;
1463 regnpar = 1;
1464 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1465#ifdef FEAT_SYN_HL
1466 regnzpar = 1;
1467 re_has_z = 0;
1468#endif
1469 regsize = 0L;
Bram Moolenaard3005802009-11-25 17:21:32 +00001470 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001471 regflags = 0;
1472#if defined(FEAT_SYN_HL) || defined(PROTO)
1473 had_eol = FALSE;
1474#endif
1475}
1476
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found.  This is messy, but it works fine.
 * Returns the current value of the "had_eol" flag, which regcomp_start()
 * resets to FALSE at the start of every parse.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1488
/*
 * Parse regular expression, i.e. main body or parenthesized thing.
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 *
 * "paren" tells what kind of group is being parsed and thereby which
 * open/close nodes are emitted around the branches.
 * "flagp" receives the combined flags of the branches: HASWIDTH is cleared
 * when one of the branches lacks it; SPSTART, HASNL and HASLOOKBH are set
 * when one of the branches has them.
 * Returns the first emitted node (the open node, or the first branch when
 * there is no open node), NULL for an error.
 */
    static char_u *
reg(paren, flagp)
    int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
    int         *flagp;
{
    char_u      *ret;
    char_u      *br;
    char_u      *ender;
    int         parno = 0;
    int         flags;

    *flagp = HASWIDTH;          /* Tentatively. */

#ifdef FEAT_SYN_HL
    if (paren == REG_ZPAREN)
    {
        /* Make a ZOPEN node. */
        if (regnzpar >= NSUBEXP)
            EMSG_RET_NULL(_("E50: Too many \\z("));
        parno = regnzpar;
        regnzpar++;
        ret = regnode(ZOPEN + parno);
    }
    else
#endif
        if (paren == REG_PAREN)
    {
        /* Make a MOPEN node. */
        if (regnpar >= NSUBEXP)
            EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
        parno = regnpar;
        ++regnpar;
        ret = regnode(MOPEN + parno);
    }
    else if (paren == REG_NPAREN)
    {
        /* Make a NOPEN node. */
        ret = regnode(NOPEN);
    }
    else
        ret = NULL;             /* REG_NOPAREN: no open node */

    /* Pick up the branches, linking them together. */
    br = regbranch(&flags);
    if (br == NULL)
        return NULL;
    if (ret != NULL)
        regtail(ret, br);       /* [MZ]OPEN -> first. */
    else
        ret = br;
    /* If one of the branches can be zero-width, the whole thing can.
     * If one of the branches has * at start or matches a line-break, the
     * whole thing can. */
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    while (peekchr() == Magic('|'))
    {
        skipchr();
        br = regbranch(&flags);
        if (br == NULL || reg_toolong)
            return NULL;
        regtail(ret, br);       /* BRANCH -> BRANCH. */
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    }

    /* Make a closing node, and hook it on the end.  The kind of node
     * matches the open node chosen above; END is used at the top level. */
    ender = regnode(
#ifdef FEAT_SYN_HL
            paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
            paren == REG_PAREN ? MCLOSE + parno :
            paren == REG_NPAREN ? NCLOSE : END);
    regtail(ret, ender);

    /* Hook the tails of the branches to the closing node. */
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    /* Check for proper termination: a group must be closed with ')', the
     * top level must have consumed the whole pattern. */
    if (paren != REG_NOPAREN && getchr() != Magic(')'))
    {
#ifdef FEAT_SYN_HL
        if (paren == REG_ZPAREN)
            EMSG_RET_NULL(_("E52: Unmatched \\z("));
        else
#endif
            if (paren == REG_NPAREN)
            EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
        else
            EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
    }
    else if (paren == REG_NOPAREN && peekchr() != NUL)
    {
        if (curchr == Magic(')'))
            EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
        else
            EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
        /* NOTREACHED */
    }
    /*
     * Here we set the flag allowing back references to this set of
     * parentheses.
     */
    if (paren == REG_PAREN)
        had_endbrace[parno] = TRUE;     /* have seen the close paren */
    return ret;
}
1608
1609/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001610 * Parse one alternative of an | operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001611 * Implements the & operator.
1612 */
1613 static char_u *
1614regbranch(flagp)
1615 int *flagp;
1616{
1617 char_u *ret;
1618 char_u *chain = NULL;
1619 char_u *latest;
1620 int flags;
1621
1622 *flagp = WORST | HASNL; /* Tentatively. */
1623
1624 ret = regnode(BRANCH);
1625 for (;;)
1626 {
1627 latest = regconcat(&flags);
1628 if (latest == NULL)
1629 return NULL;
1630 /* If one of the branches has width, the whole thing has. If one of
1631 * the branches anchors at start-of-line, the whole thing does.
1632 * If one of the branches uses look-behind, the whole thing does. */
1633 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1634 /* If one of the branches doesn't match a line-break, the whole thing
1635 * doesn't. */
1636 *flagp &= ~HASNL | (flags & HASNL);
1637 if (chain != NULL)
1638 regtail(chain, latest);
1639 if (peekchr() != Magic('&'))
1640 break;
1641 skipchr();
1642 regtail(latest, regnode(END)); /* operand ends */
Bram Moolenaard3005802009-11-25 17:21:32 +00001643 if (reg_toolong)
1644 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001645 reginsert(MATCH, latest);
1646 chain = latest;
1647 }
1648
1649 return ret;
1650}
1651
1652/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001653 * Parse one alternative of an | or & operator.
Bram Moolenaar071d4272004-06-13 20:20:40 +00001654 * Implements the concatenation operator.
1655 */
1656 static char_u *
1657regconcat(flagp)
1658 int *flagp;
1659{
1660 char_u *first = NULL;
1661 char_u *chain = NULL;
1662 char_u *latest;
1663 int flags;
1664 int cont = TRUE;
1665
1666 *flagp = WORST; /* Tentatively. */
1667
1668 while (cont)
1669 {
1670 switch (peekchr())
1671 {
1672 case NUL:
1673 case Magic('|'):
1674 case Magic('&'):
1675 case Magic(')'):
1676 cont = FALSE;
1677 break;
1678 case Magic('Z'):
1679#ifdef FEAT_MBYTE
1680 regflags |= RF_ICOMBINE;
1681#endif
1682 skipchr_keepstart();
1683 break;
1684 case Magic('c'):
1685 regflags |= RF_ICASE;
1686 skipchr_keepstart();
1687 break;
1688 case Magic('C'):
1689 regflags |= RF_NOICASE;
1690 skipchr_keepstart();
1691 break;
1692 case Magic('v'):
1693 reg_magic = MAGIC_ALL;
1694 skipchr_keepstart();
1695 curchr = -1;
1696 break;
1697 case Magic('m'):
1698 reg_magic = MAGIC_ON;
1699 skipchr_keepstart();
1700 curchr = -1;
1701 break;
1702 case Magic('M'):
1703 reg_magic = MAGIC_OFF;
1704 skipchr_keepstart();
1705 curchr = -1;
1706 break;
1707 case Magic('V'):
1708 reg_magic = MAGIC_NONE;
1709 skipchr_keepstart();
1710 curchr = -1;
1711 break;
1712 default:
1713 latest = regpiece(&flags);
Bram Moolenaard3005802009-11-25 17:21:32 +00001714 if (latest == NULL || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001715 return NULL;
1716 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1717 if (chain == NULL) /* First piece. */
1718 *flagp |= flags & SPSTART;
1719 else
1720 regtail(chain, latest);
1721 chain = latest;
1722 if (first == NULL)
1723 first = latest;
1724 break;
1725 }
1726 }
1727 if (first == NULL) /* Loop ran zero times. */
1728 first = regnode(NOTHING);
1729 return first;
1730}
1731
/*
 * Parse a piece: an atom followed by an optional multi (such as * + = ? @ {).
 *
 * The atom is parsed with regatom().  When peekchr() is not a multi
 * (re_multi_type() returns NOT_MULTI) the atom is returned unchanged with
 * its own flags.  Otherwise the multi is consumed and the nodes implementing
 * it are inserted around the atom.
 *
 * "flagp" receives the flags of the resulting piece.
 * Returns a pointer to the piece.  Returns NULL when regatom() or
 * read_limits() fails; on a syntax error one of the *_RET_NULL macros is
 * used (presumably reporting the error and returning NULL -- they are
 * defined outside of this section).
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 */
    static char_u *
regpiece(flagp)
    int             *flagp;
{
    char_u          *ret;
    int             op;
    char_u          *next;
    int             flags;
    long            minval;
    long            maxval;

    ret = regatom(&flags);
    if (ret == NULL)
        return NULL;

    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
        /* No multi follows: the piece is just the atom. */
        *flagp = flags;
        return ret;
    }
    /* default flags */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
        case Magic('*'):
            if (flags & SIMPLE)
                /* A SIMPLE atom only needs a STAR node in front of it. */
                reginsert(STAR, ret);
            else
            {
                /* Emit x* as (x&|), where & means "self". */
                reginsert(BRANCH, ret); /* Either x */
                regoptail(ret, regnode(BACK));  /* and loop */
                regoptail(ret, ret);    /* back */
                regtail(ret, regnode(BRANCH));  /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            break;

        case Magic('+'):
            if (flags & SIMPLE)
                reginsert(PLUS, ret);
            else
            {
                /* Emit x+ as x(&|), where & means "self". */
                next = regnode(BRANCH); /* Either */
                regtail(ret, next);
                regtail(regnode(BACK), ret);    /* loop back */
                regtail(next, regnode(BRANCH)); /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            /* Unlike the default flags, the result has HASWIDTH and is not
             * marked SPSTART. */
            *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;

        case Magic('@'):
            {
                int     lop = END;      /* END means: no valid operator seen */
                int     nr;

                /* A number may come before the operator character; it is
                 * only used for the look-behind operators below. */
                nr = getdecchrs();
                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH; break;                 /* \@= */
                    case '!': lop = NOMATCH; break;               /* \@! */
                    case '>': lop = SUBPAT; break;                /* \@> */
                    case '<': switch (no_Magic(getchr()))
                              {
                                  case '=': lop = BEHIND; break;   /* \@<= */
                                  case '!': lop = NOBEHIND; break; /* \@<! */
                              }
                }
                if (lop == END)
                    EMSG2_RET_NULL(_("E59: invalid character after %s@"),
                                                      reg_magic == MAGIC_ALL);
                /* Look behind must match with behind_pos. */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    regtail(ret, regnode(BHPOS));
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    if (nr < 0)
                        nr = 0; /* no limit is same as zero limit */
                    /* The look-behind operators store the number right
                     * after the node. */
                    reginsert_nr(lop, nr, ret);
                }
                else
                    reginsert(lop, ret);
                break;
            }

        case Magic('?'):
        case Magic('='):
            /* Emit x= as (x|) */
            reginsert(BRANCH, ret);             /* Either x */
            regtail(ret, regnode(BRANCH));      /* or */
            next = regnode(NOTHING);            /* null. */
            regtail(ret, next);
            regoptail(ret, next);
            break;

        case Magic('{'):
            if (!read_limits(&minval, &maxval))
                return NULL;
            if (flags & SIMPLE)
            {
                reginsert(BRACE_SIMPLE, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
            }
            else
            {
                /* Each complex brace uses its own BRACE_COMPLEX + N opcode;
                 * at most ten of them are accepted. */
                if (num_complex_braces >= 10)
                    EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
                                                      reg_magic == MAGIC_ALL);
                reginsert(BRACE_COMPLEX + num_complex_braces, ret);
                regoptail(ret, regnode(BACK));
                regoptail(ret, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
                ++num_complex_braces;
            }
            /* HASWIDTH is only set when both limits are above zero. */
            if (minval > 0 && maxval > 0)
                *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
        /* Can't have a multi follow a multi. */
        if (peekchr() == Magic('*'))
            sprintf((char *)IObuff, _("E61: Nested %s*"),
                                            reg_magic >= MAGIC_ON ? "" : "\\");
        else
            sprintf((char *)IObuff, _("E62: Nested %s%c"),
                reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
        EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1882
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02001883/* When making changes to classchars also change nfa_classcodes. */
1884static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1885static int classcodes[] = {
1886 ANY, IDENT, SIDENT, KWORD, SKWORD,
1887 FNAME, SFNAME, PRINT, SPRINT,
1888 WHITE, NWHITE, DIGIT, NDIGIT,
1889 HEX, NHEX, OCTAL, NOCTAL,
1890 WORD, NWORD, HEAD, NHEAD,
1891 ALPHA, NALPHA, LOWER, NLOWER,
1892 UPPER, NUPPER
1893};
1894
/*
 * Parse the lowest level: a single atom.
 *
 * One item is read with getchr() and the node(s) for it are emitted.
 * "flagp" receives the flags that describe the atom.
 * Returns a pointer to the emitted code.  Returns NULL when a nested reg()
 * or regatom() call fails; on a syntax error one of the *_RET_NULL macros
 * is used (presumably reporting the error and returning NULL -- they are
 * defined outside of this section).
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Don't do this when one_exactly is set.
 */
    static char_u *
regatom(flagp)
    int            *flagp;
{
    char_u          *ret;
    int             flags;
    int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
    int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
    int             c;
    char_u          *p;
    int             extra = 0;      /* set to ADD_NL after "\_" */

    *flagp = WORST;             /* Tentatively. */
    cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
    cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;

    c = getchr();
    switch (c)
    {
      case Magic('^'):
        ret = regnode(BOL);
        break;

      case Magic('$'):
        ret = regnode(EOL);
#if defined(FEAT_SYN_HL) || defined(PROTO)
        had_eol = TRUE;
#endif
        break;

      case Magic('<'):
        ret = regnode(BOW);
        break;

      case Magic('>'):
        ret = regnode(EOW);
        break;

      case Magic('_'):
        c = no_Magic(getchr());
        if (c == '^')           /* "\_^" is start-of-line */
        {
            ret = regnode(BOL);
            break;
        }
        if (c == '$')           /* "\_$" is end-of-line */
        {
            ret = regnode(EOL);
#if defined(FEAT_SYN_HL) || defined(PROTO)
            had_eol = TRUE;
#endif
            break;
        }

        /* Anything else after "\_" is an item that also matches a
         * line-break. */
        extra = ADD_NL;
        *flagp |= HASNL;

        /* "\_[" is character range plus newline */
        if (c == '[')
            goto collection;

        /* "\_x" is character class plus newline */
        /*FALLTHROUGH*/

        /*
         * Character classes.
         */
      case Magic('.'):
      case Magic('i'):
      case Magic('I'):
      case Magic('k'):
      case Magic('K'):
      case Magic('f'):
      case Magic('F'):
      case Magic('p'):
      case Magic('P'):
      case Magic('s'):
      case Magic('S'):
      case Magic('d'):
      case Magic('D'):
      case Magic('x'):
      case Magic('X'):
      case Magic('o'):
      case Magic('O'):
      case Magic('w'):
      case Magic('W'):
      case Magic('h'):
      case Magic('H'):
      case Magic('a'):
      case Magic('A'):
      case Magic('l'):
      case Magic('L'):
      case Magic('u'):
      case Magic('U'):
        /* "p" can only be NULL when coming here through "\_" with a
         * character that is not a class character. */
        p = vim_strchr(classchars, no_Magic(c));
        if (p == NULL)
            EMSG_RET_NULL(_("E63: invalid use of \\_"));
#ifdef FEAT_MBYTE
        /* When '.' is followed by a composing char ignore the dot, so that
         * the composing char is matched here. */
        if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
        {
            c = getchr();
            goto do_multibyte;
        }
#endif
        /* The position in classchars selects the opcode; "extra" is ADD_NL
         * when "\_" was used. */
        ret = regnode(classcodes[p - classchars] + extra);
        *flagp |= HASWIDTH | SIMPLE;
        break;

      case Magic('n'):
        if (reg_string)
        {
            /* In a string "\n" matches a newline character. */
            ret = regnode(EXACTLY);
            regc(NL);
            regc(NUL);
            *flagp |= HASWIDTH | SIMPLE;
        }
        else
        {
            /* In buffer text "\n" matches the end of a line. */
            ret = regnode(NEWL);
            *flagp |= HASWIDTH | HASNL;
        }
        break;

      case Magic('('):
        if (one_exactly)
            EMSG_ONE_RET_NULL;
        ret = reg(REG_PAREN, &flags);
        if (ret == NULL)
            return NULL;
        *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
        break;

      case NUL:
      case Magic('|'):
      case Magic('&'):
      case Magic(')'):
        if (one_exactly)
            EMSG_ONE_RET_NULL;
        EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
        /* NOTREACHED */

      case Magic('='):
      case Magic('?'):
      case Magic('+'):
      case Magic('@'):
      case Magic('{'):
      case Magic('*'):
        /* A multi without an atom in front of it.  The backslash in the
         * message is left out when the multi is used without one at the
         * current magicness. */
        c = no_Magic(c);
        sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
                (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
                ? "" : "\\", c);
        EMSG_RET_NULL(IObuff);
        /* NOTREACHED */

      case Magic('~'):          /* previous substitute pattern */
            if (reg_prev_sub != NULL)
            {
                char_u      *lp;

                /* Copy the previous substitute string literally. */
                ret = regnode(EXACTLY);
                lp = reg_prev_sub;
                while (*lp != NUL)
                    regc(*lp++);
                regc(NUL);
                if (*reg_prev_sub != NUL)
                {
                    *flagp |= HASWIDTH;
                    /* SIMPLE only when it is exactly one byte long. */
                    if ((lp - reg_prev_sub) == 1)
                        *flagp |= SIMPLE;
                }
            }
            else
                EMSG_RET_NULL(_(e_nopresub));
            break;

      case Magic('1'):
      case Magic('2'):
      case Magic('3'):
      case Magic('4'):
      case Magic('5'):
      case Magic('6'):
      case Magic('7'):
      case Magic('8'):
      case Magic('9'):
            {
                int                 refnum;

                refnum = c - Magic('0');
                /*
                 * Check if the back reference is legal. We must have seen the
                 * close brace.
                 * TODO: Should also check that we don't refer to something
                 * that is repeated (+*=): what instance of the repetition
                 * should we match?
                 */
                if (!had_endbrace[refnum])
                {
                    /* Trick: check if "@<=" or "@<!" follows, in which case
                     * the \1 can appear before the referenced match. */
                    for (p = regparse; *p != NUL; ++p)
                        if (p[0] == '@' && p[1] == '<'
                                              && (p[2] == '!' || p[2] == '='))
                            break;
                    if (*p == NUL)
                        EMSG_RET_NULL(_("E65: Illegal back reference"));
                }
                ret = regnode(BACKREF + refnum);
            }
            break;

      case Magic('z'):
        {
            c = no_Magic(getchr());
            switch (c)
            {
#ifdef FEAT_SYN_HL
                /* "\z(": only accepted when reg_do_extmatch is REX_SET */
                case '(': if (reg_do_extmatch != REX_SET)
                              EMSG_RET_NULL(_("E66: \\z( not allowed here"));
                          if (one_exactly)
                              EMSG_ONE_RET_NULL;
                          ret = reg(REG_ZPAREN, &flags);
                          if (ret == NULL)
                              return NULL;
                          *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
                          re_has_z = REX_SET;
                          break;

                /* "\z1" .. "\z9": only accepted when reg_do_extmatch is
                 * REX_USE */
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9': if (reg_do_extmatch != REX_USE)
                              EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
                          ret = regnode(ZREF + c - '0');
                          re_has_z = REX_USE;
                          break;
#endif

                /* "\zs" */
                case 's': ret = regnode(MOPEN + 0);
                          break;

                /* "\ze" */
                case 'e': ret = regnode(MCLOSE + 0);
                          break;

                default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
            }
        }
        break;

      case Magic('%'):
        {
            c = no_Magic(getchr());
            switch (c)
            {
                /* () without a back reference */
                case '(':
                    if (one_exactly)
                        EMSG_ONE_RET_NULL;
                    ret = reg(REG_NPAREN, &flags);
                    if (ret == NULL)
                        return NULL;
                    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
                    break;

                /* Catch \%^ and \%$ regardless of where they appear in the
                 * pattern -- regardless of whether or not it makes sense. */
                case '^':
                    ret = regnode(RE_BOF);
                    break;

                case '$':
                    ret = regnode(RE_EOF);
                    break;

                case '#':
                    ret = regnode(CURSOR);
                    break;

                case 'V':
                    ret = regnode(RE_VISUAL);
                    break;

                /* \%[abc]: Emit as a list of branches, all ending at the last
                 * branch which matches nothing. */
                case '[':
                          if (one_exactly)      /* doesn't nest */
                              EMSG_ONE_RET_NULL;
                          {
                              char_u    *lastbranch;
                              char_u    *lastnode = NULL;
                              char_u    *br;

                              ret = NULL;
                              while ((c = getchr()) != ']')
                              {
                                  if (c == NUL)
                                      EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
                                                      reg_magic == MAGIC_ALL);
                                  br = regnode(BRANCH);
                                  if (ret == NULL)
                                      ret = br;
                                  else
                                      regtail(lastnode, br);

                                  /* Put the character back and parse it as
                                   * an atom of its own, with one_exactly set
                                   * for the duration of the call. */
                                  ungetchr();
                                  one_exactly = TRUE;
                                  lastnode = regatom(flagp);
                                  one_exactly = FALSE;
                                  if (lastnode == NULL)
                                      return NULL;
                              }
                              if (ret == NULL)
                                  EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
                                                      reg_magic == MAGIC_ALL);
                              lastbranch = regnode(BRANCH);
                              br = regnode(NOTHING);
                              /* Linking is skipped when only computing the
                               * size: no code was stored. */
                              if (ret != JUST_CALC_SIZE)
                              {
                                  regtail(lastnode, br);
                                  regtail(lastbranch, br);
                                  /* connect all branches to the NOTHING
                                   * branch at the end */
                                  for (br = ret; br != lastnode; )
                                  {
                                      if (OP(br) == BRANCH)
                                      {
                                          regtail(br, lastbranch);
                                          br = OPERAND(br);
                                      }
                                      else
                                          br = regnext(br);
                                  }
                              }
                              /* Drop the flags the nested regatom() calls
                               * set in "*flagp". */
                              *flagp &= ~(HASWIDTH | SIMPLE);
                              break;
                          }

                case 'd':   /* %d123 decimal */
                case 'o':   /* %o123 octal */
                case 'x':   /* %xab hex 2 */
                case 'u':   /* %uabcd hex 4 */
                case 'U':   /* %U1234abcd hex 8 */
                          {
                              int i;

                              switch (c)
                              {
                                  case 'd': i = getdecchrs(); break;
                                  case 'o': i = getoctchrs(); break;
                                  case 'x': i = gethexchrs(2); break;
                                  case 'u': i = gethexchrs(4); break;
                                  case 'U': i = gethexchrs(8); break;
                                  default:  i = -1; break;
                              }

                              /* A negative value is handled as an error. */
                              if (i < 0)
                                  EMSG2_RET_NULL(
                                        _("E678: Invalid character after %s%%[dxouU]"),
                                        reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE
                              if (use_multibytecode(i))
                                  ret = regnode(MULTIBYTECODE);
                              else
#endif
                                  ret = regnode(EXACTLY);
                              /* Value zero is stored as 0x0a; a NUL byte is
                               * what ends the operand. */
                              if (i == 0)
                                  regc(0x0a);
                              else
#ifdef FEAT_MBYTE
                                  regmbc(i);
#else
                                  regc(i);
#endif
                              regc(NUL);
                              *flagp |= HASWIDTH;
                              break;
                          }

                default:
                          if (VIM_ISDIGIT(c) || c == '<' || c == '>'
                                                                 || c == '\'')
                          {
                              long_u    n = 0;
                              int       cmp;

                              /* "cmp" is '<' or '>' when a comparator was
                               * given, otherwise the first character. */
                              cmp = c;
                              if (cmp == '<' || cmp == '>')
                                  c = getchr();
                              while (VIM_ISDIGIT(c))
                              {
                                  n = n * 10 + (c - '0');
                                  c = getchr();
                              }
                              if (c == '\'' && n == 0)
                              {
                                  /* "\%'m", "\%<'m" and "\%>'m": Mark */
                                  c = getchr();
                                  ret = regnode(RE_MARK);
                                  /* two bytes after the opcode: the mark
                                   * character and "cmp" */
                                  if (ret == JUST_CALC_SIZE)
                                      regsize += 2;
                                  else
                                  {
                                      *regcode++ = c;
                                      *regcode++ = cmp;
                                  }
                                  break;
                              }
                              else if (c == 'l' || c == 'c' || c == 'v')
                              {
                                  if (c == 'l')
                                      ret = regnode(RE_LNUM);
                                  else if (c == 'c')
                                      ret = regnode(RE_COL);
                                  else
                                      ret = regnode(RE_VCOL);
                                  /* five bytes after the opcode: four for
                                   * the number, one for "cmp" */
                                  if (ret == JUST_CALC_SIZE)
                                      regsize += 5;
                                  else
                                  {
                                      /* put the number and the optional
                                       * comparator after the opcode */
                                      regcode = re_put_long(regcode, n);
                                      *regcode++ = cmp;
                                  }
                                  break;
                              }
                          }

                          EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
                                                      reg_magic == MAGIC_ALL);
            }
        }
        break;

      case Magic('['):
collection:
        {
            char_u      *lp;

            /*
             * If there is no matching ']', we assume the '[' is a normal
             * character.  This makes 'incsearch' and ":help [" work.
             */
            lp = skip_anyof(regparse);
            if (*lp == ']')     /* there is a matching ']' */
            {
                int     startc = -1;    /* > 0 when next '-' is a range */
                int     endc;

                /*
                 * In a character class, different parsing rules apply.
                 * Not even \ is special anymore, nothing is.
                 */
                if (*regparse == '^')       /* Complement of range. */
                {
                    ret = regnode(ANYBUT + extra);
                    regparse++;
                }
                else
                    ret = regnode(ANYOF + extra);

                /* At the start ']' and '-' mean the literal character. */
                if (*regparse == ']' || *regparse == '-')
                {
                    startc = *regparse;
                    regc(*regparse++);
                }

                while (*regparse != NUL && *regparse != ']')
                {
                    if (*regparse == '-')
                    {
                        ++regparse;
                        /* The '-' is not used for a range at the end and
                         * after or before a '\n'. */
                        if (*regparse == ']' || *regparse == NUL
                                || startc == -1
                                || (regparse[0] == '\\' && regparse[1] == 'n'))
                        {
                            regc('-');
                            startc = '-';       /* [--x] is a range */
                        }
                        else
                        {
                            /* Also accept "a-[.z.]" */
                            endc = 0;
                            if (*regparse == '[')
                                endc = get_coll_element(&regparse);
                            if (endc == 0)
                            {
#ifdef FEAT_MBYTE
                                if (has_mbyte)
                                    endc = mb_ptr2char_adv(&regparse);
                                else
#endif
                                    endc = *regparse++;
                            }

                            /* Handle \o40, \x20 and \u20AC style sequences */
                            if (endc == '\\' && !cpo_lit && !cpo_bsl)
                                endc = coll_get_char();

                            if (startc > endc)
                                EMSG_RET_NULL(_(e_invrange));
#ifdef FEAT_MBYTE
                            if (has_mbyte && ((*mb_char2len)(startc) > 1
                                                 || (*mb_char2len)(endc) > 1))
                            {
                                /* Limit to a range of 256 chars */
                                if (endc > startc + 256)
                                    EMSG_RET_NULL(_(e_invrange));
                                /* "startc" itself was already emitted, add
                                 * the characters after it up to "endc" */
                                while (++startc <= endc)
                                    regmbc(startc);
                            }
                            else
#endif
                            {
#ifdef EBCDIC
                                int     alpha_only = FALSE;

                                /* for alphabetical range skip the gaps
                                 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
                                if (isalpha(startc) && isalpha(endc))
                                    alpha_only = TRUE;
#endif
                                while (++startc <= endc)
#ifdef EBCDIC
                                    if (!alpha_only || isalpha(startc))
#endif
                                        regc(startc);
                            }
                            startc = -1;
                        }
                    }
                    /*
                     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
                     * accepts "\t", "\e", etc., but only when the 'l' flag in
                     * 'cpoptions' is not included.
                     * Posix doesn't recognize backslash at all.
                     */
                    else if (*regparse == '\\'
                            && !cpo_bsl
                            && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
                                || (!cpo_lit
                                    && vim_strchr(REGEXP_ABBR,
                                                       regparse[1]) != NULL)))
                    {
                        regparse++;
                        if (*regparse == 'n')
                        {
                            /* '\n' in range: also match NL */
                            if (ret != JUST_CALC_SIZE)
                            {
                                /* Using \n inside [^] does not change what
                                 * matches. "[^\n]" is the same as ".". */
                                if (*ret == ANYOF)
                                {
                                    *ret = ANYOF + ADD_NL;
                                    *flagp |= HASNL;
                                }
                                /* else: must have had a \n already */
                            }
                            regparse++;
                            startc = -1;
                        }
                        else if (*regparse == 'd'
                                || *regparse == 'o'
                                || *regparse == 'x'
                                || *regparse == 'u'
                                || *regparse == 'U')
                        {
                            /* character given by number; zero is stored as
                             * 0x0a */
                            startc = coll_get_char();
                            if (startc == 0)
                                regc(0x0a);
                            else
#ifdef FEAT_MBYTE
                                regmbc(startc);
#else
                                regc(startc);
#endif
                        }
                        else
                        {
                            startc = backslash_trans(*regparse++);
                            regc(startc);
                        }
                    }
                    else if (*regparse == '[')
                    {
                        int c_class;
                        int cu;

                        c_class = get_char_class(&regparse);
                        startc = -1;
                        /* Characters assumed to be 8 bits! */
                        switch (c_class)
                        {
                            case CLASS_NONE:
                                /* Not a character class; try an equivalence
                                 * class and a collating element next. */
                                c_class = get_equi_class(&regparse);
                                if (c_class != 0)
                                {
                                    /* produce equivalence class */
                                    reg_equi_class(c_class);
                                }
                                else if ((c_class =
                                            get_coll_element(&regparse)) != 0)
                                {
                                    /* produce a collating element */
                                    regmbc(c_class);
                                }
                                else
                                {
                                    /* literal '[', allow [[-x] as a range */
                                    startc = *regparse++;
                                    regc(startc);
                                }
                                break;
                            case CLASS_ALNUM:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isalnum(cu))
                                        regc(cu);
                                break;
                            case CLASS_ALPHA:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isalpha(cu))
                                        regc(cu);
                                break;
                            case CLASS_BLANK:
                                regc(' ');
                                regc('\t');
                                break;
                            case CLASS_CNTRL:
                                for (cu = 1; cu <= 255; cu++)
                                    if (iscntrl(cu))
                                        regc(cu);
                                break;
                            case CLASS_DIGIT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (VIM_ISDIGIT(cu))
                                        regc(cu);
                                break;
                            case CLASS_GRAPH:
                                for (cu = 1; cu <= 255; cu++)
                                    if (isgraph(cu))
                                        regc(cu);
                                break;
                            case CLASS_LOWER:
                                for (cu = 1; cu <= 255; cu++)
                                    if (MB_ISLOWER(cu))
                                        regc(cu);
                                break;
                            case CLASS_PRINT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (vim_isprintc(cu))
                                        regc(cu);
                                break;
                            case CLASS_PUNCT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (ispunct(cu))
                                        regc(cu);
                                break;
                            case CLASS_SPACE:
                                /* characters 9 to 13 and the space */
                                for (cu = 9; cu <= 13; cu++)
                                    regc(cu);
                                regc(' ');
                                break;
                            case CLASS_UPPER:
                                for (cu = 1; cu <= 255; cu++)
                                    if (MB_ISUPPER(cu))
                                        regc(cu);
                                break;
                            case CLASS_XDIGIT:
                                for (cu = 1; cu <= 255; cu++)
                                    if (vim_isxdigit(cu))
                                        regc(cu);
                                break;
                            case CLASS_TAB:
                                regc('\t');
                                break;
                            case CLASS_RETURN:
                                regc('\r');
                                break;
                            case CLASS_BACKSPACE:
                                regc('\b');
                                break;
                            case CLASS_ESCAPE:
                                regc('\033');
                                break;
                        }
                    }
                    else
                    {
#ifdef FEAT_MBYTE
                        if (has_mbyte)
                        {
                            int len;

                            /* produce a multibyte character, including any
                             * following composing characters */
                            startc = mb_ptr2char(regparse);
                            len = (*mb_ptr2len)(regparse);
                            if (enc_utf8 && utf_char2len(startc) != len)
                                startc = -1;    /* composing chars */
                            while (--len >= 0)
                                regc(*regparse++);
                        }
                        else
#endif
                        {
                            startc = *regparse++;
                            regc(startc);
                        }
                    }
                }
                regc(NUL);
                prevchr_len = 1;        /* last char was the ']' */
                if (*regparse != ']')
                    EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
                skipchr();          /* let's be friends with the lexer again */
                *flagp |= HASWIDTH | SIMPLE;
                break;
            }
            else if (reg_strict)
                EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
        }
        /* No matching ']' and not strict: handle the '[' as an ordinary
         * character. */
        /* FALLTHROUGH */

      default:
        {
            int         len;

#ifdef FEAT_MBYTE
            /* A multi-byte character is handled as a separate atom if it's
             * before a multi and when it's a composing char. */
            if (use_multibytecode(c))
            {
do_multibyte:
                ret = regnode(MULTIBYTECODE);
                regmbc(c);
                *flagp |= HASWIDTH | SIMPLE;
                break;
            }
#endif

            ret = regnode(EXACTLY);

            /*
             * Append characters as long as:
             * - there is no following multi, we then need the character in
             *   front of it as a single character operand
             * - not running into a Magic character
             * - "one_exactly" is not set
             * But always emit at least one character.  Might be a Multi,
             * e.g., a "[" without matching "]".
             */
            for (len = 0; c != NUL && (len == 0
                        || (re_multi_type(peekchr()) == NOT_MULTI
                            && !one_exactly
                            && !is_Magic(c))); ++len)
            {
                c = no_Magic(c);
#ifdef FEAT_MBYTE
                if (has_mbyte)
                {
                    regmbc(c);
                    if (enc_utf8)
                    {
                        int     l;

                        /* Need to get composing character too. */
                        for (;;)
                        {
                            l = utf_ptr2len(regparse);
                            if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
                                break;
                            regmbc(utf_ptr2char(regparse));
                            skipchr();
                        }
                    }
                }
                else
#endif
                    regc(c);
                c = getchr();
            }
            /* The character that ended the loop is not part of this atom. */
            ungetchr();

            regc(NUL);
            *flagp |= HASWIDTH;
            /* "len" counts loop iterations; SIMPLE only after exactly one. */
            if (len == 1)
                *flagp |= SIMPLE;
        }
        break;
    }

    return ret;
}
2707
#ifdef FEAT_MBYTE
/*
 * Decide whether character "c" must be emitted as a MULTIBYTECODE node
 * instead of being put in an EXACTLY node.
 * That is so for a character that takes more than one byte and is either
 * followed by a multi or is a utf-8 composing character.
 * Returns TRUE or FALSE.
 */
    static int
use_multibytecode(c)
    int c;
{
    /* Only characters of more than one byte qualify. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
        return FALSE;

    /* Followed by a multi: the character must be an atom of its own. */
    if (re_multi_type(peekchr()) != NOT_MULTI)
        return TRUE;

    return enc_utf8 && utf_iscomposing(c);
}
#endif
2722
Bram Moolenaar071d4272004-06-13 20:20:40 +00002723/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002724 * Emit a node.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002725 * Return pointer to generated code.
2726 */
2727 static char_u *
2728regnode(op)
2729 int op;
2730{
2731 char_u *ret;
2732
2733 ret = regcode;
2734 if (ret == JUST_CALC_SIZE)
2735 regsize += 3;
2736 else
2737 {
2738 *regcode++ = op;
2739 *regcode++ = NUL; /* Null "next" pointer. */
2740 *regcode++ = NUL;
2741 }
2742 return ret;
2743}
2744
2745/*
2746 * Emit (if appropriate) a byte of code
2747 */
2748 static void
2749regc(b)
2750 int b;
2751{
2752 if (regcode == JUST_CALC_SIZE)
2753 regsize++;
2754 else
2755 *regcode++ = b;
2756}
2757
#ifdef FEAT_MBYTE
/*
 * Emit character "c" of code, which may take more than one byte.
 * A character above 0xff is silently dropped when "has_mbyte" is not set.
 * When only computing the size just add the byte length of the character to
 * "regsize".
 */
    static void
regmbc(c)
    int         c;
{
    if (has_mbyte || c <= 0xff)
    {
        if (regcode == JUST_CALC_SIZE)
            regsize += (*mb_char2len)(c);
        else
            regcode += (*mb_char2bytes)(c, regcode);
    }
}
#endif
2774
2775/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002776 * Insert an operator in front of already-emitted operand
Bram Moolenaar071d4272004-06-13 20:20:40 +00002777 *
2778 * Means relocating the operand.
2779 */
2780 static void
2781reginsert(op, opnd)
2782 int op;
2783 char_u *opnd;
2784{
2785 char_u *src;
2786 char_u *dst;
2787 char_u *place;
2788
2789 if (regcode == JUST_CALC_SIZE)
2790 {
2791 regsize += 3;
2792 return;
2793 }
2794 src = regcode;
2795 regcode += 3;
2796 dst = regcode;
2797 while (src > opnd)
2798 *--dst = *--src;
2799
2800 place = opnd; /* Op node, where operand used to be. */
2801 *place++ = op;
2802 *place++ = NUL;
2803 *place = NUL;
2804}
2805
2806/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002807 * Insert an operator in front of already-emitted operand.
Bram Moolenaar75eb1612013-05-29 18:45:11 +02002808 * Add a number to the operator.
2809 */
2810 static void
2811reginsert_nr(op, val, opnd)
2812 int op;
2813 long val;
2814 char_u *opnd;
2815{
2816 char_u *src;
2817 char_u *dst;
2818 char_u *place;
2819
2820 if (regcode == JUST_CALC_SIZE)
2821 {
2822 regsize += 7;
2823 return;
2824 }
2825 src = regcode;
2826 regcode += 7;
2827 dst = regcode;
2828 while (src > opnd)
2829 *--dst = *--src;
2830
2831 place = opnd; /* Op node, where operand used to be. */
2832 *place++ = op;
2833 *place++ = NUL;
2834 *place++ = NUL;
2835 place = re_put_long(place, (long_u)val);
2836}
2837
2838/*
2839 * Insert an operator in front of already-emitted operand.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002840 * The operator has the given limit values as operands. Also set next pointer.
2841 *
2842 * Means relocating the operand.
2843 */
2844 static void
2845reginsert_limits(op, minval, maxval, opnd)
2846 int op;
2847 long minval;
2848 long maxval;
2849 char_u *opnd;
2850{
2851 char_u *src;
2852 char_u *dst;
2853 char_u *place;
2854
2855 if (regcode == JUST_CALC_SIZE)
2856 {
2857 regsize += 11;
2858 return;
2859 }
2860 src = regcode;
2861 regcode += 11;
2862 dst = regcode;
2863 while (src > opnd)
2864 *--dst = *--src;
2865
2866 place = opnd; /* Op node, where operand used to be. */
2867 *place++ = op;
2868 *place++ = NUL;
2869 *place++ = NUL;
2870 place = re_put_long(place, (long_u)minval);
2871 place = re_put_long(place, (long_u)maxval);
2872 regtail(opnd, place);
2873}
2874
2875/*
2876 * Write a long as four bytes at "p" and return pointer to the next char.
2877 */
2878 static char_u *
2879re_put_long(p, val)
2880 char_u *p;
2881 long_u val;
2882{
2883 *p++ = (char_u) ((val >> 24) & 0377);
2884 *p++ = (char_u) ((val >> 16) & 0377);
2885 *p++ = (char_u) ((val >> 8) & 0377);
2886 *p++ = (char_u) (val & 0377);
2887 return p;
2888}
2889
2890/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002891 * Set the next-pointer at the end of a node chain.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002892 */
2893 static void
2894regtail(p, val)
2895 char_u *p;
2896 char_u *val;
2897{
2898 char_u *scan;
2899 char_u *temp;
2900 int offset;
2901
2902 if (p == JUST_CALC_SIZE)
2903 return;
2904
2905 /* Find last node. */
2906 scan = p;
2907 for (;;)
2908 {
2909 temp = regnext(scan);
2910 if (temp == NULL)
2911 break;
2912 scan = temp;
2913 }
2914
Bram Moolenaar582fd852005-03-28 20:58:01 +00002915 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002916 offset = (int)(scan - val);
2917 else
2918 offset = (int)(val - scan);
Bram Moolenaard3005802009-11-25 17:21:32 +00002919 /* When the offset uses more than 16 bits it can no longer fit in the two
Bram Moolenaar522f9ae2011-07-20 17:58:20 +02002920 * bytes available. Use a global flag to avoid having to check return
Bram Moolenaard3005802009-11-25 17:21:32 +00002921 * values in too many places. */
2922 if (offset > 0xffff)
2923 reg_toolong = TRUE;
2924 else
2925 {
2926 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2927 *(scan + 2) = (char_u) (offset & 0377);
2928 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002929}
2930
2931/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002932 * Like regtail, on item after a BRANCH; nop if none.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002933 */
2934 static void
2935regoptail(p, val)
2936 char_u *p;
2937 char_u *val;
2938{
2939 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2940 if (p == NULL || p == JUST_CALC_SIZE
2941 || (OP(p) != BRANCH
2942 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2943 return;
2944 regtail(OPERAND(p), val);
2945}
2946
2947/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002948 * Functions for getting characters from the regexp input.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002949 */
2950
/* Lexer position flags; skipchr() moves "at_start" into "prev_at_start" and
 * ungetchr() moves it back. */
static int at_start;		/* True when on the first character */
static int prev_at_start;	/* True when on the second character */
2953
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002954/*
2955 * Start parsing at "str".
2956 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002957 static void
2958initchr(str)
2959 char_u *str;
2960{
2961 regparse = str;
2962 prevchr_len = 0;
2963 curchr = prevprevchr = prevchr = nextchr = -1;
2964 at_start = TRUE;
2965 prev_at_start = FALSE;
2966}
2967
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02002968/*
Bram Moolenaar3737fc12013-06-01 14:42:56 +02002969 * Save the current parse state, so that it can be restored and parsing
2970 * starts in the same state again.
2971 */
2972 static void
2973save_parse_state(ps)
2974 parse_state_T *ps;
2975{
2976 ps->regparse = regparse;
2977 ps->prevchr_len = prevchr_len;
2978 ps->curchr = curchr;
2979 ps->prevchr = prevchr;
2980 ps->prevprevchr = prevprevchr;
2981 ps->nextchr = nextchr;
2982 ps->at_start = at_start;
2983 ps->prev_at_start = prev_at_start;
2984 ps->regnpar = regnpar;
2985}
2986
2987/*
2988 * Restore a previously saved parse state.
2989 */
2990 static void
2991restore_parse_state(ps)
2992 parse_state_T *ps;
2993{
2994 regparse = ps->regparse;
2995 prevchr_len = ps->prevchr_len;
2996 curchr = ps->curchr;
2997 prevchr = ps->prevchr;
2998 prevprevchr = ps->prevprevchr;
2999 nextchr = ps->nextchr;
3000 at_start = ps->at_start;
3001 prev_at_start = ps->prev_at_start;
3002 regnpar = ps->regnpar;
3003}
3004
3005
3006/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003007 * Get the next character without advancing.
3008 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003009 static int
3010peekchr()
3011{
Bram Moolenaardf177f62005-02-22 08:39:57 +00003012 static int after_slash = FALSE;
3013
Bram Moolenaar071d4272004-06-13 20:20:40 +00003014 if (curchr == -1)
3015 {
3016 switch (curchr = regparse[0])
3017 {
3018 case '.':
3019 case '[':
3020 case '~':
3021 /* magic when 'magic' is on */
3022 if (reg_magic >= MAGIC_ON)
3023 curchr = Magic(curchr);
3024 break;
3025 case '(':
3026 case ')':
3027 case '{':
3028 case '%':
3029 case '+':
3030 case '=':
3031 case '?':
3032 case '@':
3033 case '!':
3034 case '&':
3035 case '|':
3036 case '<':
3037 case '>':
3038 case '#': /* future ext. */
3039 case '"': /* future ext. */
3040 case '\'': /* future ext. */
3041 case ',': /* future ext. */
3042 case '-': /* future ext. */
3043 case ':': /* future ext. */
3044 case ';': /* future ext. */
3045 case '`': /* future ext. */
3046 case '/': /* Can't be used in / command */
3047 /* magic only after "\v" */
3048 if (reg_magic == MAGIC_ALL)
3049 curchr = Magic(curchr);
3050 break;
3051 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00003052 /* * is not magic as the very first character, eg "?*ptr", when
3053 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
3054 * "\(\*" is not magic, thus must be magic if "after_slash" */
3055 if (reg_magic >= MAGIC_ON
3056 && !at_start
3057 && !(prev_at_start && prevchr == Magic('^'))
3058 && (after_slash
3059 || (prevchr != Magic('(')
3060 && prevchr != Magic('&')
3061 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003062 curchr = Magic('*');
3063 break;
3064 case '^':
3065 /* '^' is only magic as the very first character and if it's after
3066 * "\(", "\|", "\&' or "\n" */
3067 if (reg_magic >= MAGIC_OFF
3068 && (at_start
3069 || reg_magic == MAGIC_ALL
3070 || prevchr == Magic('(')
3071 || prevchr == Magic('|')
3072 || prevchr == Magic('&')
3073 || prevchr == Magic('n')
3074 || (no_Magic(prevchr) == '('
3075 && prevprevchr == Magic('%'))))
3076 {
3077 curchr = Magic('^');
3078 at_start = TRUE;
3079 prev_at_start = FALSE;
3080 }
3081 break;
3082 case '$':
3083 /* '$' is only magic as the very last char and if it's in front of
3084 * either "\|", "\)", "\&", or "\n" */
3085 if (reg_magic >= MAGIC_OFF)
3086 {
3087 char_u *p = regparse + 1;
3088
3089 /* ignore \c \C \m and \M after '$' */
3090 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
3091 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
3092 p += 2;
3093 if (p[0] == NUL
3094 || (p[0] == '\\'
3095 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
3096 || p[1] == 'n'))
3097 || reg_magic == MAGIC_ALL)
3098 curchr = Magic('$');
3099 }
3100 break;
3101 case '\\':
3102 {
3103 int c = regparse[1];
3104
3105 if (c == NUL)
3106 curchr = '\\'; /* trailing '\' */
3107 else if (
3108#ifdef EBCDIC
3109 vim_strchr(META, c)
3110#else
3111 c <= '~' && META_flags[c]
3112#endif
3113 )
3114 {
3115 /*
3116 * META contains everything that may be magic sometimes,
3117 * except ^ and $ ("\^" and "\$" are only magic after
3118 * "\v"). We now fetch the next character and toggle its
3119 * magicness. Therefore, \ is so meta-magic that it is
3120 * not in META.
3121 */
3122 curchr = -1;
3123 prev_at_start = at_start;
3124 at_start = FALSE; /* be able to say "/\*ptr" */
3125 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003126 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003127 peekchr();
3128 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00003129 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003130 curchr = toggle_Magic(curchr);
3131 }
3132 else if (vim_strchr(REGEXP_ABBR, c))
3133 {
3134 /*
3135 * Handle abbreviations, like "\t" for TAB -- webb
3136 */
3137 curchr = backslash_trans(c);
3138 }
3139 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
3140 curchr = toggle_Magic(c);
3141 else
3142 {
3143 /*
3144 * Next character can never be (made) magic?
3145 * Then backslashing it won't do anything.
3146 */
3147#ifdef FEAT_MBYTE
3148 if (has_mbyte)
3149 curchr = (*mb_ptr2char)(regparse + 1);
3150 else
3151#endif
3152 curchr = c;
3153 }
3154 break;
3155 }
3156
3157#ifdef FEAT_MBYTE
3158 default:
3159 if (has_mbyte)
3160 curchr = (*mb_ptr2char)(regparse);
3161#endif
3162 }
3163 }
3164
3165 return curchr;
3166}
3167
3168/*
3169 * Eat one lexed character. Do this in a way that we can undo it.
3170 */
3171 static void
3172skipchr()
3173{
3174 /* peekchr() eats a backslash, do the same here */
3175 if (*regparse == '\\')
3176 prevchr_len = 1;
3177 else
3178 prevchr_len = 0;
3179 if (regparse[prevchr_len] != NUL)
3180 {
3181#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003182 if (enc_utf8)
Bram Moolenaar8f5c5782007-11-29 20:27:21 +00003183 /* exclude composing chars that mb_ptr2len does include */
3184 prevchr_len += utf_ptr2len(regparse + prevchr_len);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003185 else if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003186 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003187 else
3188#endif
3189 ++prevchr_len;
3190 }
3191 regparse += prevchr_len;
3192 prev_at_start = at_start;
3193 at_start = FALSE;
3194 prevprevchr = prevchr;
3195 prevchr = curchr;
3196 curchr = nextchr; /* use previously unget char, or -1 */
3197 nextchr = -1;
3198}
3199
3200/*
3201 * Skip a character while keeping the value of prev_at_start for at_start.
3202 * prevchr and prevprevchr are also kept.
3203 */
3204 static void
3205skipchr_keepstart()
3206{
3207 int as = prev_at_start;
3208 int pr = prevchr;
3209 int prpr = prevprevchr;
3210
3211 skipchr();
3212 at_start = as;
3213 prevchr = pr;
3214 prevprevchr = prpr;
3215}
3216
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer: the
 * character comes from peekchr() and is consumed with skipchr().
 */
    static int
getchr()
{
    int		c;

    c = peekchr();
    skipchr();
    return c;
}
3229
3230/*
3231 * put character back. Works only once!
3232 */
3233 static void
3234ungetchr()
3235{
3236 nextchr = curchr;
3237 curchr = prevchr;
3238 prevchr = prevprevchr;
3239 at_start = prev_at_start;
3240 prev_at_start = FALSE;
3241
3242 /* Backup regparse, so that it's at the same position as before the
3243 * getchr(). */
3244 regparse -= prevchr_len;
3245}
3246
3247/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00003248 * Get and return the value of the hex string at the current position.
3249 * Return -1 if there is no valid hex number.
3250 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003251 * blahblah\%x20asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003252 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003253 * The parameter controls the maximum number of input characters. This will be
3254 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3255 */
3256 static int
3257gethexchrs(maxinputlen)
3258 int maxinputlen;
3259{
3260 int nr = 0;
3261 int c;
3262 int i;
3263
3264 for (i = 0; i < maxinputlen; ++i)
3265 {
3266 c = regparse[0];
3267 if (!vim_isxdigit(c))
3268 break;
3269 nr <<= 4;
3270 nr |= hex2nr(c);
3271 ++regparse;
3272 }
3273
3274 if (i == 0)
3275 return -1;
3276 return nr;
3277}
3278
3279/*
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003280 * Get and return the value of the decimal string immediately after the
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003281 * current position. Return -1 for invalid. Consumes all digits.
3282 */
3283 static int
3284getdecchrs()
3285{
3286 int nr = 0;
3287 int c;
3288 int i;
3289
3290 for (i = 0; ; ++i)
3291 {
3292 c = regparse[0];
3293 if (c < '0' || c > '9')
3294 break;
3295 nr *= 10;
3296 nr += c - '0';
3297 ++regparse;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02003298 curchr = -1; /* no longer valid */
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003299 }
3300
3301 if (i == 0)
3302 return -1;
3303 return nr;
3304}
3305
3306/*
3307 * get and return the value of the octal string immediately after the current
3308 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3309 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3310 * treat 8 or 9 as recognised characters. Position is updated:
3311 * blahblah\%o210asdf
Bram Moolenaarc9b4b052006-04-30 18:54:39 +00003312 * before-^ ^-after
Bram Moolenaarc0197e22004-09-13 20:26:32 +00003313 */
3314 static int
3315getoctchrs()
3316{
3317 int nr = 0;
3318 int c;
3319 int i;
3320
3321 for (i = 0; i < 3 && nr < 040; ++i)
3322 {
3323 c = regparse[0];
3324 if (c < '0' || c > '7')
3325 break;
3326 nr <<= 3;
3327 nr |= hex2nr(c);
3328 ++regparse;
3329 }
3330
3331 if (i == 0)
3332 return -1;
3333 return nr;
3334}
3335
3336/*
3337 * Get a number after a backslash that is inside [].
3338 * When nothing is recognized return a backslash.
3339 */
3340 static int
3341coll_get_char()
3342{
3343 int nr = -1;
3344
3345 switch (*regparse++)
3346 {
3347 case 'd': nr = getdecchrs(); break;
3348 case 'o': nr = getoctchrs(); break;
3349 case 'x': nr = gethexchrs(2); break;
3350 case 'u': nr = gethexchrs(4); break;
3351 case 'U': nr = gethexchrs(8); break;
3352 }
3353 if (nr < 0)
3354 {
3355 /* If getting the number fails be backwards compatible: the character
3356 * is a backslash. */
3357 --regparse;
3358 nr = '\\';
3359 }
3360 return nr;
3361}
3362
3363/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003364 * read_limits - Read two integers to be taken as a minimum and maximum.
3365 * If the first character is '-', then the range is reversed.
3366 * Should end with 'end'. If minval is missing, zero is default, if maxval is
3367 * missing, a very big number is the default.
3368 */
3369 static int
3370read_limits(minval, maxval)
3371 long *minval;
3372 long *maxval;
3373{
3374 int reverse = FALSE;
3375 char_u *first_char;
3376 long tmp;
3377
3378 if (*regparse == '-')
3379 {
3380 /* Starts with '-', so reverse the range later */
3381 regparse++;
3382 reverse = TRUE;
3383 }
3384 first_char = regparse;
3385 *minval = getdigits(&regparse);
3386 if (*regparse == ',') /* There is a comma */
3387 {
3388 if (vim_isdigit(*++regparse))
3389 *maxval = getdigits(&regparse);
3390 else
3391 *maxval = MAX_LIMIT;
3392 }
3393 else if (VIM_ISDIGIT(*first_char))
3394 *maxval = *minval; /* It was \{n} or \{-n} */
3395 else
3396 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
3397 if (*regparse == '\\')
3398 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00003399 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00003400 {
3401 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3402 reg_magic == MAGIC_ALL ? "" : "\\");
3403 EMSG_RET_FAIL(IObuff);
3404 }
3405
3406 /*
3407 * Reverse the range if there was a '-', or make sure it is in the right
3408 * order otherwise.
3409 */
3410 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3411 {
3412 tmp = *minval;
3413 *minval = *maxval;
3414 *maxval = tmp;
3415 }
3416 skipchr(); /* let's be friends with the lexer again */
3417 return OK;
3418}
3419
3420/*
3421 * vim_regexec and friends
3422 */
3423
3424/*
3425 * Global work variables for vim_regexec().
3426 */
3427
/* The current match-position is remembered with these variables: */
static linenr_T reglnum;	/* line number, relative to first line */
static char_u *regline;		/* start of current line */
static char_u *reginput;	/* current input, points into "regline" */

static int need_clear_subexpr;	/* subexpressions still need to be
				 * cleared */
#ifdef FEAT_SYN_HL
/* Only present when compiled with FEAT_SYN_HL. */
static int need_clear_zsubexpr = FALSE;	/* extmatch subexpressions
					 * still need to be cleared */
#endif
3439
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 */
typedef struct
{
    union
    {
	char_u *ptr;	/* reginput pointer, for single-line regexp */
	lpos_T pos;	/* reginput pos, for multi-line regexp */
    } rs_u;
    int rs_len;		/* length of "backpos" at the time of saving */
} regsave_T;

/* struct to save start/end pointer/position in for \(\) */
typedef struct
{
    union
    {
	char_u *ptr;	/* used when not REG_MULTI, see restore_se() */
	lpos_T pos;	/* used when REG_MULTI, see restore_se() */
    } se_u;
} save_se_T;

/* used for BEHIND and NOBEHIND matching */
typedef struct regbehind_S
{
    regsave_T save_after;
    regsave_T save_behind;
    int save_need_clear_subexpr;
    save_se_T save_start[NSUBEXP];  /* one entry per possible subexpression */
    save_se_T save_end[NSUBEXP];
} regbehind_T;
3474
/* Prototypes for the static functions used by the matching code. */
static char_u *reg_getline __ARGS((linenr_T lnum));
static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr __ARGS((void));
#endif
static void save_subexpr __ARGS((regbehind_T *bp));
static void restore_subexpr __ARGS((regbehind_T *bp));
static void reg_nextline __ARGS((void));
static void reg_save __ARGS((regsave_T *save, garray_T *gap));
static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
static int reg_save_equal __ARGS((regsave_T *save));
static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
static void save_se_one __ARGS((save_se_T *savep, char_u **pp));

/* Save the sub-expressions before attempting a match.
 * "posp" is used for a multi-line match, "pp" otherwise.
 * NOTE: expands to a conditional expression without outer parentheses, use
 * it as a complete statement only. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* After a failed match restore the sub-expressions.
 * NOTE: expands to a brace block, not to a single statement. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
	*(posp) = (savep)->se_u.pos; \
    else \
	*(pp) = (savep)->se_u.ptr; }

static int re_num_cmp __ARGS((long_u val, char_u *scan));
static int regmatch __ARGS((char_u *prog));
static int regrepeat __ARGS((char_u *p, long maxcount));
3505
#ifdef DEBUG
/* NOTE(review): presumably switches on narration of the matching in a debug
 * build; not used in the visible part of the file -- confirm. */
int regnarrate = 0;
#endif

/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always.
 */
static int ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum.
 */
static colnr_T ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in bt_regexec_both() when finished.
 */
static char_u *reg_tofree = NULL;
static unsigned reg_tofreelen;	/* presumably the allocated size of
				 * "reg_tofree" -- confirm */
3539/*
3540 * These variables are set when executing a regexp to speed up the execution.
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00003541 * Which ones are set depends on whether a single-line or multi-line match is
Bram Moolenaar071d4272004-06-13 20:20:40 +00003542 * done:
3543 * single-line multi-line
3544 * reg_match &regmatch_T NULL
3545 * reg_mmatch NULL &regmmatch_T
3546 * reg_startp reg_match->startp <invalid>
3547 * reg_endp reg_match->endp <invalid>
3548 * reg_startpos <invalid> reg_mmatch->startpos
3549 * reg_endpos <invalid> reg_mmatch->endpos
3550 * reg_win NULL window in which to search
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003551 * reg_buf curbuf buffer in which to search
Bram Moolenaar071d4272004-06-13 20:20:40 +00003552 * reg_firstlnum <invalid> first line in which to search
3553 * reg_maxline 0 last line nr
3554 * reg_line_lbr FALSE or TRUE FALSE
3555 */
3556static regmatch_T *reg_match;
3557static regmmatch_T *reg_mmatch;
3558static char_u **reg_startp = NULL;
3559static char_u **reg_endp = NULL;
3560static lpos_T *reg_startpos = NULL;
3561static lpos_T *reg_endpos = NULL;
3562static win_T *reg_win;
3563static buf_T *reg_buf;
3564static linenr_T reg_firstlnum;
3565static linenr_T reg_maxline;
3566static int reg_line_lbr; /* "\n" in string is line break */
3567
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003568/* Values for rs_state in regitem_T. */
3569typedef enum regstate_E
3570{
3571 RS_NOPEN = 0 /* NOPEN and NCLOSE */
3572 , RS_MOPEN /* MOPEN + [0-9] */
3573 , RS_MCLOSE /* MCLOSE + [0-9] */
3574#ifdef FEAT_SYN_HL
3575 , RS_ZOPEN /* ZOPEN + [0-9] */
3576 , RS_ZCLOSE /* ZCLOSE + [0-9] */
3577#endif
3578 , RS_BRANCH /* BRANCH */
3579 , RS_BRCPLX_MORE /* BRACE_COMPLEX and trying one more match */
3580 , RS_BRCPLX_LONG /* BRACE_COMPLEX and trying longest match */
3581 , RS_BRCPLX_SHORT /* BRACE_COMPLEX and trying shortest match */
3582 , RS_NOMATCH /* NOMATCH */
3583 , RS_BEHIND1 /* BEHIND / NOBEHIND matching rest */
3584 , RS_BEHIND2 /* BEHIND / NOBEHIND matching behind part */
3585 , RS_STAR_LONG /* STAR/PLUS/BRACE_SIMPLE longest match */
3586 , RS_STAR_SHORT /* STAR/PLUS/BRACE_SIMPLE shortest match */
3587} regstate_T;
3588
3589/*
3590 * When there are alternatives a regstate_T is put on the regstack to remember
3591 * what we are doing.
3592 * Before it may be another type of item, depending on rs_state, to remember
3593 * more things.
3594 */
3595typedef struct regitem_S
3596{
3597 regstate_T rs_state; /* what we are doing, one of RS_ above */
3598 char_u *rs_scan; /* current node in program */
3599 union
3600 {
3601 save_se_T sesave;
3602 regsave_T regsave;
3603 } rs_un; /* room for saving reginput */
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00003604 short rs_no; /* submatch nr or BEHIND/NOBEHIND */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003605} regitem_T;
3606
3607static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3608static void regstack_pop __ARGS((char_u **scan));
3609
/* used for STAR, PLUS and BRACE_SIMPLE matching */
typedef struct regstar_S
{
    int		nextb;		/* next byte */
    int		nextb_ic;	/* next byte reverse case */
    long	count;
    long	minval;
    long	maxval;
} regstar_T;

/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u	*bp_scan;	/* "scan" where BACK was encountered */
    regsave_T	bp_pos;		/* last input position */
} backpos_T;

/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T for BACK
 */
static garray_T	regstack = {0, 0, 0, 0, NULL};
static garray_T	backpos = {0, 0, 0, 0, NULL};

/*
 * Both for regstack and backpos tables we use the following strategy of
 * allocation (to reduce malloc/free calls):
 * - Initial size is fairly small.
 * - When needed, the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match we free the memory only if the array has grown.
 *   Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL	2048	/* in bytes: regstack item size is 1 */
#define BACKPOS_INITIAL		64	/* in backpos_T entries */
3651
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the regexp code keeps allocated between calls: the
 * line copy, the previous substitute string and the two growarrays used for
 * matching.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_tofree);
    vim_free(reg_prev_sub);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003662
Bram Moolenaar071d4272004-06-13 20:20:40 +00003663/*
3664 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3665 */
3666 static char_u *
3667reg_getline(lnum)
3668 linenr_T lnum;
3669{
3670 /* when looking behind for a match/no-match lnum is negative. But we
3671 * can't go before line 1 */
3672 if (reg_firstlnum + lnum < 1)
3673 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003674 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003675 /* Must have matched the "\n" in the last line. */
3676 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003677 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3678}
3679
/* NOTE(review): presumably the input position used while matching BEHIND /
 * NOBEHIND; not used in the visible part of the file -- confirm. */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
static char_u *reg_startzp[NSUBEXP];	/* Workspace to mark beginning */
static char_u *reg_endzp[NSUBEXP];	/*   and end of \z(...\) matches */
static lpos_T reg_startzpos[NSUBEXP];	/* idem, beginning pos */
static lpos_T reg_endzpos[NSUBEXP];	/* idem, end pos */
#endif

/* TRUE if using multi-line regexp.  "reg_match" is set to NULL by
 * bt_regexec_multi() and to the match argument by bt_regexec() and
 * bt_regexec_nl(). */
#define REG_MULTI	(reg_match == NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003692static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
3693
Bram Moolenaar071d4272004-06-13 20:20:40 +00003694/*
3695 * Match a regexp against a string.
3696 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3697 * Uses curbuf for line count and 'iskeyword'.
3698 *
3699 * Return TRUE if there is a match, FALSE if not.
3700 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003701 static int
3702bt_regexec(rmp, line, col)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003703 regmatch_T *rmp;
3704 char_u *line; /* string to match against */
3705 colnr_T col; /* column to start looking for match */
3706{
3707 reg_match = rmp;
3708 reg_mmatch = NULL;
3709 reg_maxline = 0;
3710 reg_line_lbr = FALSE;
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01003711 reg_buf = curbuf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003712 reg_win = NULL;
3713 ireg_ic = rmp->rm_ic;
3714#ifdef FEAT_MBYTE
3715 ireg_icombine = FALSE;
3716#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003717 ireg_maxcol = 0;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003718 return (bt_regexec_both(line, col, NULL) != 0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003719}
3720
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)

static int  bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));

/*
 * Like bt_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    static int
bt_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;	/* string to match against */
    colnr_T	col;	/* column to start looking for match */
{
    /* Single-line match: "reg_mmatch" is not used. */
    reg_mmatch = NULL;
    reg_match = rmp;

    reg_win = NULL;
    reg_buf = curbuf;
    reg_line_lbr = TRUE;
    reg_maxline = 0;

    ireg_maxcol = 0;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    ireg_ic = rmp->rm_ic;

    if (bt_regexec_both(line, col, NULL) == 0)
	return FALSE;
    return TRUE;
}
#endif
3749
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003750static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3751
Bram Moolenaar071d4272004-06-13 20:20:40 +00003752/*
3753 * Match a regexp against multiple lines.
3754 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3755 * Uses curbuf for line count and 'iskeyword'.
3756 *
3757 * Return zero if there is no match. Return number of lines contained in the
3758 * match otherwise.
3759 */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003760 static long
3761bt_regexec_multi(rmp, win, buf, lnum, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003762 regmmatch_T *rmp;
3763 win_T *win; /* window in which to search or NULL */
3764 buf_T *buf; /* buffer in which to search */
3765 linenr_T lnum; /* nr of line to start looking for match */
3766 colnr_T col; /* column to start looking for match */
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003767 proftime_T *tm; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003768{
3769 long r;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003770
3771 reg_match = NULL;
3772 reg_mmatch = rmp;
3773 reg_buf = buf;
3774 reg_win = win;
3775 reg_firstlnum = lnum;
3776 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3777 reg_line_lbr = FALSE;
3778 ireg_ic = rmp->rmm_ic;
3779#ifdef FEAT_MBYTE
3780 ireg_icombine = FALSE;
3781#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003782 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003783
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003784 r = bt_regexec_both(NULL, col, tm);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003785
3786 return r;
3787}
3788
3789/*
3790 * Match a regexp against a string ("line" points to the string) or multiple
3791 * lines ("line" is NULL, use reg_getline()).
3792 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003793 static long
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003794bt_regexec_both(line, col, tm)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003795 char_u *line;
3796 colnr_T col; /* column to start looking for match */
Bram Moolenaar78a15312009-05-15 19:33:18 +00003797 proftime_T *tm UNUSED; /* timeout limit or NULL */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003798{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003799 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003800 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003801 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003802
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003803 /* Create "regstack" and "backpos" if they are not allocated yet.
3804 * We allocate *_INITIAL amount of bytes first and then set the grow size
3805 * to much bigger value to avoid many malloc calls in case of deep regular
3806 * expressions. */
3807 if (regstack.ga_data == NULL)
3808 {
3809 /* Use an item size of 1 byte, since we push different things
3810 * onto the regstack. */
3811 ga_init2(&regstack, 1, REGSTACK_INITIAL);
3812 ga_grow(&regstack, REGSTACK_INITIAL);
3813 regstack.ga_growsize = REGSTACK_INITIAL * 8;
3814 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003815
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00003816 if (backpos.ga_data == NULL)
3817 {
3818 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3819 ga_grow(&backpos, BACKPOS_INITIAL);
3820 backpos.ga_growsize = BACKPOS_INITIAL * 8;
3821 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003822
Bram Moolenaar071d4272004-06-13 20:20:40 +00003823 if (REG_MULTI)
3824 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003825 prog = (bt_regprog_T *)reg_mmatch->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003826 line = reg_getline((linenr_T)0);
3827 reg_startpos = reg_mmatch->startpos;
3828 reg_endpos = reg_mmatch->endpos;
3829 }
3830 else
3831 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02003832 prog = (bt_regprog_T *)reg_match->regprog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003833 reg_startp = reg_match->startp;
3834 reg_endp = reg_match->endp;
3835 }
3836
3837 /* Be paranoid... */
3838 if (prog == NULL || line == NULL)
3839 {
3840 EMSG(_(e_null));
3841 goto theend;
3842 }
3843
3844 /* Check validity of program. */
3845 if (prog_magic_wrong())
3846 goto theend;
3847
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003848 /* If the start column is past the maximum column: no need to try. */
3849 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3850 goto theend;
3851
Bram Moolenaar071d4272004-06-13 20:20:40 +00003852 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3853 if (prog->regflags & RF_ICASE)
3854 ireg_ic = TRUE;
3855 else if (prog->regflags & RF_NOICASE)
3856 ireg_ic = FALSE;
3857
3858#ifdef FEAT_MBYTE
3859 /* If pattern contains "\Z" overrule value of ireg_icombine */
3860 if (prog->regflags & RF_ICOMBINE)
3861 ireg_icombine = TRUE;
3862#endif
3863
3864 /* If there is a "must appear" string, look for it. */
3865 if (prog->regmust != NULL)
3866 {
3867 int c;
3868
3869#ifdef FEAT_MBYTE
3870 if (has_mbyte)
3871 c = (*mb_ptr2char)(prog->regmust);
3872 else
3873#endif
3874 c = *prog->regmust;
3875 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003876
3877 /*
3878 * This is used very often, esp. for ":global". Use three versions of
3879 * the loop to avoid overhead of conditions.
3880 */
3881 if (!ireg_ic
3882#ifdef FEAT_MBYTE
3883 && !has_mbyte
3884#endif
3885 )
3886 while ((s = vim_strbyte(s, c)) != NULL)
3887 {
3888 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3889 break; /* Found it. */
3890 ++s;
3891 }
3892#ifdef FEAT_MBYTE
3893 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3894 while ((s = vim_strchr(s, c)) != NULL)
3895 {
3896 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3897 break; /* Found it. */
3898 mb_ptr_adv(s);
3899 }
3900#endif
3901 else
3902 while ((s = cstrchr(s, c)) != NULL)
3903 {
3904 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3905 break; /* Found it. */
3906 mb_ptr_adv(s);
3907 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003908 if (s == NULL) /* Not present. */
3909 goto theend;
3910 }
3911
3912 regline = line;
3913 reglnum = 0;
Bram Moolenaar73a92fe2010-09-14 10:55:47 +02003914 reg_toolong = FALSE;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003915
3916 /* Simplest case: Anchored match need be tried only once. */
3917 if (prog->reganch)
3918 {
3919 int c;
3920
3921#ifdef FEAT_MBYTE
3922 if (has_mbyte)
3923 c = (*mb_ptr2char)(regline + col);
3924 else
3925#endif
3926 c = regline[col];
3927 if (prog->regstart == NUL
3928 || prog->regstart == c
3929 || (ireg_ic && ((
3930#ifdef FEAT_MBYTE
3931 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3932 || (c < 255 && prog->regstart < 255 &&
3933#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00003934 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00003935 retval = regtry(prog, col);
3936 else
3937 retval = 0;
3938 }
3939 else
3940 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003941#ifdef FEAT_RELTIME
3942 int tm_count = 0;
3943#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00003944 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003945 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003946 {
3947 if (prog->regstart != NUL)
3948 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003949 /* Skip until the char we know it must start with.
3950 * Used often, do some work to avoid call overhead. */
3951 if (!ireg_ic
3952#ifdef FEAT_MBYTE
3953 && !has_mbyte
3954#endif
3955 )
3956 s = vim_strbyte(regline + col, prog->regstart);
3957 else
3958 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003959 if (s == NULL)
3960 {
3961 retval = 0;
3962 break;
3963 }
3964 col = (int)(s - regline);
3965 }
3966
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003967 /* Check for maximum column to try. */
3968 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3969 {
3970 retval = 0;
3971 break;
3972 }
3973
Bram Moolenaar071d4272004-06-13 20:20:40 +00003974 retval = regtry(prog, col);
3975 if (retval > 0)
3976 break;
3977
3978 /* if not currently on the first line, get it again */
3979 if (reglnum != 0)
3980 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003981 reglnum = 0;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003982 regline = reg_getline((linenr_T)0);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003983 }
3984 if (regline[col] == NUL)
3985 break;
3986#ifdef FEAT_MBYTE
3987 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003988 col += (*mb_ptr2len)(regline + col);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003989 else
3990#endif
3991 ++col;
Bram Moolenaar91a4e822008-01-19 14:59:58 +00003992#ifdef FEAT_RELTIME
3993 /* Check for timeout once in a twenty times to avoid overhead. */
3994 if (tm != NULL && ++tm_count == 20)
3995 {
3996 tm_count = 0;
3997 if (profile_passed_limit(tm))
3998 break;
3999 }
4000#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00004001 }
4002 }
4003
Bram Moolenaar071d4272004-06-13 20:20:40 +00004004theend:
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004005 /* Free "reg_tofree" when it's a bit big.
4006 * Free regstack and backpos if they are bigger than their initial size. */
4007 if (reg_tofreelen > 400)
4008 {
4009 vim_free(reg_tofree);
4010 reg_tofree = NULL;
4011 }
4012 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4013 ga_clear(&regstack);
4014 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4015 ga_clear(&backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004016
Bram Moolenaar071d4272004-06-13 20:20:40 +00004017 return retval;
4018}
4019
4020#ifdef FEAT_SYN_HL
4021static reg_extmatch_T *make_extmatch __ARGS((void));
4022
4023/*
4024 * Create a new extmatch and mark it as referenced once.
4025 */
4026 static reg_extmatch_T *
4027make_extmatch()
4028{
4029 reg_extmatch_T *em;
4030
4031 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4032 if (em != NULL)
4033 em->refcnt = 1;
4034 return em;
4035}
4036
4037/*
4038 * Add a reference to an extmatch.
4039 */
4040 reg_extmatch_T *
4041ref_extmatch(em)
4042 reg_extmatch_T *em;
4043{
4044 if (em != NULL)
4045 em->refcnt++;
4046 return em;
4047}
4048
4049/*
4050 * Remove a reference to an extmatch. If there are no references left, free
4051 * the info.
4052 */
4053 void
4054unref_extmatch(em)
4055 reg_extmatch_T *em;
4056{
4057 int i;
4058
4059 if (em != NULL && --em->refcnt <= 0)
4060 {
4061 for (i = 0; i < NSUBEXP; ++i)
4062 vim_free(em->matches[i]);
4063 vim_free(em);
4064 }
4065}
4066#endif
4067
4068/*
4069 * regtry - try match of "prog" with at regline["col"].
4070 * Returns 0 for failure, number of lines contained in the match otherwise.
4071 */
4072 static long
4073regtry(prog, col)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004074 bt_regprog_T *prog;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004075 colnr_T col;
4076{
4077 reginput = regline + col;
4078 need_clear_subexpr = TRUE;
4079#ifdef FEAT_SYN_HL
4080 /* Clear the external match subpointers if necessary. */
4081 if (prog->reghasz == REX_SET)
4082 need_clear_zsubexpr = TRUE;
4083#endif
4084
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004085 if (regmatch(prog->program + 1) == 0)
4086 return 0;
4087
4088 cleanup_subexpr();
4089 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004090 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004091 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004092 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004093 reg_startpos[0].lnum = 0;
4094 reg_startpos[0].col = col;
4095 }
4096 if (reg_endpos[0].lnum < 0)
4097 {
4098 reg_endpos[0].lnum = reglnum;
4099 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004100 }
4101 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004102 /* Use line number of "\ze". */
4103 reglnum = reg_endpos[0].lnum;
4104 }
4105 else
4106 {
4107 if (reg_startp[0] == NULL)
4108 reg_startp[0] = regline + col;
4109 if (reg_endp[0] == NULL)
4110 reg_endp[0] = reginput;
4111 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004112#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004113 /* Package any found \z(...\) matches for export. Default is none. */
4114 unref_extmatch(re_extmatch_out);
4115 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004116
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004117 if (prog->reghasz == REX_SET)
4118 {
4119 int i;
4120
4121 cleanup_zsubexpr();
4122 re_extmatch_out = make_extmatch();
4123 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004124 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004125 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004126 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004127 /* Only accept single line matches. */
4128 if (reg_startzpos[i].lnum >= 0
4129 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
4130 re_extmatch_out->matches[i] =
4131 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004132 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004133 reg_endzpos[i].col - reg_startzpos[i].col);
4134 }
4135 else
4136 {
4137 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4138 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00004139 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004140 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004141 }
4142 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004143 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004144#endif
4145 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004146}
4147
#ifdef FEAT_MBYTE
static int reg_prev_class __ARGS((void));

/*
 * Return the class of the character just before "reginput".
 * Returns -1 when "reginput" is at the start of "regline", thus there is no
 * previous character.
 */
    static int
reg_prev_class()
{
    char_u  *prev;

    if (reginput <= regline)
        return -1;

    /* Step back one byte, then back to the head of that character. */
    prev = reginput - 1;
    return mb_get_class_buf(prev - (*mb_head_off)(regline, prev), reg_buf);
}

#endif
/* Move "reginput" forward to the next character. */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments of BRACE_LIMITS.  Logically these belong inside
 * regmatch(); they are kept at file level to reduce the amount of stack
 * space that function uses (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
4173
4174/*
4175 * regmatch - main matching routine
4176 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004177 * Conceptually the strategy is simple: Check to see whether the current node
4178 * matches, push an item onto the regstack and loop to see whether the rest
4179 * matches, and then act accordingly. In practice we make some effort to
4180 * avoid using the regstack, in particular by going through "ordinary" nodes
4181 * (that don't need to know whether the rest of the match failed) by a nested
4182 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004183 *
4184 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
4185 * the last matched character.
4186 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
4187 * undefined state!
4188 */
4189 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004190regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004191 char_u *scan; /* Current node. */
4192{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004193 char_u *next; /* Next node. */
4194 int op;
4195 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004196 regitem_T *rp;
4197 int no;
4198 int status; /* one of the RA_ values: */
4199#define RA_FAIL 1 /* something failed, abort */
4200#define RA_CONT 2 /* continue in inner loop */
4201#define RA_BREAK 3 /* break inner loop */
4202#define RA_MATCH 4 /* successful match */
4203#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004204
Bram Moolenaar4bad6c82008-01-18 19:37:23 +00004205 /* Make "regstack" and "backpos" empty. They are allocated and freed in
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004206 * bt_regexec_both() to reduce malloc()/free() calls. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004207 regstack.ga_len = 0;
4208 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004209
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004210 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004211 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004212 */
4213 for (;;)
4214 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004215 /* Some patterns may cause a long time to match, even though they are not
Bram Moolenaar071d4272004-06-13 20:20:40 +00004216 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
4217 fast_breakcheck();
4218
4219#ifdef DEBUG
4220 if (scan != NULL && regnarrate)
4221 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004222 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004223 mch_errmsg("(\n");
4224 }
4225#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004226
4227 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00004228 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004229 * regstack.
4230 */
4231 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004232 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004233 if (got_int || scan == NULL)
4234 {
4235 status = RA_FAIL;
4236 break;
4237 }
4238 status = RA_CONT;
4239
Bram Moolenaar071d4272004-06-13 20:20:40 +00004240#ifdef DEBUG
4241 if (regnarrate)
4242 {
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004243 mch_errmsg((char *)regprop(scan));
Bram Moolenaar071d4272004-06-13 20:20:40 +00004244 mch_errmsg("...\n");
4245# ifdef FEAT_SYN_HL
4246 if (re_extmatch_in != NULL)
4247 {
4248 int i;
4249
4250 mch_errmsg(_("External submatches:\n"));
4251 for (i = 0; i < NSUBEXP; i++)
4252 {
4253 mch_errmsg(" \"");
4254 if (re_extmatch_in->matches[i] != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02004255 mch_errmsg((char *)re_extmatch_in->matches[i]);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004256 mch_errmsg("\"\n");
4257 }
4258 }
4259# endif
4260 }
4261#endif
4262 next = regnext(scan);
4263
4264 op = OP(scan);
4265 /* Check for character class with NL added. */
Bram Moolenaar640009d2006-10-17 16:48:26 +00004266 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4267 && *reginput == NUL && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004268 {
4269 reg_nextline();
4270 }
4271 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4272 {
4273 ADVANCE_REGINPUT();
4274 }
4275 else
4276 {
4277 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004278 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004279#ifdef FEAT_MBYTE
4280 if (has_mbyte)
4281 c = (*mb_ptr2char)(reginput);
4282 else
4283#endif
4284 c = *reginput;
4285 switch (op)
4286 {
4287 case BOL:
4288 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004289 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004290 break;
4291
4292 case EOL:
4293 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004294 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004295 break;
4296
4297 case RE_BOF:
Bram Moolenaara7139332007-12-09 18:26:22 +00004298 /* We're not at the beginning of the file when below the first
4299 * line where we started, not at the start of the line or we
4300 * didn't start at the first line of the buffer. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004301 if (reglnum != 0 || reginput != regline
Bram Moolenaara7139332007-12-09 18:26:22 +00004302 || (REG_MULTI && reg_firstlnum > 1))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004303 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004304 break;
4305
4306 case RE_EOF:
4307 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004308 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004309 break;
4310
4311 case CURSOR:
4312 /* Check if the buffer is in a window and compare the
4313 * reg_win->w_cursor position to the match position. */
4314 if (reg_win == NULL
4315 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4316 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004317 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004318 break;
4319
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004320 case RE_MARK:
4321 /* Compare the mark position to the match position. NOTE: Always
4322 * uses the current buffer. */
4323 {
4324 int mark = OPERAND(scan)[0];
4325 int cmp = OPERAND(scan)[1];
4326 pos_T *pos;
4327
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004328 pos = getmark_buf(reg_buf, mark, FALSE);
Bram Moolenaare9400a42007-05-06 13:04:32 +00004329 if (pos == NULL /* mark doesn't exist */
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004330 || pos->lnum <= 0 /* mark isn't set (in curbuf) */
4331 || (pos->lnum == reglnum + reg_firstlnum
4332 ? (pos->col == (colnr_T)(reginput - regline)
4333 ? (cmp == '<' || cmp == '>')
4334 : (pos->col < (colnr_T)(reginput - regline)
4335 ? cmp != '>'
4336 : cmp != '<'))
4337 : (pos->lnum < reglnum + reg_firstlnum
4338 ? cmp != '>'
4339 : cmp != '<')))
4340 status = RA_NOMATCH;
4341 }
4342 break;
4343
4344 case RE_VISUAL:
4345#ifdef FEAT_VISUAL
4346 /* Check if the buffer is the current buffer. and whether the
4347 * position is inside the Visual area. */
4348 if (reg_buf != curbuf || VIsual.lnum == 0)
4349 status = RA_NOMATCH;
4350 else
4351 {
4352 pos_T top, bot;
4353 linenr_T lnum;
4354 colnr_T col;
4355 win_T *wp = reg_win == NULL ? curwin : reg_win;
4356 int mode;
4357
4358 if (VIsual_active)
4359 {
4360 if (lt(VIsual, wp->w_cursor))
4361 {
4362 top = VIsual;
4363 bot = wp->w_cursor;
4364 }
4365 else
4366 {
4367 top = wp->w_cursor;
4368 bot = VIsual;
4369 }
4370 mode = VIsual_mode;
4371 }
4372 else
4373 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004374 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004375 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004376 top = curbuf->b_visual.vi_start;
4377 bot = curbuf->b_visual.vi_end;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004378 }
4379 else
4380 {
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004381 top = curbuf->b_visual.vi_end;
4382 bot = curbuf->b_visual.vi_start;
Bram Moolenaar32466aa2006-02-24 23:53:04 +00004383 }
Bram Moolenaara23ccb82006-02-27 00:08:02 +00004384 mode = curbuf->b_visual.vi_mode;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004385 }
4386 lnum = reglnum + reg_firstlnum;
4387 col = (colnr_T)(reginput - regline);
4388 if (lnum < top.lnum || lnum > bot.lnum)
4389 status = RA_NOMATCH;
4390 else if (mode == 'v')
4391 {
4392 if ((lnum == top.lnum && col < top.col)
4393 || (lnum == bot.lnum
4394 && col >= bot.col + (*p_sel != 'e')))
4395 status = RA_NOMATCH;
4396 }
4397 else if (mode == Ctrl_V)
4398 {
4399 colnr_T start, end;
4400 colnr_T start2, end2;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004401 colnr_T cols;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004402
4403 getvvcol(wp, &top, &start, NULL, &end);
4404 getvvcol(wp, &bot, &start2, NULL, &end2);
4405 if (start2 < start)
4406 start = start2;
4407 if (end2 > end)
4408 end = end2;
4409 if (top.col == MAXCOL || bot.col == MAXCOL)
4410 end = MAXCOL;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004411 cols = win_linetabsize(wp,
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004412 regline, (colnr_T)(reginput - regline));
Bram Moolenaar89d40322006-08-29 15:30:07 +00004413 if (cols < start || cols > end - (*p_sel == 'e'))
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00004414 status = RA_NOMATCH;
4415 }
4416 }
4417#else
4418 status = RA_NOMATCH;
4419#endif
4420 break;
4421
Bram Moolenaar071d4272004-06-13 20:20:40 +00004422 case RE_LNUM:
4423 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4424 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004425 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004426 break;
4427
4428 case RE_COL:
4429 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004430 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004431 break;
4432
4433 case RE_VCOL:
4434 if (!re_num_cmp((long_u)win_linetabsize(
4435 reg_win == NULL ? curwin : reg_win,
4436 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004437 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004438 break;
4439
4440 case BOW: /* \<word; reginput points to w */
4441 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004442 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004443#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004444 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004445 {
4446 int this_class;
4447
4448 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004449 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004450 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004451 status = RA_NOMATCH; /* not on a word at all */
4452 else if (reg_prev_class() == this_class)
4453 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004454 }
4455#endif
4456 else
4457 {
Bram Moolenaar2f315ab2013-01-25 20:11:01 +01004458 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4459 && vim_iswordc_buf(reginput[-1], reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004460 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004461 }
4462 break;
4463
4464 case EOW: /* word\>; reginput points after d */
4465 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004466 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004467#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004468 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004469 {
4470 int this_class, prev_class;
4471
4472 /* Get class of current and previous char (if it exists). */
Bram Moolenaarf813a182013-01-30 13:59:37 +01004473 this_class = mb_get_class_buf(reginput, reg_buf);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004474 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004475 if (this_class == prev_class
4476 || prev_class == 0 || prev_class == 1)
4477 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004478 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004479#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004480 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004481 {
Bram Moolenaar9d182dd2013-01-23 15:53:15 +01004482 if (!vim_iswordc_buf(reginput[-1], reg_buf)
4483 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004484 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004485 }
4486 break; /* Matched with EOW */
4487
4488 case ANY:
Bram Moolenaare337e5f2013-01-30 18:21:51 +01004489 /* ANY does not match new lines. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004490 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004491 status = RA_NOMATCH;
4492 else
4493 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004494 break;
4495
4496 case IDENT:
4497 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004498 status = RA_NOMATCH;
4499 else
4500 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004501 break;
4502
4503 case SIDENT:
4504 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004505 status = RA_NOMATCH;
4506 else
4507 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004508 break;
4509
4510 case KWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004511 if (!vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004512 status = RA_NOMATCH;
4513 else
4514 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004515 break;
4516
4517 case SKWORD:
Bram Moolenaarf813a182013-01-30 13:59:37 +01004518 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004519 status = RA_NOMATCH;
4520 else
4521 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004522 break;
4523
4524 case FNAME:
4525 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004526 status = RA_NOMATCH;
4527 else
4528 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004529 break;
4530
4531 case SFNAME:
4532 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004533 status = RA_NOMATCH;
4534 else
4535 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004536 break;
4537
4538 case PRINT:
4539 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004540 status = RA_NOMATCH;
4541 else
4542 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004543 break;
4544
4545 case SPRINT:
4546 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004547 status = RA_NOMATCH;
4548 else
4549 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004550 break;
4551
4552 case WHITE:
4553 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004554 status = RA_NOMATCH;
4555 else
4556 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004557 break;
4558
4559 case NWHITE:
4560 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004561 status = RA_NOMATCH;
4562 else
4563 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004564 break;
4565
4566 case DIGIT:
4567 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004568 status = RA_NOMATCH;
4569 else
4570 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004571 break;
4572
4573 case NDIGIT:
4574 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004575 status = RA_NOMATCH;
4576 else
4577 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004578 break;
4579
4580 case HEX:
4581 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004582 status = RA_NOMATCH;
4583 else
4584 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004585 break;
4586
4587 case NHEX:
4588 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004589 status = RA_NOMATCH;
4590 else
4591 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004592 break;
4593
4594 case OCTAL:
4595 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004596 status = RA_NOMATCH;
4597 else
4598 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004599 break;
4600
4601 case NOCTAL:
4602 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004603 status = RA_NOMATCH;
4604 else
4605 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004606 break;
4607
4608 case WORD:
4609 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004610 status = RA_NOMATCH;
4611 else
4612 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004613 break;
4614
4615 case NWORD:
4616 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004617 status = RA_NOMATCH;
4618 else
4619 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004620 break;
4621
4622 case HEAD:
4623 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004624 status = RA_NOMATCH;
4625 else
4626 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004627 break;
4628
4629 case NHEAD:
4630 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004631 status = RA_NOMATCH;
4632 else
4633 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004634 break;
4635
4636 case ALPHA:
4637 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004638 status = RA_NOMATCH;
4639 else
4640 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004641 break;
4642
4643 case NALPHA:
4644 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004645 status = RA_NOMATCH;
4646 else
4647 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004648 break;
4649
4650 case LOWER:
4651 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004652 status = RA_NOMATCH;
4653 else
4654 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004655 break;
4656
4657 case NLOWER:
4658 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004659 status = RA_NOMATCH;
4660 else
4661 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004662 break;
4663
4664 case UPPER:
4665 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004666 status = RA_NOMATCH;
4667 else
4668 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004669 break;
4670
4671 case NUPPER:
4672 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004673 status = RA_NOMATCH;
4674 else
4675 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004676 break;
4677
4678 case EXACTLY:
4679 {
4680 int len;
4681 char_u *opnd;
4682
4683 opnd = OPERAND(scan);
4684 /* Inline the first byte, for speed. */
4685 if (*opnd != *reginput
4686 && (!ireg_ic || (
4687#ifdef FEAT_MBYTE
4688 !enc_utf8 &&
4689#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00004690 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004691 status = RA_NOMATCH;
4692 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004693 {
4694 /* match empty string always works; happens when "~" is
4695 * empty. */
4696 }
4697 else if (opnd[1] == NUL
4698#ifdef FEAT_MBYTE
4699 && !(enc_utf8 && ireg_ic)
4700#endif
4701 )
4702 ++reginput; /* matched a single char */
4703 else
4704 {
4705 len = (int)STRLEN(opnd);
4706 /* Need to match first byte again for multi-byte. */
4707 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004708 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004709#ifdef FEAT_MBYTE
4710 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004711 else if (enc_utf8
4712 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004713 {
4714 /* raaron: This code makes a composing character get
4715 * ignored, which is the correct behavior (sometimes)
4716 * for voweled Hebrew texts. */
4717 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004718 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004719 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004720#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004721 else
4722 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004723 }
4724 }
4725 break;
4726
4727 case ANYOF:
4728 case ANYBUT:
4729 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004730 status = RA_NOMATCH;
4731 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4732 status = RA_NOMATCH;
4733 else
4734 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004735 break;
4736
4737#ifdef FEAT_MBYTE
4738 case MULTIBYTECODE:
4739 if (has_mbyte)
4740 {
4741 int i, len;
4742 char_u *opnd;
Bram Moolenaar89d40322006-08-29 15:30:07 +00004743 int opndc = 0, inpc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004744
4745 opnd = OPERAND(scan);
4746 /* Safety check (just in case 'encoding' was changed since
4747 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004748 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004749 {
4750 status = RA_NOMATCH;
4751 break;
4752 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004753 if (enc_utf8)
4754 opndc = mb_ptr2char(opnd);
4755 if (enc_utf8 && utf_iscomposing(opndc))
4756 {
4757 /* When only a composing char is given match at any
4758 * position where that composing char appears. */
4759 status = RA_NOMATCH;
4760 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004761 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004762 inpc = mb_ptr2char(reginput + i);
4763 if (!utf_iscomposing(inpc))
4764 {
4765 if (i > 0)
4766 break;
4767 }
4768 else if (opndc == inpc)
4769 {
4770 /* Include all following composing chars. */
4771 len = i + mb_ptr2len(reginput + i);
4772 status = RA_MATCH;
4773 break;
4774 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004775 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004776 }
4777 else
4778 for (i = 0; i < len; ++i)
4779 if (opnd[i] != reginput[i])
4780 {
4781 status = RA_NOMATCH;
4782 break;
4783 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004784 reginput += len;
4785 }
4786 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004787 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004788 break;
4789#endif
4790
4791 case NOTHING:
4792 break;
4793
4794 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004795 {
4796 int i;
4797 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004798
Bram Moolenaar582fd852005-03-28 20:58:01 +00004799 /*
4800 * When we run into BACK we need to check if we don't keep
4801 * looping without matching any input. The second and later
4802 * times a BACK is encountered it fails if the input is still
4803 * at the same position as the previous time.
4804 * The positions are stored in "backpos" and found by the
4805 * current value of "scan", the position in the RE program.
4806 */
4807 bp = (backpos_T *)backpos.ga_data;
4808 for (i = 0; i < backpos.ga_len; ++i)
4809 if (bp[i].bp_scan == scan)
4810 break;
4811 if (i == backpos.ga_len)
4812 {
4813 /* First time at this BACK, make room to store the pos. */
4814 if (ga_grow(&backpos, 1) == FAIL)
4815 status = RA_FAIL;
4816 else
4817 {
4818 /* get "ga_data" again, it may have changed */
4819 bp = (backpos_T *)backpos.ga_data;
4820 bp[i].bp_scan = scan;
4821 ++backpos.ga_len;
4822 }
4823 }
4824 else if (reg_save_equal(&bp[i].bp_pos))
4825 /* Still at same position as last time, fail. */
4826 status = RA_NOMATCH;
4827
4828 if (status != RA_FAIL && status != RA_NOMATCH)
4829 reg_save(&bp[i].bp_pos, &backpos);
4830 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004831 break;
4832
Bram Moolenaar071d4272004-06-13 20:20:40 +00004833 case MOPEN + 0: /* Match start: \zs */
4834 case MOPEN + 1: /* \( */
4835 case MOPEN + 2:
4836 case MOPEN + 3:
4837 case MOPEN + 4:
4838 case MOPEN + 5:
4839 case MOPEN + 6:
4840 case MOPEN + 7:
4841 case MOPEN + 8:
4842 case MOPEN + 9:
4843 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004844 no = op - MOPEN;
4845 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004846 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004847 if (rp == NULL)
4848 status = RA_FAIL;
4849 else
4850 {
4851 rp->rs_no = no;
4852 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4853 &reg_startp[no]);
4854 /* We simply continue and handle the result when done. */
4855 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004856 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004857 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004858
4859 case NOPEN: /* \%( */
4860 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004861 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004862 status = RA_FAIL;
4863 /* We simply continue and handle the result when done. */
4864 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004865
4866#ifdef FEAT_SYN_HL
4867 case ZOPEN + 1:
4868 case ZOPEN + 2:
4869 case ZOPEN + 3:
4870 case ZOPEN + 4:
4871 case ZOPEN + 5:
4872 case ZOPEN + 6:
4873 case ZOPEN + 7:
4874 case ZOPEN + 8:
4875 case ZOPEN + 9:
4876 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004877 no = op - ZOPEN;
4878 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004879 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004880 if (rp == NULL)
4881 status = RA_FAIL;
4882 else
4883 {
4884 rp->rs_no = no;
4885 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4886 &reg_startzp[no]);
4887 /* We simply continue and handle the result when done. */
4888 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004889 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004890 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004891#endif
4892
4893 case MCLOSE + 0: /* Match end: \ze */
4894 case MCLOSE + 1: /* \) */
4895 case MCLOSE + 2:
4896 case MCLOSE + 3:
4897 case MCLOSE + 4:
4898 case MCLOSE + 5:
4899 case MCLOSE + 6:
4900 case MCLOSE + 7:
4901 case MCLOSE + 8:
4902 case MCLOSE + 9:
4903 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004904 no = op - MCLOSE;
4905 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004906 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004907 if (rp == NULL)
4908 status = RA_FAIL;
4909 else
4910 {
4911 rp->rs_no = no;
4912 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4913 /* We simply continue and handle the result when done. */
4914 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004915 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004916 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004917
4918#ifdef FEAT_SYN_HL
4919 case ZCLOSE + 1: /* \) after \z( */
4920 case ZCLOSE + 2:
4921 case ZCLOSE + 3:
4922 case ZCLOSE + 4:
4923 case ZCLOSE + 5:
4924 case ZCLOSE + 6:
4925 case ZCLOSE + 7:
4926 case ZCLOSE + 8:
4927 case ZCLOSE + 9:
4928 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004929 no = op - ZCLOSE;
4930 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004931 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004932 if (rp == NULL)
4933 status = RA_FAIL;
4934 else
4935 {
4936 rp->rs_no = no;
4937 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4938 &reg_endzp[no]);
4939 /* We simply continue and handle the result when done. */
4940 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004941 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004942 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004943#endif
4944
4945 case BACKREF + 1:
4946 case BACKREF + 2:
4947 case BACKREF + 3:
4948 case BACKREF + 4:
4949 case BACKREF + 5:
4950 case BACKREF + 6:
4951 case BACKREF + 7:
4952 case BACKREF + 8:
4953 case BACKREF + 9:
4954 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004955 int len;
4956 linenr_T clnum;
4957 colnr_T ccol;
4958 char_u *p;
4959
4960 no = op - BACKREF;
4961 cleanup_subexpr();
4962 if (!REG_MULTI) /* Single-line regexp */
4963 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004964 if (reg_startp[no] == NULL || reg_endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004965 {
4966 /* Backref was not set: Match an empty string. */
4967 len = 0;
4968 }
4969 else
4970 {
4971 /* Compare current input with back-ref in the same
4972 * line. */
4973 len = (int)(reg_endp[no] - reg_startp[no]);
4974 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004975 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004976 }
4977 }
4978 else /* Multi-line regexp */
4979 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00004980 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004981 {
4982 /* Backref was not set: Match an empty string. */
4983 len = 0;
4984 }
4985 else
4986 {
4987 if (reg_startpos[no].lnum == reglnum
4988 && reg_endpos[no].lnum == reglnum)
4989 {
4990 /* Compare back-ref within the current line. */
4991 len = reg_endpos[no].col - reg_startpos[no].col;
4992 if (cstrncmp(regline + reg_startpos[no].col,
4993 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004994 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004995 }
4996 else
4997 {
4998 /* Messy situation: Need to compare between two
4999 * lines. */
5000 ccol = reg_startpos[no].col;
5001 clnum = reg_startpos[no].lnum;
5002 for (;;)
5003 {
5004 /* Since getting one line may invalidate
5005 * the other, need to make copy. Slow! */
5006 if (regline != reg_tofree)
5007 {
5008 len = (int)STRLEN(regline);
5009 if (reg_tofree == NULL
5010 || len >= (int)reg_tofreelen)
5011 {
5012 len += 50; /* get some extra */
5013 vim_free(reg_tofree);
5014 reg_tofree = alloc(len);
5015 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005016 {
5017 status = RA_FAIL; /* outof memory!*/
5018 break;
5019 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005020 reg_tofreelen = len;
5021 }
5022 STRCPY(reg_tofree, regline);
5023 reginput = reg_tofree
5024 + (reginput - regline);
5025 regline = reg_tofree;
5026 }
5027
5028 /* Get the line to compare with. */
5029 p = reg_getline(clnum);
5030 if (clnum == reg_endpos[no].lnum)
5031 len = reg_endpos[no].col - ccol;
5032 else
5033 len = (int)STRLEN(p + ccol);
5034
5035 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005036 {
5037 status = RA_NOMATCH; /* doesn't match */
5038 break;
5039 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005040 if (clnum == reg_endpos[no].lnum)
5041 break; /* match and at end! */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005042 if (reglnum >= reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005043 {
5044 status = RA_NOMATCH; /* text too short */
5045 break;
5046 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005047
5048 /* Advance to next line. */
5049 reg_nextline();
5050 ++clnum;
5051 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005052 if (got_int)
5053 {
5054 status = RA_FAIL;
5055 break;
5056 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005057 }
5058
5059 /* found a match! Note that regline may now point
5060 * to a copy of the line, that should not matter. */
5061 }
5062 }
5063 }
5064
5065 /* Matched the backref, skip over it. */
5066 reginput += len;
5067 }
5068 break;
5069
5070#ifdef FEAT_SYN_HL
5071 case ZREF + 1:
5072 case ZREF + 2:
5073 case ZREF + 3:
5074 case ZREF + 4:
5075 case ZREF + 5:
5076 case ZREF + 6:
5077 case ZREF + 7:
5078 case ZREF + 8:
5079 case ZREF + 9:
5080 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005081 int len;
5082
5083 cleanup_zsubexpr();
5084 no = op - ZREF;
5085 if (re_extmatch_in != NULL
5086 && re_extmatch_in->matches[no] != NULL)
5087 {
5088 len = (int)STRLEN(re_extmatch_in->matches[no]);
5089 if (cstrncmp(re_extmatch_in->matches[no],
5090 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005091 status = RA_NOMATCH;
5092 else
5093 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005094 }
5095 else
5096 {
5097 /* Backref was not set: Match an empty string. */
5098 }
5099 }
5100 break;
5101#endif
5102
5103 case BRANCH:
5104 {
5105 if (OP(next) != BRANCH) /* No choice. */
5106 next = OPERAND(scan); /* Avoid recursion. */
5107 else
5108 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005109 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005110 if (rp == NULL)
5111 status = RA_FAIL;
5112 else
5113 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005114 }
5115 }
5116 break;
5117
5118 case BRACE_LIMITS:
5119 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005120 if (OP(next) == BRACE_SIMPLE)
5121 {
5122 bl_minval = OPERAND_MIN(scan);
5123 bl_maxval = OPERAND_MAX(scan);
5124 }
5125 else if (OP(next) >= BRACE_COMPLEX
5126 && OP(next) < BRACE_COMPLEX + 10)
5127 {
5128 no = OP(next) - BRACE_COMPLEX;
5129 brace_min[no] = OPERAND_MIN(scan);
5130 brace_max[no] = OPERAND_MAX(scan);
5131 brace_count[no] = 0;
5132 }
5133 else
5134 {
5135 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005136 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005137 }
5138 }
5139 break;
5140
5141 case BRACE_COMPLEX + 0:
5142 case BRACE_COMPLEX + 1:
5143 case BRACE_COMPLEX + 2:
5144 case BRACE_COMPLEX + 3:
5145 case BRACE_COMPLEX + 4:
5146 case BRACE_COMPLEX + 5:
5147 case BRACE_COMPLEX + 6:
5148 case BRACE_COMPLEX + 7:
5149 case BRACE_COMPLEX + 8:
5150 case BRACE_COMPLEX + 9:
5151 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00005152 no = op - BRACE_COMPLEX;
5153 ++brace_count[no];
5154
5155 /* If not matched enough times yet, try one more */
5156 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005157 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005158 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005159 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005160 if (rp == NULL)
5161 status = RA_FAIL;
5162 else
5163 {
5164 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005165 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005166 next = OPERAND(scan);
5167 /* We continue and handle the result when done. */
5168 }
5169 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005170 }
5171
5172 /* If matched enough times, may try matching some more */
5173 if (brace_min[no] <= brace_max[no])
5174 {
5175 /* Range is the normal way around, use longest match */
5176 if (brace_count[no] <= brace_max[no])
5177 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005178 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005179 if (rp == NULL)
5180 status = RA_FAIL;
5181 else
5182 {
5183 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005184 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005185 next = OPERAND(scan);
5186 /* We continue and handle the result when done. */
5187 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005188 }
5189 }
5190 else
5191 {
5192 /* Range is backwards, use shortest match first */
5193 if (brace_count[no] <= brace_min[no])
5194 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005195 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005196 if (rp == NULL)
5197 status = RA_FAIL;
5198 else
5199 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005200 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005201 /* We continue and handle the result when done. */
5202 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005203 }
5204 }
5205 }
5206 break;
5207
5208 case BRACE_SIMPLE:
5209 case STAR:
5210 case PLUS:
5211 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005212 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005213
5214 /*
5215 * Lookahead to avoid useless match attempts when we know
5216 * what character comes next.
5217 */
5218 if (OP(next) == EXACTLY)
5219 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005220 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005221 if (ireg_ic)
5222 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005223 if (MB_ISUPPER(rst.nextb))
5224 rst.nextb_ic = MB_TOLOWER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005225 else
Bram Moolenaara245a5b2007-08-11 11:58:23 +00005226 rst.nextb_ic = MB_TOUPPER(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005227 }
5228 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005229 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005230 }
5231 else
5232 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005233 rst.nextb = NUL;
5234 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005235 }
5236 if (op != BRACE_SIMPLE)
5237 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005238 rst.minval = (op == STAR) ? 0 : 1;
5239 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005240 }
5241 else
5242 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005243 rst.minval = bl_minval;
5244 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005245 }
5246
5247 /*
5248 * When maxval > minval, try matching as much as possible, up
5249 * to maxval. When maxval < minval, try matching at least the
5250 * minimal number (since the range is backwards, that's also
5251 * maxval!).
5252 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005253 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005254 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005255 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005256 status = RA_FAIL;
5257 break;
5258 }
5259 if (rst.minval <= rst.maxval
5260 ? rst.count >= rst.minval : rst.count >= rst.maxval)
5261 {
5262 /* It could match. Prepare for trying to match what
5263 * follows. The code is below. Parameters are stored in
5264 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005265 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005266 {
5267 EMSG(_(e_maxmempat));
5268 status = RA_FAIL;
5269 }
5270 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005271 status = RA_FAIL;
5272 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005273 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005274 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005275 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00005276 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005277 if (rp == NULL)
5278 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005279 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005280 {
5281 *(((regstar_T *)rp) - 1) = rst;
5282 status = RA_BREAK; /* skip the restore bits */
5283 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005284 }
5285 }
5286 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005287 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005288
Bram Moolenaar071d4272004-06-13 20:20:40 +00005289 }
5290 break;
5291
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005292 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00005293 case MATCH:
5294 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005295 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005296 if (rp == NULL)
5297 status = RA_FAIL;
5298 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005299 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005300 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005301 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005302 next = OPERAND(scan);
5303 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005304 }
5305 break;
5306
5307 case BEHIND:
5308 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005309 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00005310 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005311 {
5312 EMSG(_(e_maxmempat));
5313 status = RA_FAIL;
5314 }
5315 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005316 status = RA_FAIL;
5317 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005318 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005319 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005320 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005321 if (rp == NULL)
5322 status = RA_FAIL;
5323 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00005324 {
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005325 /* Need to save the subexpr to be able to restore them
5326 * when there is a match but we don't use it. */
5327 save_subexpr(((regbehind_T *)rp) - 1);
5328
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005329 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005330 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005331 /* First try if what follows matches. If it does then we
5332 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005333 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00005334 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005335 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005336
5337 case BHPOS:
5338 if (REG_MULTI)
5339 {
5340 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5341 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005342 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005343 }
5344 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005345 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005346 break;
5347
5348 case NEWL:
Bram Moolenaar640009d2006-10-17 16:48:26 +00005349 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5350 || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005351 status = RA_NOMATCH;
5352 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005353 ADVANCE_REGINPUT();
5354 else
5355 reg_nextline();
5356 break;
5357
5358 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005359 status = RA_MATCH; /* Success! */
5360 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005361
5362 default:
5363 EMSG(_(e_re_corr));
5364#ifdef DEBUG
5365 printf("Illegal op code %d\n", op);
5366#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005367 status = RA_FAIL;
5368 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005369 }
5370 }
5371
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005372 /* If we can't continue sequentially, break the inner loop. */
5373 if (status != RA_CONT)
5374 break;
5375
5376 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005377 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005378
5379 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00005380
5381 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005382 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00005383 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00005384 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005385 while (regstack.ga_len > 0 && status != RA_FAIL)
5386 {
5387 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5388 switch (rp->rs_state)
5389 {
5390 case RS_NOPEN:
5391 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005392 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005393 break;
5394
5395 case RS_MOPEN:
5396 /* Pop the state. Restore pointers when there is no match. */
5397 if (status == RA_NOMATCH)
5398 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5399 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005400 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005401 break;
5402
5403#ifdef FEAT_SYN_HL
5404 case RS_ZOPEN:
5405 /* Pop the state. Restore pointers when there is no match. */
5406 if (status == RA_NOMATCH)
5407 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5408 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005409 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005410 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005411#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005412
5413 case RS_MCLOSE:
5414 /* Pop the state. Restore pointers when there is no match. */
5415 if (status == RA_NOMATCH)
5416 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5417 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005418 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005419 break;
5420
5421#ifdef FEAT_SYN_HL
5422 case RS_ZCLOSE:
5423 /* Pop the state. Restore pointers when there is no match. */
5424 if (status == RA_NOMATCH)
5425 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5426 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005427 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005428 break;
5429#endif
5430
5431 case RS_BRANCH:
5432 if (status == RA_MATCH)
5433 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005434 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005435 else
5436 {
5437 if (status != RA_BREAK)
5438 {
5439 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005440 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005441 scan = rp->rs_scan;
5442 }
5443 if (scan == NULL || OP(scan) != BRANCH)
5444 {
5445 /* no more branches, didn't find a match */
5446 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005447 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005448 }
5449 else
5450 {
5451 /* Prepare to try a branch. */
5452 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00005453 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005454 scan = OPERAND(scan);
5455 }
5456 }
5457 break;
5458
5459 case RS_BRCPLX_MORE:
5460 /* Pop the state. Restore pointers when there is no match. */
5461 if (status == RA_NOMATCH)
5462 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005463 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005464 --brace_count[rp->rs_no]; /* decrement match count */
5465 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005466 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005467 break;
5468
5469 case RS_BRCPLX_LONG:
5470 /* Pop the state. Restore pointers when there is no match. */
5471 if (status == RA_NOMATCH)
5472 {
5473 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005474 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005475 --brace_count[rp->rs_no];
5476 /* continue with the items after "\{}" */
5477 status = RA_CONT;
5478 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005479 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005480 if (status == RA_CONT)
5481 scan = regnext(scan);
5482 break;
5483
5484 case RS_BRCPLX_SHORT:
5485 /* Pop the state. Restore pointers when there is no match. */
5486 if (status == RA_NOMATCH)
5487 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005488 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005489 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005490 if (status == RA_NOMATCH)
5491 {
5492 scan = OPERAND(scan);
5493 status = RA_CONT;
5494 }
5495 break;
5496
5497 case RS_NOMATCH:
5498 /* Pop the state. If the operand matches for NOMATCH or
5499 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
5500 * except for SUBPAT, and continue with the next item. */
5501 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5502 status = RA_NOMATCH;
5503 else
5504 {
5505 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005506 if (rp->rs_no != SUBPAT) /* zero-width */
5507 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005508 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005509 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005510 if (status == RA_CONT)
5511 scan = regnext(scan);
5512 break;
5513
5514 case RS_BEHIND1:
5515 if (status == RA_NOMATCH)
5516 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005517 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005518 regstack.ga_len -= sizeof(regbehind_T);
5519 }
5520 else
5521 {
5522 /* The stuff after BEHIND/NOBEHIND matches. Now try if
5523 * the behind part does (not) match before the current
5524 * position in the input. This must be done at every
5525 * position in the input and checking if the match ends at
5526 * the current position. */
5527
5528 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005529 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005530
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005531 /* Start looking for a match with operand at the current
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00005532 * position. Go back one character until we find the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005533 * result, hitting the start of the line or the previous
5534 * line (for multi-line matching).
5535 * Set behind_pos to where the match should end, BHPOS
5536 * will match it. Save the current value. */
5537 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5538 behind_pos = rp->rs_un.regsave;
5539
5540 rp->rs_state = RS_BEHIND2;
5541
Bram Moolenaar582fd852005-03-28 20:58:01 +00005542 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005543 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005544 }
5545 break;
5546
5547 case RS_BEHIND2:
5548 /*
5549 * Looping for BEHIND / NOBEHIND match.
5550 */
5551 if (status == RA_MATCH && reg_save_equal(&behind_pos))
5552 {
5553 /* found a match that ends where "next" started */
5554 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5555 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005556 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5557 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005558 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005559 {
5560 /* But we didn't want a match. Need to restore the
5561 * subexpr, because what follows matched, so they have
5562 * been set. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005563 status = RA_NOMATCH;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005564 restore_subexpr(((regbehind_T *)rp) - 1);
5565 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005566 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005567 regstack.ga_len -= sizeof(regbehind_T);
5568 }
5569 else
5570 {
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005571 long limit;
5572
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005573 /* No match or a match that doesn't end where we want it: Go
5574 * back one character. May go to previous line once. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005575 no = OK;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005576 limit = OPERAND_MIN(rp->rs_scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005577 if (REG_MULTI)
5578 {
Bram Moolenaar61602c52013-06-01 19:54:43 +02005579 if (limit > 0
5580 && ((rp->rs_un.regsave.rs_u.pos.lnum
5581 < behind_pos.rs_u.pos.lnum
5582 ? (colnr_T)STRLEN(regline)
5583 : behind_pos.rs_u.pos.col)
5584 - rp->rs_un.regsave.rs_u.pos.col >= limit))
5585 no = FAIL;
5586 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005587 {
5588 if (rp->rs_un.regsave.rs_u.pos.lnum
5589 < behind_pos.rs_u.pos.lnum
5590 || reg_getline(
5591 --rp->rs_un.regsave.rs_u.pos.lnum)
5592 == NULL)
5593 no = FAIL;
5594 else
5595 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005596 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005597 rp->rs_un.regsave.rs_u.pos.col =
5598 (colnr_T)STRLEN(regline);
5599 }
5600 }
5601 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005602 {
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005603#ifdef FEAT_MBYTE
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005604 if (has_mbyte)
5605 rp->rs_un.regsave.rs_u.pos.col -=
5606 (*mb_head_off)(regline, regline
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005607 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005608 else
Bram Moolenaarf5e44a72013-02-26 18:46:01 +01005609#endif
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005610 --rp->rs_un.regsave.rs_u.pos.col;
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005611 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005612 }
5613 else
5614 {
5615 if (rp->rs_un.regsave.rs_u.ptr == regline)
5616 no = FAIL;
5617 else
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005618 {
5619 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5620 if (limit > 0 && (long)(behind_pos.rs_u.ptr
5621 - rp->rs_un.regsave.rs_u.ptr) > limit)
5622 no = FAIL;
5623 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005624 }
5625 if (no == OK)
5626 {
5627 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00005628 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar75eb1612013-05-29 18:45:11 +02005629 scan = OPERAND(rp->rs_scan) + 4;
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005630 if (status == RA_MATCH)
5631 {
5632 /* We did match, so subexpr may have been changed,
5633 * need to restore them for the next try. */
5634 status = RA_NOMATCH;
5635 restore_subexpr(((regbehind_T *)rp) - 1);
5636 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005637 }
5638 else
5639 {
5640 /* Can't advance. For NOBEHIND that's a match. */
5641 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5642 if (rp->rs_no == NOBEHIND)
5643 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005644 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5645 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005646 status = RA_MATCH;
5647 }
5648 else
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00005649 {
5650 /* We do want a proper match. Need to restore the
5651 * subexpr if we had a match, because they may have
5652 * been set. */
5653 if (status == RA_MATCH)
5654 {
5655 status = RA_NOMATCH;
5656 restore_subexpr(((regbehind_T *)rp) - 1);
5657 }
5658 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005659 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005660 regstack.ga_len -= sizeof(regbehind_T);
5661 }
5662 }
5663 break;
5664
5665 case RS_STAR_LONG:
5666 case RS_STAR_SHORT:
5667 {
5668 regstar_T *rst = ((regstar_T *)rp) - 1;
5669
5670 if (status == RA_MATCH)
5671 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005672 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005673 regstack.ga_len -= sizeof(regstar_T);
5674 break;
5675 }
5676
5677 /* Tried once already, restore input pointers. */
5678 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00005679 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005680
5681 /* Repeat until we found a position where it could match. */
5682 for (;;)
5683 {
5684 if (status != RA_BREAK)
5685 {
5686 /* Tried first position already, advance. */
5687 if (rp->rs_state == RS_STAR_LONG)
5688 {
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005689 /* Trying for longest match, but couldn't or
5690 * didn't match -- back up one char. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005691 if (--rst->count < rst->minval)
5692 break;
5693 if (reginput == regline)
5694 {
5695 /* backup to last char of previous line */
5696 --reglnum;
5697 regline = reg_getline(reglnum);
5698 /* Just in case regrepeat() didn't count
5699 * right. */
5700 if (regline == NULL)
5701 break;
5702 reginput = regline + STRLEN(regline);
5703 fast_breakcheck();
5704 }
5705 else
5706 mb_ptr_back(regline, reginput);
5707 }
5708 else
5709 {
5710 /* Range is backwards, use shortest match first.
5711 * Careful: maxval and minval are exchanged!
5712 * Couldn't or didn't match: try advancing one
5713 * char. */
5714 if (rst->count == rst->minval
5715 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5716 break;
5717 ++rst->count;
5718 }
5719 if (got_int)
5720 break;
5721 }
5722 else
5723 status = RA_NOMATCH;
5724
5725 /* If it could match, try it. */
5726 if (rst->nextb == NUL || *reginput == rst->nextb
5727 || *reginput == rst->nextb_ic)
5728 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005729 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005730 scan = regnext(rp->rs_scan);
5731 status = RA_CONT;
5732 break;
5733 }
5734 }
5735 if (status != RA_CONT)
5736 {
5737 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005738 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005739 regstack.ga_len -= sizeof(regstar_T);
5740 status = RA_NOMATCH;
5741 }
5742 }
5743 break;
5744 }
5745
Bram Moolenaar32466aa2006-02-24 23:53:04 +00005746 /* If we want to continue the inner loop or didn't pop a state
5747 * continue matching loop */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005748 if (status == RA_CONT || rp == (regitem_T *)
5749 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5750 break;
5751 }
5752
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005753 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005754 if (status == RA_CONT)
5755 continue;
5756
5757 /*
5758 * If the regstack is empty or something failed we are done.
5759 */
5760 if (regstack.ga_len == 0 || status == RA_FAIL)
5761 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005762 if (scan == NULL)
5763 {
5764 /*
5765 * We get here only if there's trouble -- normally "case END" is
5766 * the terminating point.
5767 */
5768 EMSG(_(e_re_corr));
5769#ifdef DEBUG
5770 printf("Premature EOL\n");
5771#endif
5772 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005773 if (status == RA_FAIL)
5774 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005775 return (status == RA_MATCH);
5776 }
5777
5778 } /* End of loop until the regstack is empty. */
5779
5780 /* NOTREACHED */
5781}
5782
5783/*
5784 * Push an item onto the regstack.
5785 * Returns pointer to new item. Returns NULL when out of memory.
5786 */
5787 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005788regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005789 regstate_T state;
5790 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005791{
5792 regitem_T *rp;
5793
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005794 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005795 {
5796 EMSG(_(e_maxmempat));
5797 return NULL;
5798 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005799 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005800 return NULL;
5801
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005802 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005803 rp->rs_state = state;
5804 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005805
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005806 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005807 return rp;
5808}
5809
5810/*
5811 * Pop an item from the regstack.
5812 */
5813 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005814regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005815 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005816{
5817 regitem_T *rp;
5818
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005819 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005820 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005821
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005822 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005823}
5824
Bram Moolenaar071d4272004-06-13 20:20:40 +00005825/*
5826 * regrepeat - repeatedly match something simple, return how many.
5827 * Advances reginput (and reglnum) to just after the matched chars.
5828 */
5829 static int
5830regrepeat(p, maxcount)
5831 char_u *p;
5832 long maxcount; /* maximum number of matches allowed */
5833{
5834 long count = 0;
5835 char_u *scan;
5836 char_u *opnd;
5837 int mask;
5838 int testval = 0;
5839
5840 scan = reginput; /* Make local copy of reginput for speed. */
5841 opnd = OPERAND(p);
5842 switch (OP(p))
5843 {
5844 case ANY:
5845 case ANY + ADD_NL:
5846 while (count < maxcount)
5847 {
5848 /* Matching anything means we continue until end-of-line (or
5849 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5850 while (*scan != NUL && count < maxcount)
5851 {
5852 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005853 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005854 }
Bram Moolenaar640009d2006-10-17 16:48:26 +00005855 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5856 || reg_line_lbr || count == maxcount)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005857 break;
5858 ++count; /* count the line-break */
5859 reg_nextline();
5860 scan = reginput;
5861 if (got_int)
5862 break;
5863 }
5864 break;
5865
5866 case IDENT:
5867 case IDENT + ADD_NL:
5868 testval = TRUE;
5869 /*FALLTHROUGH*/
5870 case SIDENT:
5871 case SIDENT + ADD_NL:
5872 while (count < maxcount)
5873 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005874 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005875 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005876 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005877 }
5878 else if (*scan == NUL)
5879 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005880 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5881 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005882 break;
5883 reg_nextline();
5884 scan = reginput;
5885 if (got_int)
5886 break;
5887 }
5888 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5889 ++scan;
5890 else
5891 break;
5892 ++count;
5893 }
5894 break;
5895
5896 case KWORD:
5897 case KWORD + ADD_NL:
5898 testval = TRUE;
5899 /*FALLTHROUGH*/
5900 case SKWORD:
5901 case SKWORD + ADD_NL:
5902 while (count < maxcount)
5903 {
Bram Moolenaarf813a182013-01-30 13:59:37 +01005904 if (vim_iswordp_buf(scan, reg_buf)
5905 && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005906 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005907 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005908 }
5909 else if (*scan == NUL)
5910 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005911 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5912 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005913 break;
5914 reg_nextline();
5915 scan = reginput;
5916 if (got_int)
5917 break;
5918 }
5919 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5920 ++scan;
5921 else
5922 break;
5923 ++count;
5924 }
5925 break;
5926
5927 case FNAME:
5928 case FNAME + ADD_NL:
5929 testval = TRUE;
5930 /*FALLTHROUGH*/
5931 case SFNAME:
5932 case SFNAME + ADD_NL:
5933 while (count < maxcount)
5934 {
Bram Moolenaar09ea9fc2013-05-21 00:03:02 +02005935 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00005936 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005937 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005938 }
5939 else if (*scan == NUL)
5940 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005941 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5942 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005943 break;
5944 reg_nextline();
5945 scan = reginput;
5946 if (got_int)
5947 break;
5948 }
5949 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5950 ++scan;
5951 else
5952 break;
5953 ++count;
5954 }
5955 break;
5956
5957 case PRINT:
5958 case PRINT + ADD_NL:
5959 testval = TRUE;
5960 /*FALLTHROUGH*/
5961 case SPRINT:
5962 case SPRINT + ADD_NL:
5963 while (count < maxcount)
5964 {
5965 if (*scan == NUL)
5966 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005967 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5968 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005969 break;
5970 reg_nextline();
5971 scan = reginput;
5972 if (got_int)
5973 break;
5974 }
5975 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5976 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005977 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005978 }
5979 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5980 ++scan;
5981 else
5982 break;
5983 ++count;
5984 }
5985 break;
5986
5987 case WHITE:
5988 case WHITE + ADD_NL:
5989 testval = mask = RI_WHITE;
5990do_class:
5991 while (count < maxcount)
5992 {
5993#ifdef FEAT_MBYTE
5994 int l;
5995#endif
5996 if (*scan == NUL)
5997 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00005998 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5999 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006000 break;
6001 reg_nextline();
6002 scan = reginput;
6003 if (got_int)
6004 break;
6005 }
6006#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006007 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006008 {
6009 if (testval != 0)
6010 break;
6011 scan += l;
6012 }
6013#endif
6014 else if ((class_tab[*scan] & mask) == testval)
6015 ++scan;
6016 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6017 ++scan;
6018 else
6019 break;
6020 ++count;
6021 }
6022 break;
6023
6024 case NWHITE:
6025 case NWHITE + ADD_NL:
6026 mask = RI_WHITE;
6027 goto do_class;
6028 case DIGIT:
6029 case DIGIT + ADD_NL:
6030 testval = mask = RI_DIGIT;
6031 goto do_class;
6032 case NDIGIT:
6033 case NDIGIT + ADD_NL:
6034 mask = RI_DIGIT;
6035 goto do_class;
6036 case HEX:
6037 case HEX + ADD_NL:
6038 testval = mask = RI_HEX;
6039 goto do_class;
6040 case NHEX:
6041 case NHEX + ADD_NL:
6042 mask = RI_HEX;
6043 goto do_class;
6044 case OCTAL:
6045 case OCTAL + ADD_NL:
6046 testval = mask = RI_OCTAL;
6047 goto do_class;
6048 case NOCTAL:
6049 case NOCTAL + ADD_NL:
6050 mask = RI_OCTAL;
6051 goto do_class;
6052 case WORD:
6053 case WORD + ADD_NL:
6054 testval = mask = RI_WORD;
6055 goto do_class;
6056 case NWORD:
6057 case NWORD + ADD_NL:
6058 mask = RI_WORD;
6059 goto do_class;
6060 case HEAD:
6061 case HEAD + ADD_NL:
6062 testval = mask = RI_HEAD;
6063 goto do_class;
6064 case NHEAD:
6065 case NHEAD + ADD_NL:
6066 mask = RI_HEAD;
6067 goto do_class;
6068 case ALPHA:
6069 case ALPHA + ADD_NL:
6070 testval = mask = RI_ALPHA;
6071 goto do_class;
6072 case NALPHA:
6073 case NALPHA + ADD_NL:
6074 mask = RI_ALPHA;
6075 goto do_class;
6076 case LOWER:
6077 case LOWER + ADD_NL:
6078 testval = mask = RI_LOWER;
6079 goto do_class;
6080 case NLOWER:
6081 case NLOWER + ADD_NL:
6082 mask = RI_LOWER;
6083 goto do_class;
6084 case UPPER:
6085 case UPPER + ADD_NL:
6086 testval = mask = RI_UPPER;
6087 goto do_class;
6088 case NUPPER:
6089 case NUPPER + ADD_NL:
6090 mask = RI_UPPER;
6091 goto do_class;
6092
6093 case EXACTLY:
6094 {
6095 int cu, cl;
6096
6097 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006098 * would have been used for it. It does handle single-byte
6099 * characters, such as latin1. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006100 if (ireg_ic)
6101 {
Bram Moolenaara245a5b2007-08-11 11:58:23 +00006102 cu = MB_TOUPPER(*opnd);
6103 cl = MB_TOLOWER(*opnd);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006104 while (count < maxcount && (*scan == cu || *scan == cl))
6105 {
6106 count++;
6107 scan++;
6108 }
6109 }
6110 else
6111 {
6112 cu = *opnd;
6113 while (count < maxcount && *scan == cu)
6114 {
6115 count++;
6116 scan++;
6117 }
6118 }
6119 break;
6120 }
6121
6122#ifdef FEAT_MBYTE
6123 case MULTIBYTECODE:
6124 {
6125 int i, len, cf = 0;
6126
6127 /* Safety check (just in case 'encoding' was changed since
6128 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006129 if ((len = (*mb_ptr2len)(opnd)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006130 {
6131 if (ireg_ic && enc_utf8)
6132 cf = utf_fold(utf_ptr2char(opnd));
6133 while (count < maxcount)
6134 {
6135 for (i = 0; i < len; ++i)
6136 if (opnd[i] != scan[i])
6137 break;
6138 if (i < len && (!ireg_ic || !enc_utf8
6139 || utf_fold(utf_ptr2char(scan)) != cf))
6140 break;
6141 scan += len;
6142 ++count;
6143 }
6144 }
6145 }
6146 break;
6147#endif
6148
6149 case ANYOF:
6150 case ANYOF + ADD_NL:
6151 testval = TRUE;
6152 /*FALLTHROUGH*/
6153
6154 case ANYBUT:
6155 case ANYBUT + ADD_NL:
6156 while (count < maxcount)
6157 {
6158#ifdef FEAT_MBYTE
6159 int len;
6160#endif
6161 if (*scan == NUL)
6162 {
Bram Moolenaar640009d2006-10-17 16:48:26 +00006163 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6164 || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006165 break;
6166 reg_nextline();
6167 scan = reginput;
6168 if (got_int)
6169 break;
6170 }
6171 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6172 ++scan;
6173#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006174 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006175 {
6176 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6177 break;
6178 scan += len;
6179 }
6180#endif
6181 else
6182 {
6183 if ((cstrchr(opnd, *scan) == NULL) == testval)
6184 break;
6185 ++scan;
6186 }
6187 ++count;
6188 }
6189 break;
6190
6191 case NEWL:
6192 while (count < maxcount
Bram Moolenaar640009d2006-10-17 16:48:26 +00006193 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6194 && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006195 {
6196 count++;
6197 if (reg_line_lbr)
6198 ADVANCE_REGINPUT();
6199 else
6200 reg_nextline();
6201 scan = reginput;
6202 if (got_int)
6203 break;
6204 }
6205 break;
6206
6207 default: /* Oh dear. Called inappropriately. */
6208 EMSG(_(e_re_corr));
6209#ifdef DEBUG
6210 printf("Called regrepeat with op code %d\n", OP(p));
6211#endif
6212 break;
6213 }
6214
6215 reginput = scan;
6216
6217 return (int)count;
6218}
6219
6220/*
6221 * regnext - dig the "next" pointer out of a node
Bram Moolenaard3005802009-11-25 17:21:32 +00006222 * Returns NULL when calculating size, when there is no next item and when
6223 * there is an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +00006224 */
6225 static char_u *
6226regnext(p)
6227 char_u *p;
6228{
6229 int offset;
6230
Bram Moolenaard3005802009-11-25 17:21:32 +00006231 if (p == JUST_CALC_SIZE || reg_toolong)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006232 return NULL;
6233
6234 offset = NEXT(p);
6235 if (offset == 0)
6236 return NULL;
6237
Bram Moolenaar582fd852005-03-28 20:58:01 +00006238 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006239 return p - offset;
6240 else
6241 return p + offset;
6242}
6243
6244/*
6245 * Check the regexp program for its magic number.
6246 * Return TRUE if it's wrong.
6247 */
6248 static int
6249prog_magic_wrong()
6250{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006251 regprog_T *prog;
6252
6253 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6254 if (prog->engine == &nfa_regengine)
6255 /* For NFA matcher we don't check the magic */
6256 return FALSE;
6257
6258 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006259 {
6260 EMSG(_(e_re_corr));
6261 return TRUE;
6262 }
6263 return FALSE;
6264}
6265
6266/*
6267 * Cleanup the subexpressions, if this wasn't done yet.
6268 * This construction is used to clear the subexpressions only when they are
6269 * used (to increase speed).
6270 */
6271 static void
6272cleanup_subexpr()
6273{
6274 if (need_clear_subexpr)
6275 {
6276 if (REG_MULTI)
6277 {
6278 /* Use 0xff to set lnum to -1 */
6279 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6280 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6281 }
6282 else
6283 {
6284 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6285 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6286 }
6287 need_clear_subexpr = FALSE;
6288 }
6289}
6290
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end arrays, but only when
 * "need_clear_zsubexpr" says this is still to be done.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes makes every lnum -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6312
6313/*
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006314 * Save the current subexpr to "bp", so that they can be restored
6315 * later by restore_subexpr().
6316 */
6317 static void
6318save_subexpr(bp)
6319 regbehind_T *bp;
6320{
6321 int i;
6322
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006323 /* When "need_clear_subexpr" is set we don't need to save the values, only
6324 * remember that this flag needs to be set again when restoring. */
6325 bp->save_need_clear_subexpr = need_clear_subexpr;
6326 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006327 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006328 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006329 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006330 if (REG_MULTI)
6331 {
6332 bp->save_start[i].se_u.pos = reg_startpos[i];
6333 bp->save_end[i].se_u.pos = reg_endpos[i];
6334 }
6335 else
6336 {
6337 bp->save_start[i].se_u.ptr = reg_startp[i];
6338 bp->save_end[i].se_u.ptr = reg_endp[i];
6339 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006340 }
6341 }
6342}
6343
6344/*
6345 * Restore the subexpr from "bp".
6346 */
6347 static void
6348restore_subexpr(bp)
6349 regbehind_T *bp;
6350{
6351 int i;
6352
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006353 /* Only need to restore saved values when they are not to be cleared. */
6354 need_clear_subexpr = bp->save_need_clear_subexpr;
6355 if (!need_clear_subexpr)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006356 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006357 for (i = 0; i < NSUBEXP; ++i)
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006358 {
Bram Moolenaarfde483c2008-06-15 12:21:50 +00006359 if (REG_MULTI)
6360 {
6361 reg_startpos[i] = bp->save_start[i].se_u.pos;
6362 reg_endpos[i] = bp->save_end[i].se_u.pos;
6363 }
6364 else
6365 {
6366 reg_startp[i] = bp->save_start[i].se_u.ptr;
6367 reg_endp[i] = bp->save_end[i].se_u.ptr;
6368 }
Bram Moolenaar34cbfdf2008-04-09 10:16:02 +00006369 }
6370 }
6371}
6372
6373/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00006374 * Advance reglnum, regline and reginput to the next line.
6375 */
6376 static void
6377reg_nextline()
6378{
6379 regline = reg_getline(++reglnum);
6380 reginput = regline;
6381 fast_breakcheck();
6382}
6383
6384/*
6385 * Save the input line and position in a regsave_T.
6386 */
6387 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006388reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006389 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006390 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006391{
6392 if (REG_MULTI)
6393 {
6394 save->rs_u.pos.col = (colnr_T)(reginput - regline);
6395 save->rs_u.pos.lnum = reglnum;
6396 }
6397 else
6398 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006399 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006400}
6401
6402/*
6403 * Restore the input line and position from a regsave_T.
6404 */
6405 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00006406reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006407 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006408 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006409{
6410 if (REG_MULTI)
6411 {
6412 if (reglnum != save->rs_u.pos.lnum)
6413 {
6414 /* only call reg_getline() when the line number changed to save
6415 * a bit of time */
6416 reglnum = save->rs_u.pos.lnum;
6417 regline = reg_getline(reglnum);
6418 }
6419 reginput = regline + save->rs_u.pos.col;
6420 }
6421 else
6422 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00006423 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006424}
6425
6426/*
6427 * Return TRUE if current position is equal to saved position.
6428 */
6429 static int
6430reg_save_equal(save)
6431 regsave_T *save;
6432{
6433 if (REG_MULTI)
6434 return reglnum == save->rs_u.pos.lnum
6435 && reginput == regline + save->rs_u.pos.col;
6436 return reginput == save->rs_u.ptr;
6437}
6438
6439/*
6440 * Tentatively set the sub-expression start to the current position (after
6441 * calling regmatch() they will have changed). Need to save the existing
6442 * values for when there is no match.
6443 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6444 * depending on REG_MULTI.
6445 */
6446 static void
6447save_se_multi(savep, posp)
6448 save_se_T *savep;
6449 lpos_T *posp;
6450{
6451 savep->se_u.pos = *posp;
6452 posp->lnum = reglnum;
6453 posp->col = (colnr_T)(reginput - regline);
6454}
6455
6456 static void
6457save_se_one(savep, pp)
6458 save_se_T *savep;
6459 char_u **pp;
6460{
6461 savep->se_u.ptr = *pp;
6462 *pp = reginput;
6463}
6464
6465/*
6466 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6467 */
6468 static int
6469re_num_cmp(val, scan)
6470 long_u val;
6471 char_u *scan;
6472{
6473 long_u n = OPERAND_MIN(scan);
6474
6475 if (OPERAND_CMP(scan) == '>')
6476 return val > n;
6477 if (OPERAND_CMP(scan) == '<')
6478 return val < n;
6479 return val == n;
6480}
6481
6482
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write a vaguely comprehensible listing of program "r", which was
 * compiled from "pattern".
 *
 * The listing goes to stdout, or is appended to the file "bt_regexp_log.log"
 * when BT_REGEXP_LOG is defined.  Nothing is written when that file cannot
 * be opened.
 */
    static void
regdump(pattern, r)
    char_u          *pattern;
    bt_regprog_T    *r;
{
    char_u  *node;
    int     opcode = EXACTLY;       /* Arbitrary non-END op. */
    char_u  *target;
    char_u  *last_target = NULL;    /* furthest "next" seen so far */
    FILE    *out;

#ifdef BT_REGEXP_LOG
    out = fopen("bt_regexp_log.log", "a");
#else
    out = stdout;
#endif
    if (out == NULL)
        return;
    fprintf(out, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /* The first byte of the program is not a node, start after it. */
    node = r->program + 1;

    /*
     * Keep going until an END is found that does not come before a position
     * some node referred to (an END can also appear in a NOMATCH operand).
     */
    while (opcode != END || node <= last_target)
    {
        opcode = OP(node);

        /* Offset of the node and the name of its opcode. */
        fprintf(out, "%2d%s", (int)(node - r->program), regprop(node));

        /* Offset of the node that "next" refers to, zero when none. */
        target = regnext(node);
        if (target == NULL)
            fprintf(out, "(0)");
        else
            fprintf(out, "(%d)", (int)((node - r->program) + (target - node)));
        if (last_target < target)
            last_target = target;

        if (opcode == BRACE_LIMITS)
        {
            /* The min and max values, eight bytes together. */
            fprintf(out, " minval %ld, maxval %ld", OPERAND_MIN(node), OPERAND_MAX(node));
            node += 8;
        }
        node += 3;

        switch (opcode)
        {
            case ANYOF:
            case ANYOF + ADD_NL:
            case ANYBUT:
            case ANYBUT + ADD_NL:
            case EXACTLY:
                /* These have a NUL terminated literal string as operand. */
                fprintf(out, "\nxxxxxxxxx\n");
                for ( ; *node != NUL; ++node)
                    fprintf(out, "%c", *node);
                fprintf(out, "\nxxxxxxxxx\n");
                ++node;         /* skip the NUL */
                break;
            default:
                break;
        }
        fprintf(out, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
        fprintf(out, "start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        fprintf(out, "anchored; ");
    if (r->regmust != NULL)
        fprintf(out, "must have \"%s\"", r->regmust);
    fprintf(out, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(out);
#endif
}
#endif      /* BT_REGEXP_DUMP */
6562#ifdef DEBUG
Bram Moolenaar071d4272004-06-13 20:20:40 +00006563/*
6564 * regprop - printable representation of opcode
6565 */
6566 static char_u *
6567regprop(op)
6568 char_u *op;
6569{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006570 char *p;
6571 static char buf[50];
Bram Moolenaar071d4272004-06-13 20:20:40 +00006572
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006573 STRCPY(buf, ":");
Bram Moolenaar071d4272004-06-13 20:20:40 +00006574
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006575 switch ((int) OP(op))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006576 {
6577 case BOL:
6578 p = "BOL";
6579 break;
6580 case EOL:
6581 p = "EOL";
6582 break;
6583 case RE_BOF:
6584 p = "BOF";
6585 break;
6586 case RE_EOF:
6587 p = "EOF";
6588 break;
6589 case CURSOR:
6590 p = "CURSOR";
6591 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006592 case RE_VISUAL:
6593 p = "RE_VISUAL";
6594 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006595 case RE_LNUM:
6596 p = "RE_LNUM";
6597 break;
Bram Moolenaar71fe80d2006-01-22 23:25:56 +00006598 case RE_MARK:
6599 p = "RE_MARK";
6600 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006601 case RE_COL:
6602 p = "RE_COL";
6603 break;
6604 case RE_VCOL:
6605 p = "RE_VCOL";
6606 break;
6607 case BOW:
6608 p = "BOW";
6609 break;
6610 case EOW:
6611 p = "EOW";
6612 break;
6613 case ANY:
6614 p = "ANY";
6615 break;
6616 case ANY + ADD_NL:
6617 p = "ANY+NL";
6618 break;
6619 case ANYOF:
6620 p = "ANYOF";
6621 break;
6622 case ANYOF + ADD_NL:
6623 p = "ANYOF+NL";
6624 break;
6625 case ANYBUT:
6626 p = "ANYBUT";
6627 break;
6628 case ANYBUT + ADD_NL:
6629 p = "ANYBUT+NL";
6630 break;
6631 case IDENT:
6632 p = "IDENT";
6633 break;
6634 case IDENT + ADD_NL:
6635 p = "IDENT+NL";
6636 break;
6637 case SIDENT:
6638 p = "SIDENT";
6639 break;
6640 case SIDENT + ADD_NL:
6641 p = "SIDENT+NL";
6642 break;
6643 case KWORD:
6644 p = "KWORD";
6645 break;
6646 case KWORD + ADD_NL:
6647 p = "KWORD+NL";
6648 break;
6649 case SKWORD:
6650 p = "SKWORD";
6651 break;
6652 case SKWORD + ADD_NL:
6653 p = "SKWORD+NL";
6654 break;
6655 case FNAME:
6656 p = "FNAME";
6657 break;
6658 case FNAME + ADD_NL:
6659 p = "FNAME+NL";
6660 break;
6661 case SFNAME:
6662 p = "SFNAME";
6663 break;
6664 case SFNAME + ADD_NL:
6665 p = "SFNAME+NL";
6666 break;
6667 case PRINT:
6668 p = "PRINT";
6669 break;
6670 case PRINT + ADD_NL:
6671 p = "PRINT+NL";
6672 break;
6673 case SPRINT:
6674 p = "SPRINT";
6675 break;
6676 case SPRINT + ADD_NL:
6677 p = "SPRINT+NL";
6678 break;
6679 case WHITE:
6680 p = "WHITE";
6681 break;
6682 case WHITE + ADD_NL:
6683 p = "WHITE+NL";
6684 break;
6685 case NWHITE:
6686 p = "NWHITE";
6687 break;
6688 case NWHITE + ADD_NL:
6689 p = "NWHITE+NL";
6690 break;
6691 case DIGIT:
6692 p = "DIGIT";
6693 break;
6694 case DIGIT + ADD_NL:
6695 p = "DIGIT+NL";
6696 break;
6697 case NDIGIT:
6698 p = "NDIGIT";
6699 break;
6700 case NDIGIT + ADD_NL:
6701 p = "NDIGIT+NL";
6702 break;
6703 case HEX:
6704 p = "HEX";
6705 break;
6706 case HEX + ADD_NL:
6707 p = "HEX+NL";
6708 break;
6709 case NHEX:
6710 p = "NHEX";
6711 break;
6712 case NHEX + ADD_NL:
6713 p = "NHEX+NL";
6714 break;
6715 case OCTAL:
6716 p = "OCTAL";
6717 break;
6718 case OCTAL + ADD_NL:
6719 p = "OCTAL+NL";
6720 break;
6721 case NOCTAL:
6722 p = "NOCTAL";
6723 break;
6724 case NOCTAL + ADD_NL:
6725 p = "NOCTAL+NL";
6726 break;
6727 case WORD:
6728 p = "WORD";
6729 break;
6730 case WORD + ADD_NL:
6731 p = "WORD+NL";
6732 break;
6733 case NWORD:
6734 p = "NWORD";
6735 break;
6736 case NWORD + ADD_NL:
6737 p = "NWORD+NL";
6738 break;
6739 case HEAD:
6740 p = "HEAD";
6741 break;
6742 case HEAD + ADD_NL:
6743 p = "HEAD+NL";
6744 break;
6745 case NHEAD:
6746 p = "NHEAD";
6747 break;
6748 case NHEAD + ADD_NL:
6749 p = "NHEAD+NL";
6750 break;
6751 case ALPHA:
6752 p = "ALPHA";
6753 break;
6754 case ALPHA + ADD_NL:
6755 p = "ALPHA+NL";
6756 break;
6757 case NALPHA:
6758 p = "NALPHA";
6759 break;
6760 case NALPHA + ADD_NL:
6761 p = "NALPHA+NL";
6762 break;
6763 case LOWER:
6764 p = "LOWER";
6765 break;
6766 case LOWER + ADD_NL:
6767 p = "LOWER+NL";
6768 break;
6769 case NLOWER:
6770 p = "NLOWER";
6771 break;
6772 case NLOWER + ADD_NL:
6773 p = "NLOWER+NL";
6774 break;
6775 case UPPER:
6776 p = "UPPER";
6777 break;
6778 case UPPER + ADD_NL:
6779 p = "UPPER+NL";
6780 break;
6781 case NUPPER:
6782 p = "NUPPER";
6783 break;
6784 case NUPPER + ADD_NL:
6785 p = "NUPPER+NL";
6786 break;
6787 case BRANCH:
6788 p = "BRANCH";
6789 break;
6790 case EXACTLY:
6791 p = "EXACTLY";
6792 break;
6793 case NOTHING:
6794 p = "NOTHING";
6795 break;
6796 case BACK:
6797 p = "BACK";
6798 break;
6799 case END:
6800 p = "END";
6801 break;
6802 case MOPEN + 0:
6803 p = "MATCH START";
6804 break;
6805 case MOPEN + 1:
6806 case MOPEN + 2:
6807 case MOPEN + 3:
6808 case MOPEN + 4:
6809 case MOPEN + 5:
6810 case MOPEN + 6:
6811 case MOPEN + 7:
6812 case MOPEN + 8:
6813 case MOPEN + 9:
6814 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6815 p = NULL;
6816 break;
6817 case MCLOSE + 0:
6818 p = "MATCH END";
6819 break;
6820 case MCLOSE + 1:
6821 case MCLOSE + 2:
6822 case MCLOSE + 3:
6823 case MCLOSE + 4:
6824 case MCLOSE + 5:
6825 case MCLOSE + 6:
6826 case MCLOSE + 7:
6827 case MCLOSE + 8:
6828 case MCLOSE + 9:
6829 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6830 p = NULL;
6831 break;
6832 case BACKREF + 1:
6833 case BACKREF + 2:
6834 case BACKREF + 3:
6835 case BACKREF + 4:
6836 case BACKREF + 5:
6837 case BACKREF + 6:
6838 case BACKREF + 7:
6839 case BACKREF + 8:
6840 case BACKREF + 9:
6841 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6842 p = NULL;
6843 break;
6844 case NOPEN:
6845 p = "NOPEN";
6846 break;
6847 case NCLOSE:
6848 p = "NCLOSE";
6849 break;
6850#ifdef FEAT_SYN_HL
6851 case ZOPEN + 1:
6852 case ZOPEN + 2:
6853 case ZOPEN + 3:
6854 case ZOPEN + 4:
6855 case ZOPEN + 5:
6856 case ZOPEN + 6:
6857 case ZOPEN + 7:
6858 case ZOPEN + 8:
6859 case ZOPEN + 9:
6860 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6861 p = NULL;
6862 break;
6863 case ZCLOSE + 1:
6864 case ZCLOSE + 2:
6865 case ZCLOSE + 3:
6866 case ZCLOSE + 4:
6867 case ZCLOSE + 5:
6868 case ZCLOSE + 6:
6869 case ZCLOSE + 7:
6870 case ZCLOSE + 8:
6871 case ZCLOSE + 9:
6872 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6873 p = NULL;
6874 break;
6875 case ZREF + 1:
6876 case ZREF + 2:
6877 case ZREF + 3:
6878 case ZREF + 4:
6879 case ZREF + 5:
6880 case ZREF + 6:
6881 case ZREF + 7:
6882 case ZREF + 8:
6883 case ZREF + 9:
6884 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6885 p = NULL;
6886 break;
6887#endif
6888 case STAR:
6889 p = "STAR";
6890 break;
6891 case PLUS:
6892 p = "PLUS";
6893 break;
6894 case NOMATCH:
6895 p = "NOMATCH";
6896 break;
6897 case MATCH:
6898 p = "MATCH";
6899 break;
6900 case BEHIND:
6901 p = "BEHIND";
6902 break;
6903 case NOBEHIND:
6904 p = "NOBEHIND";
6905 break;
6906 case SUBPAT:
6907 p = "SUBPAT";
6908 break;
6909 case BRACE_LIMITS:
6910 p = "BRACE_LIMITS";
6911 break;
6912 case BRACE_SIMPLE:
6913 p = "BRACE_SIMPLE";
6914 break;
6915 case BRACE_COMPLEX + 0:
6916 case BRACE_COMPLEX + 1:
6917 case BRACE_COMPLEX + 2:
6918 case BRACE_COMPLEX + 3:
6919 case BRACE_COMPLEX + 4:
6920 case BRACE_COMPLEX + 5:
6921 case BRACE_COMPLEX + 6:
6922 case BRACE_COMPLEX + 7:
6923 case BRACE_COMPLEX + 8:
6924 case BRACE_COMPLEX + 9:
6925 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6926 p = NULL;
6927 break;
6928#ifdef FEAT_MBYTE
6929 case MULTIBYTECODE:
6930 p = "MULTIBYTECODE";
6931 break;
6932#endif
6933 case NEWL:
6934 p = "NEWL";
6935 break;
6936 default:
6937 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6938 p = NULL;
6939 break;
6940 }
6941 if (p != NULL)
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006942 STRCAT(buf, p);
6943 return (char_u *)buf;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006944}
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02006945#endif /* DEBUG */
Bram Moolenaar071d4272004-06-13 20:20:40 +00006946
6947#ifdef FEAT_MBYTE
6948static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6949
/*
 * One decomposition: up to three code points, unused positions are zero.
 */
typedef struct
{
    int a;
    int b;
    int c;
} decomp_T;


/*
 * Decompositions for the code points 0xfb20 - 0xfb4f, indexed by
 * "code point - 0xfb20".  Entries marked UNUSED contain the code point
 * itself.
 */
static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] =
{
    {0x5e2, 0, 0},              /* 0xfb20 alt ayin */
    {0x5d0, 0, 0},              /* 0xfb21 alt alef */
    {0x5d3, 0, 0},              /* 0xfb22 alt dalet */
    {0x5d4, 0, 0},              /* 0xfb23 alt he */
    {0x5db, 0, 0},              /* 0xfb24 alt kaf */
    {0x5dc, 0, 0},              /* 0xfb25 alt lamed */
    {0x5dd, 0, 0},              /* 0xfb26 alt mem-sofit */
    {0x5e8, 0, 0},              /* 0xfb27 alt resh */
    {0x5ea, 0, 0},              /* 0xfb28 alt tav */
    {'+', 0, 0},                /* 0xfb29 alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30 alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31 bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32 gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33 dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34 he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35 vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36 zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38 tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39 yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40 nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41 samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43 pe sofit+dagesh */
    {0x5e4, 0x5bc, 0},          /* 0xfb44 pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46 tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47 qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48 resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49 shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f alef-lamed */
};
7008
7009 static void
7010mb_decompose(c, c1, c2, c3)
7011 int c, *c1, *c2, *c3;
7012{
7013 decomp_T d;
7014
Bram Moolenaar2eec59e2013-05-21 21:37:20 +02007015 if (c >= 0xfb20 && c <= 0xfb4f)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007016 {
7017 d = decomp_table[c - 0xfb20];
7018 *c1 = d.a;
7019 *c2 = d.b;
7020 *c3 = d.c;
7021 }
7022 else
7023 {
7024 *c1 = c;
7025 *c2 = *c3 = 0;
7026 }
7027}
7028#endif
7029
7030/*
7031 * Compare two strings, ignore case if ireg_ic set.
7032 * Return 0 if strings match, non-zero otherwise.
7033 * Correct the length "*n" when composing characters are ignored.
7034 */
7035 static int
7036cstrncmp(s1, s2, n)
7037 char_u *s1, *s2;
7038 int *n;
7039{
7040 int result;
7041
7042 if (!ireg_ic)
7043 result = STRNCMP(s1, s2, *n);
7044 else
7045 result = MB_STRNICMP(s1, s2, *n);
7046
7047#ifdef FEAT_MBYTE
7048 /* if it failed and it's utf8 and we want to combineignore: */
7049 if (result != 0 && enc_utf8 && ireg_icombine)
7050 {
7051 char_u *str1, *str2;
7052 int c1, c2, c11, c12;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007053 int junk;
7054
7055 /* we have to handle the strcmp ourselves, since it is necessary to
7056 * deal with the composing characters by ignoring them: */
7057 str1 = s1;
7058 str2 = s2;
7059 c1 = c2 = 0;
Bram Moolenaarcafda4f2005-09-06 19:25:11 +00007060 while ((int)(str1 - s1) < *n)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007061 {
7062 c1 = mb_ptr2char_adv(&str1);
7063 c2 = mb_ptr2char_adv(&str2);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007064
7065 /* decompose the character if necessary, into 'base' characters
7066 * because I don't care about Arabic, I will hard-code the Hebrew
7067 * which I *do* care about! So sue me... */
7068 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7069 {
7070 /* decomposition necessary? */
7071 mb_decompose(c1, &c11, &junk, &junk);
7072 mb_decompose(c2, &c12, &junk, &junk);
7073 c1 = c11;
7074 c2 = c12;
7075 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7076 break;
7077 }
7078 }
7079 result = c2 - c1;
7080 if (result == 0)
7081 *n = (int)(str2 - s2);
7082 }
7083#endif
7084
7085 return result;
7086}
7087
7088/*
7089 * cstrchr: This function is used a lot for simple searches, keep it fast!
7090 */
7091 static char_u *
7092cstrchr(s, c)
7093 char_u *s;
7094 int c;
7095{
7096 char_u *p;
7097 int cc;
7098
7099 if (!ireg_ic
7100#ifdef FEAT_MBYTE
7101 || (!enc_utf8 && mb_char2len(c) > 1)
7102#endif
7103 )
7104 return vim_strchr(s, c);
7105
7106 /* tolower() and toupper() can be slow, comparing twice should be a lot
7107 * faster (esp. when using MS Visual C++!).
7108 * For UTF-8 need to use folded case. */
7109#ifdef FEAT_MBYTE
7110 if (enc_utf8 && c > 0x80)
7111 cc = utf_fold(c);
7112 else
7113#endif
Bram Moolenaara245a5b2007-08-11 11:58:23 +00007114 if (MB_ISUPPER(c))
7115 cc = MB_TOLOWER(c);
7116 else if (MB_ISLOWER(c))
7117 cc = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007118 else
7119 return vim_strchr(s, c);
7120
7121#ifdef FEAT_MBYTE
7122 if (has_mbyte)
7123 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007124 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007125 {
7126 if (enc_utf8 && c > 0x80)
7127 {
7128 if (utf_fold(utf_ptr2char(p)) == cc)
7129 return p;
7130 }
7131 else if (*p == c || *p == cc)
7132 return p;
7133 }
7134 }
7135 else
7136#endif
7137 /* Faster version for when there are no multi-byte characters. */
7138 for (p = s; *p != NUL; ++p)
7139 if (*p == c || *p == cc)
7140 return p;
7141
7142 return NULL;
7143}
7144
7145/***************************************************************
7146 * regsub stuff *
7147 ***************************************************************/
7148
7149/* This stuff below really confuses cc on an SGI -- webb */
7150#ifdef __sgi
7151# undef __ARGS
7152# define __ARGS(x) ()
7153#endif
7154
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
typedef void (*(*fptr_T) __ARGS((int *, int)))();

/* Case conversion functions, used by vim_regsub_both() for "\u", "\U", "\l"
 * and "\L".  Each stores the converted character through the first argument
 * and returns the function to use for the next character, or NULL. */
static fptr_T do_upper __ARGS((int *, int));
static fptr_T do_Upper __ARGS((int *, int));
static fptr_T do_lower __ARGS((int *, int));
static fptr_T do_Lower __ARGS((int *, int));

/* Shared implementation of vim_regsub() and vim_regsub_multi(). */
static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));

Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007170 static fptr_T
Bram Moolenaar071d4272004-06-13 20:20:40 +00007171do_upper(d, c)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007172 int *d;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007173 int c;
7174{
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007175 *d = MB_TOUPPER(c);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007176
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007177 return (fptr_T)NULL;
7178}
7179
7180 static fptr_T
7181do_Upper(d, c)
7182 int *d;
7183 int c;
7184{
7185 *d = MB_TOUPPER(c);
7186
7187 return (fptr_T)do_Upper;
7188}
7189
7190 static fptr_T
7191do_lower(d, c)
7192 int *d;
7193 int c;
7194{
7195 *d = MB_TOLOWER(c);
7196
7197 return (fptr_T)NULL;
7198}
7199
7200 static fptr_T
7201do_Lower(d, c)
7202 int *d;
7203 int c;
7204{
7205 *d = MB_TOLOWER(c);
7206
7207 return (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007208}
7209
7210/*
7211 * regtilde(): Replace tildes in the pattern by the old pattern.
7212 *
7213 * Short explanation of the tilde: It stands for the previous replacement
7214 * pattern. If that previous pattern also contains a ~ we should go back a
7215 * step further... But we insert the previous pattern into the current one
7216 * and remember that.
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007217 * This still does not handle the case where "magic" changes. So require the
7218 * user to keep his hands off of "magic".
Bram Moolenaar071d4272004-06-13 20:20:40 +00007219 *
7220 * The tildes are parsed once before the first call to vim_regsub().
7221 */
7222 char_u *
7223regtilde(source, magic)
7224 char_u *source;
7225 int magic;
7226{
7227 char_u *newsub = source;
7228 char_u *tmpsub;
7229 char_u *p;
7230 int len;
7231 int prevlen;
7232
7233 for (p = newsub; *p; ++p)
7234 {
7235 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7236 {
7237 if (reg_prev_sub != NULL)
7238 {
7239 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
7240 prevlen = (int)STRLEN(reg_prev_sub);
7241 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7242 if (tmpsub != NULL)
7243 {
7244 /* copy prefix */
7245 len = (int)(p - newsub); /* not including ~ */
7246 mch_memmove(tmpsub, newsub, (size_t)len);
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007247 /* interpret tilde */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007248 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7249 /* copy postfix */
7250 if (!magic)
7251 ++p; /* back off \ */
7252 STRCPY(tmpsub + len + prevlen, p + 1);
7253
7254 if (newsub != source) /* already allocated newsub */
7255 vim_free(newsub);
7256 newsub = tmpsub;
7257 p = newsub + len + prevlen;
7258 }
7259 }
7260 else if (magic)
Bram Moolenaar446cb832008-06-24 21:56:24 +00007261 STRMOVE(p, p + 1); /* remove '~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007262 else
Bram Moolenaar446cb832008-06-24 21:56:24 +00007263 STRMOVE(p, p + 2); /* remove '\~' */
Bram Moolenaar071d4272004-06-13 20:20:40 +00007264 --p;
7265 }
7266 else
7267 {
7268 if (*p == '\\' && p[1]) /* skip escaped characters */
7269 ++p;
7270#ifdef FEAT_MBYTE
7271 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007272 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007273#endif
7274 }
7275 }
7276
7277 vim_free(reg_prev_sub);
7278 if (newsub != source) /* newsub was allocated, just keep it */
7279 reg_prev_sub = newsub;
7280 else /* no ~ found, need to save newsub */
7281 reg_prev_sub = vim_strsave(newsub);
7282 return newsub;
7283}
7284
#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */

/* These pointers are used instead of reg_match and reg_mmatch for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch(). */
static regmatch_T       *submatch_match;
static regmmatch_T      *submatch_mmatch;
/* Copies of reg_firstlnum, reg_maxline and reg_line_lbr, made by
 * vim_regsub_both() before it evaluates a "\=" expression. */
static linenr_T         submatch_firstlnum;
static linenr_T         submatch_maxline;
static int              submatch_line_lbr;
#endif
7297
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
/*
 * vim_regsub() - perform substitutions after a vim_regexec() or
 * vim_regexec_multi() match.
 *
 * "rmp" holds the match, "source" is the substitute string and "dest" is
 * where the result goes.
 *
 * If "copy" is TRUE really copy into "dest".
 * If "copy" is FALSE nothing is copied, this is just to find out the length
 * of the result.
 *
 * If "backslash" is TRUE, a backslash will be removed later, need to double
 * them to keep them, and insert a backslash before a CR to avoid it being
 * replaced with a line break later.
 *
 * Note: The matched text must not change between the call of
 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
 * references invalid!
 *
 * Returns the size of the replacement, including terminating NUL.
 */
    int
vim_regsub(rmp, source, dest, copy, magic, backslash)
    regmatch_T  *rmp;
    char_u      *source;
    char_u      *dest;
    int         copy;
    int         magic;
    int         backslash;
{
    /* Single-line match: there is no multi-line match and no lines
     * following the first one. */
    reg_buf = curbuf;
    reg_maxline = 0;
    reg_mmatch = NULL;
    reg_match = rmp;

    return vim_regsub_both(source, dest, copy, magic, backslash);
}
#endif
7333
7334 int
7335vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7336 regmmatch_T *rmp;
7337 linenr_T lnum;
7338 char_u *source;
7339 char_u *dest;
7340 int copy;
7341 int magic;
7342 int backslash;
7343{
7344 reg_match = NULL;
7345 reg_mmatch = rmp;
7346 reg_buf = curbuf; /* always works on the current buffer! */
7347 reg_firstlnum = lnum;
7348 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7349 return vim_regsub_both(source, dest, copy, magic, backslash);
7350}
7351
7352 static int
7353vim_regsub_both(source, dest, copy, magic, backslash)
7354 char_u *source;
7355 char_u *dest;
7356 int copy;
7357 int magic;
7358 int backslash;
7359{
7360 char_u *src;
7361 char_u *dst;
7362 char_u *s;
7363 int c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007364 int cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007365 int no = -1;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007366 fptr_T func_all = (fptr_T)NULL;
7367 fptr_T func_one = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007368 linenr_T clnum = 0; /* init for GCC */
7369 int len = 0; /* init for GCC */
7370#ifdef FEAT_EVAL
7371 static char_u *eval_result = NULL;
7372#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +00007373
7374 /* Be paranoid... */
7375 if (source == NULL || dest == NULL)
7376 {
7377 EMSG(_(e_null));
7378 return 0;
7379 }
7380 if (prog_magic_wrong())
7381 return 0;
7382 src = source;
7383 dst = dest;
7384
7385 /*
7386 * When the substitute part starts with "\=" evaluate it as an expression.
7387 */
7388 if (source[0] == '\\' && source[1] == '='
7389#ifdef FEAT_EVAL
7390 && !can_f_submatch /* can't do this recursively */
7391#endif
7392 )
7393 {
7394#ifdef FEAT_EVAL
7395 /* To make sure that the length doesn't change between checking the
7396 * length and copying the string, and to speed up things, the
7397 * resulting string is saved from the call with "copy" == FALSE to the
7398 * call with "copy" == TRUE. */
7399 if (copy)
7400 {
7401 if (eval_result != NULL)
7402 {
7403 STRCPY(dest, eval_result);
7404 dst += STRLEN(eval_result);
7405 vim_free(eval_result);
7406 eval_result = NULL;
7407 }
7408 }
7409 else
7410 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007411 win_T *save_reg_win;
7412 int save_ireg_ic;
7413
7414 vim_free(eval_result);
7415
7416 /* The expression may contain substitute(), which calls us
7417 * recursively. Make sure submatch() gets the text from the first
7418 * level. Don't need to save "reg_buf", because
7419 * vim_regexec_multi() can't be called recursively. */
7420 submatch_match = reg_match;
7421 submatch_mmatch = reg_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007422 submatch_firstlnum = reg_firstlnum;
7423 submatch_maxline = reg_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007424 submatch_line_lbr = reg_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007425 save_reg_win = reg_win;
7426 save_ireg_ic = ireg_ic;
7427 can_f_submatch = TRUE;
7428
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007429 eval_result = eval_to_string(source + 2, NULL, TRUE);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007430 if (eval_result != NULL)
7431 {
Bram Moolenaar06975a42010-03-23 16:27:22 +01007432 int had_backslash = FALSE;
7433
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00007434 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00007435 {
Bram Moolenaar978287b2011-06-19 04:32:15 +02007436 /* Change NL to CR, so that it becomes a line break,
7437 * unless called from vim_regexec_nl().
Bram Moolenaar071d4272004-06-13 20:20:40 +00007438 * Skip over a backslashed character. */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007439 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007440 *s = CAR;
7441 else if (*s == '\\' && s[1] != NUL)
Bram Moolenaar06975a42010-03-23 16:27:22 +01007442 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00007443 ++s;
Bram Moolenaar60190782010-05-21 13:08:58 +02007444 /* Change NL to CR here too, so that this works:
7445 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
7446 * abc\
7447 * def
Bram Moolenaar978287b2011-06-19 04:32:15 +02007448 * Not when called from vim_regexec_nl().
Bram Moolenaar60190782010-05-21 13:08:58 +02007449 */
Bram Moolenaar978287b2011-06-19 04:32:15 +02007450 if (*s == NL && !submatch_line_lbr)
Bram Moolenaar60190782010-05-21 13:08:58 +02007451 *s = CAR;
Bram Moolenaar06975a42010-03-23 16:27:22 +01007452 had_backslash = TRUE;
7453 }
7454 }
7455 if (had_backslash && backslash)
7456 {
7457 /* Backslashes will be consumed, need to double them. */
7458 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7459 if (s != NULL)
7460 {
7461 vim_free(eval_result);
7462 eval_result = s;
7463 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007464 }
7465
7466 dst += STRLEN(eval_result);
7467 }
7468
7469 reg_match = submatch_match;
7470 reg_mmatch = submatch_mmatch;
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007471 reg_firstlnum = submatch_firstlnum;
7472 reg_maxline = submatch_maxline;
Bram Moolenaar978287b2011-06-19 04:32:15 +02007473 reg_line_lbr = submatch_line_lbr;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007474 reg_win = save_reg_win;
7475 ireg_ic = save_ireg_ic;
7476 can_f_submatch = FALSE;
7477 }
7478#endif
7479 }
7480 else
7481 while ((c = *src++) != NUL)
7482 {
7483 if (c == '&' && magic)
7484 no = 0;
7485 else if (c == '\\' && *src != NUL)
7486 {
7487 if (*src == '&' && !magic)
7488 {
7489 ++src;
7490 no = 0;
7491 }
7492 else if ('0' <= *src && *src <= '9')
7493 {
7494 no = *src++ - '0';
7495 }
7496 else if (vim_strchr((char_u *)"uUlLeE", *src))
7497 {
7498 switch (*src++)
7499 {
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007500 case 'u': func_one = (fptr_T)do_upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007501 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007502 case 'U': func_all = (fptr_T)do_Upper;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007503 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007504 case 'l': func_one = (fptr_T)do_lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007505 continue;
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007506 case 'L': func_all = (fptr_T)do_Lower;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007507 continue;
7508 case 'e':
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007509 case 'E': func_one = func_all = (fptr_T)NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007510 continue;
7511 }
7512 }
7513 }
7514 if (no < 0) /* Ordinary character. */
7515 {
Bram Moolenaardb552d602006-03-23 22:59:57 +00007516 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7517 {
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007518 /* Copy a special key as-is. */
Bram Moolenaardb552d602006-03-23 22:59:57 +00007519 if (copy)
7520 {
7521 *dst++ = c;
7522 *dst++ = *src++;
7523 *dst++ = *src++;
7524 }
7525 else
7526 {
7527 dst += 3;
7528 src += 2;
7529 }
7530 continue;
7531 }
7532
Bram Moolenaar071d4272004-06-13 20:20:40 +00007533 if (c == '\\' && *src != NUL)
7534 {
7535 /* Check for abbreviations -- webb */
7536 switch (*src)
7537 {
7538 case 'r': c = CAR; ++src; break;
7539 case 'n': c = NL; ++src; break;
7540 case 't': c = TAB; ++src; break;
7541 /* Oh no! \e already has meaning in subst pat :-( */
7542 /* case 'e': c = ESC; ++src; break; */
7543 case 'b': c = Ctrl_H; ++src; break;
7544
7545 /* If "backslash" is TRUE the backslash will be removed
7546 * later. Used to insert a literal CR. */
7547 default: if (backslash)
7548 {
7549 if (copy)
7550 *dst = '\\';
7551 ++dst;
7552 }
7553 c = *src++;
7554 }
7555 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007556#ifdef FEAT_MBYTE
Bram Moolenaardb552d602006-03-23 22:59:57 +00007557 else if (has_mbyte)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007558 c = mb_ptr2char(src - 1);
7559#endif
7560
Bram Moolenaardb552d602006-03-23 22:59:57 +00007561 /* Write to buffer, if copy is set. */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007562 if (func_one != (fptr_T)NULL)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007563 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007564 func_one = (fptr_T)(func_one(&cc, c));
7565 else if (func_all != (fptr_T)NULL)
7566 /* Turbo C complains without the typecast */
7567 func_all = (fptr_T)(func_all(&cc, c));
7568 else /* just copy */
7569 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007570
7571#ifdef FEAT_MBYTE
7572 if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007573 {
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007574 int totlen = mb_ptr2len(src - 1);
7575
Bram Moolenaar071d4272004-06-13 20:20:40 +00007576 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007577 mb_char2bytes(cc, dst);
7578 dst += mb_char2len(cc) - 1;
Bram Moolenaar0c56c602010-07-12 22:42:33 +02007579 if (enc_utf8)
7580 {
7581 int clen = utf_ptr2len(src - 1);
7582
7583 /* If the character length is shorter than "totlen", there
7584 * are composing characters; copy them as-is. */
7585 if (clen < totlen)
7586 {
7587 if (copy)
7588 mch_memmove(dst + 1, src - 1 + clen,
7589 (size_t)(totlen - clen));
7590 dst += totlen - clen;
7591 }
7592 }
7593 src += totlen - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007594 }
7595 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007596#endif
7597 if (copy)
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007598 *dst = cc;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007599 dst++;
7600 }
7601 else
7602 {
7603 if (REG_MULTI)
7604 {
7605 clnum = reg_mmatch->startpos[no].lnum;
7606 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7607 s = NULL;
7608 else
7609 {
7610 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7611 if (reg_mmatch->endpos[no].lnum == clnum)
7612 len = reg_mmatch->endpos[no].col
7613 - reg_mmatch->startpos[no].col;
7614 else
7615 len = (int)STRLEN(s);
7616 }
7617 }
7618 else
7619 {
7620 s = reg_match->startp[no];
7621 if (reg_match->endp[no] == NULL)
7622 s = NULL;
7623 else
7624 len = (int)(reg_match->endp[no] - s);
7625 }
7626 if (s != NULL)
7627 {
7628 for (;;)
7629 {
7630 if (len == 0)
7631 {
7632 if (REG_MULTI)
7633 {
7634 if (reg_mmatch->endpos[no].lnum == clnum)
7635 break;
7636 if (copy)
7637 *dst = CAR;
7638 ++dst;
7639 s = reg_getline(++clnum);
7640 if (reg_mmatch->endpos[no].lnum == clnum)
7641 len = reg_mmatch->endpos[no].col;
7642 else
7643 len = (int)STRLEN(s);
7644 }
7645 else
7646 break;
7647 }
7648 else if (*s == NUL) /* we hit NUL. */
7649 {
7650 if (copy)
7651 EMSG(_(e_re_damg));
7652 goto exit;
7653 }
7654 else
7655 {
7656 if (backslash && (*s == CAR || *s == '\\'))
7657 {
7658 /*
7659 * Insert a backslash in front of a CR, otherwise
7660 * it will be replaced by a line break.
7661 * Number of backslashes will be halved later,
7662 * double them here.
7663 */
7664 if (copy)
7665 {
7666 dst[0] = '\\';
7667 dst[1] = *s;
7668 }
7669 dst += 2;
7670 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00007671 else
7672 {
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007673#ifdef FEAT_MBYTE
7674 if (has_mbyte)
7675 c = mb_ptr2char(s);
7676 else
7677#endif
7678 c = *s;
7679
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007680 if (func_one != (fptr_T)NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007681 /* Turbo C complains without the typecast */
Bram Moolenaarc2c355d2013-03-19 17:42:15 +01007682 func_one = (fptr_T)(func_one(&cc, c));
7683 else if (func_all != (fptr_T)NULL)
7684 /* Turbo C complains without the typecast */
7685 func_all = (fptr_T)(func_all(&cc, c));
7686 else /* just copy */
7687 cc = c;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007688
7689#ifdef FEAT_MBYTE
7690 if (has_mbyte)
7691 {
Bram Moolenaar9225efb2007-07-30 20:32:53 +00007692 int l;
7693
7694 /* Copy composing characters separately, one
7695 * at a time. */
7696 if (enc_utf8)
7697 l = utf_ptr2len(s) - 1;
7698 else
7699 l = mb_ptr2len(s) - 1;
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007700
7701 s += l;
7702 len -= l;
7703 if (copy)
7704 mb_char2bytes(cc, dst);
7705 dst += mb_char2len(cc) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007706 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007707 else
7708#endif
7709 if (copy)
7710 *dst = cc;
7711 dst++;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007712 }
Bram Moolenaarefd2bf12006-03-16 21:41:35 +00007713
Bram Moolenaar071d4272004-06-13 20:20:40 +00007714 ++s;
7715 --len;
7716 }
7717 }
7718 }
7719 no = -1;
7720 }
7721 }
7722 if (copy)
7723 *dst = NUL;
7724
7725exit:
7726 return (int)((dst - dest) + 1);
7727}
7728
7729#ifdef FEAT_EVAL
Bram Moolenaard32a3192009-11-26 19:40:49 +00007730static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7731
Bram Moolenaar071d4272004-06-13 20:20:40 +00007732/*
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007733 * Call reg_getline() with the line numbers from the submatch. If a
7734 * substitute() was used the reg_maxline and other values have been
7735 * overwritten.
7736 */
7737 static char_u *
7738reg_getline_submatch(lnum)
7739 linenr_T lnum;
7740{
7741 char_u *s;
7742 linenr_T save_first = reg_firstlnum;
7743 linenr_T save_max = reg_maxline;
7744
7745 reg_firstlnum = submatch_firstlnum;
7746 reg_maxline = submatch_maxline;
7747
7748 s = reg_getline(lnum);
7749
7750 reg_firstlnum = save_first;
7751 reg_maxline = save_max;
7752 return s;
7753}
7754
7755/*
Bram Moolenaar7aa9f6a2007-05-10 18:00:30 +00007756 * Used for the submatch() function: get the string from the n'th submatch in
Bram Moolenaar071d4272004-06-13 20:20:40 +00007757 * allocated memory.
7758 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7759 */
7760 char_u *
7761reg_submatch(no)
7762 int no;
7763{
7764 char_u *retval = NULL;
7765 char_u *s;
7766 int len;
7767 int round;
7768 linenr_T lnum;
7769
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007770 if (!can_f_submatch || no < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007771 return NULL;
7772
7773 if (submatch_match == NULL)
7774 {
7775 /*
7776 * First round: compute the length and allocate memory.
7777 * Second round: copy the text.
7778 */
7779 for (round = 1; round <= 2; ++round)
7780 {
7781 lnum = submatch_mmatch->startpos[no].lnum;
7782 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7783 return NULL;
7784
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007785 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
Bram Moolenaar071d4272004-06-13 20:20:40 +00007786 if (s == NULL) /* anti-crash check, cannot happen? */
7787 break;
7788 if (submatch_mmatch->endpos[no].lnum == lnum)
7789 {
7790 /* Within one line: take form start to end col. */
7791 len = submatch_mmatch->endpos[no].col
7792 - submatch_mmatch->startpos[no].col;
7793 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00007794 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007795 ++len;
7796 }
7797 else
7798 {
7799 /* Multiple lines: take start line from start col, middle
7800 * lines completely and end line up to end col. */
7801 len = (int)STRLEN(s);
7802 if (round == 2)
7803 {
7804 STRCPY(retval, s);
7805 retval[len] = '\n';
7806 }
7807 ++len;
7808 ++lnum;
7809 while (lnum < submatch_mmatch->endpos[no].lnum)
7810 {
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007811 s = reg_getline_submatch(lnum++);
Bram Moolenaar071d4272004-06-13 20:20:40 +00007812 if (round == 2)
7813 STRCPY(retval + len, s);
7814 len += (int)STRLEN(s);
7815 if (round == 2)
7816 retval[len] = '\n';
7817 ++len;
7818 }
7819 if (round == 2)
Bram Moolenaar5ea08a82009-11-25 18:51:24 +00007820 STRNCPY(retval + len, reg_getline_submatch(lnum),
Bram Moolenaar071d4272004-06-13 20:20:40 +00007821 submatch_mmatch->endpos[no].col);
7822 len += submatch_mmatch->endpos[no].col;
7823 if (round == 2)
7824 retval[len] = NUL;
7825 ++len;
7826 }
7827
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007828 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007829 {
7830 retval = lalloc((long_u)len, TRUE);
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007831 if (retval == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007832 return NULL;
7833 }
7834 }
7835 }
7836 else
7837 {
Bram Moolenaar7670fa02009-02-21 21:04:20 +00007838 s = submatch_match->startp[no];
7839 if (s == NULL || submatch_match->endp[no] == NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00007840 retval = NULL;
7841 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00007842 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
Bram Moolenaar071d4272004-06-13 20:20:40 +00007843 }
7844
7845 return retval;
7846}
7847#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007848
7849static regengine_T bt_regengine =
7850{
7851 bt_regcomp,
7852 bt_regexec,
7853#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7854 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7855 bt_regexec_nl,
7856#endif
7857 bt_regexec_multi
7858#ifdef DEBUG
7859 ,(char_u *)""
7860#endif
7861};
7862
7863
7864#include "regexp_nfa.c"
7865
7866static regengine_T nfa_regengine =
7867{
7868 nfa_regcomp,
7869 nfa_regexec,
7870#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
7871 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
7872 nfa_regexec_nl,
7873#endif
7874 nfa_regexec_multi
7875#ifdef DEBUG
7876 ,(char_u *)""
7877#endif
7878};
7879
/* Possible values for "regexp_engine".  Must match with 'regexpengine'. */
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2

/* Which regexp engine to use?  Needed for vim_regcomp(). */
static int regexp_engine = AUTOMATIC_ENGINE;

#ifdef DEBUG
/* Names of the engines for debug messages, indexed by the values above. */
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			    };
#endif
7893
7894/*
7895 * Compile a regular expression into internal code.
7896 * Returns the program in allocated memory. Returns NULL for an error.
7897 */
7898 regprog_T *
7899vim_regcomp(expr_arg, re_flags)
7900 char_u *expr_arg;
7901 int re_flags;
7902{
7903 regprog_T *prog = NULL;
7904 char_u *expr = expr_arg;
7905
7906 syntax_error = FALSE;
7907 regexp_engine = p_re;
7908
7909 /* Check for prefix "\%#=", that sets the regexp engine */
7910 if (STRNCMP(expr, "\\%#=", 4) == 0)
7911 {
7912 int newengine = expr[4] - '0';
7913
7914 if (newengine == AUTOMATIC_ENGINE
7915 || newengine == BACKTRACKING_ENGINE
7916 || newengine == NFA_ENGINE)
7917 {
7918 regexp_engine = expr[4] - '0';
7919 expr += 5;
7920#ifdef DEBUG
7921 EMSG3("New regexp mode selected (%d): %s", regexp_engine,
7922 regname[newengine]);
7923#endif
7924 }
7925 else
7926 {
7927 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
7928 regexp_engine = AUTOMATIC_ENGINE;
7929 }
7930 }
7931#ifdef DEBUG
7932 bt_regengine.expr = expr;
7933 nfa_regengine.expr = expr;
7934#endif
7935
7936 /*
7937 * First try the NFA engine, unless backtracking was requested.
7938 */
7939 if (regexp_engine != BACKTRACKING_ENGINE)
7940 prog = nfa_regengine.regcomp(expr, re_flags);
7941 else
7942 prog = bt_regengine.regcomp(expr, re_flags);
7943
7944 if (prog == NULL) /* error compiling regexp with initial engine */
7945 {
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007946#ifdef BT_REGEXP_DEBUG_LOG
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007947 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
7948 {
7949 FILE *f;
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007950 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007951 if (f)
7952 {
7953 if (!syntax_error)
7954 fprintf(f, "NFA engine could not handle \"%s\"\n", expr);
7955 else
7956 fprintf(f, "Syntax error in \"%s\"\n", expr);
7957 fclose(f);
7958 }
7959 else
Bram Moolenaar7fcff1f2013-05-20 21:49:13 +02007960 EMSG2("(NFA) Could not open \"%s\" to write !!!",
7961 BT_REGEXP_DEBUG_LOG_NAME);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +02007962 /*
7963 if (syntax_error)
7964 EMSG("NFA Regexp: Syntax Error !");
7965 */
7966 }
7967#endif
7968 /*
7969 * If NFA engine failed, then revert to the backtracking engine.
7970 * Except when there was a syntax error, which was properly handled by
7971 * NFA engine.
7972 */
7973 if (regexp_engine == AUTOMATIC_ENGINE)
7974 if (!syntax_error)
7975 prog = bt_regengine.regcomp(expr, re_flags);
7976
7977 } /* endif prog==NULL */
7978
7979
7980 return prog;
7981}
7982
7983/*
7984 * Match a regexp against a string.
7985 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7986 * Uses curbuf for line count and 'iskeyword'.
7987 *
7988 * Return TRUE if there is a match, FALSE if not.
7989 */
7990 int
7991vim_regexec(rmp, line, col)
7992 regmatch_T *rmp;
7993 char_u *line; /* string to match against */
7994 colnr_T col; /* column to start looking for match */
7995{
7996 return rmp->regprog->engine->regexec(rmp, line, col);
7997}
7998
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The work is done by the regexec_nl() function of the engine that is stored
 * in the compiled program "rmp->regprog".
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;
    colnr_T	col;
{
    regprog_T	*prog = rmp->regprog;

    return prog->engine->regexec_nl(rmp, line, col);
}
#endif
8013
8014/*
8015 * Match a regexp against multiple lines.
8016 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8017 * Uses curbuf for line count and 'iskeyword'.
8018 *
8019 * Return zero if there is no match. Return number of lines contained in the
8020 * match otherwise.
8021 */
8022 long
8023vim_regexec_multi(rmp, win, buf, lnum, col, tm)
8024 regmmatch_T *rmp;
8025 win_T *win; /* window in which to search or NULL */
8026 buf_T *buf; /* buffer in which to search */
8027 linenr_T lnum; /* nr of line to start looking for match */
8028 colnr_T col; /* column to start looking for match */
8029 proftime_T *tm; /* timeout limit or NULL */
8030{
8031 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
8032}