blob: 7ff271b4394074f54bdc857edf0dcc98d0224228 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
41#include "vim.h"
42
43#undef DEBUG
44
45/*
46 * The "internal use only" fields in regexp.h are present to pass info from
47 * compile to execute that permits the execute phase to run lots faster on
48 * simple cases. They are:
49 *
50 * regstart char that must begin a match; NUL if none obvious; Can be a
51 * multi-byte character.
52 * reganch is the match anchored (at beginning-of-line only)?
53 * regmust string (pointer into program) that match must include, or NULL
54 * regmlen length of regmust string
55 * regflags RF_ values or'ed together
56 *
57 * Regstart and reganch permit very fast decisions on suitable starting points
58 * for a match, cutting down the work a lot. Regmust permits fast rejection
59 * of lines that cannot possibly match. The regmust tests are costly enough
60 * that vim_regcomp() supplies a regmust only if the r.e. contains something
61 * potentially expensive (at present, the only such thing detected is * or +
62 * at the start of the r.e., which can involve a lot of backup). Regmlen is
63 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
64 * computing it anyway.
65 */
66
67/*
68 * Structure for regexp "program". This is essentially a linear encoding
69 * of a nondeterministic finite-state machine (aka syntax charts or
70 * "railroad normal form" in parsing technology). Each node is an opcode
71 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
73 * pointer with a BRANCH on both ends of it is connecting two alternatives.
74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
75 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000077 * node points to the node after the stuff to be repeated.
78 * The operand of some types of node is a literal string; for others, it is a
79 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
80 * is the first node of the branch.
81 * (NB this is *not* a tree structure: the tail of the branch connects to the
82 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000083 *
84 * pattern is coded like:
85 *
86 * +-----------------+
87 * | V
88 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
89 * | ^ | ^
90 * +------+ +----------+
91 *
92 *
93 * +------------------+
94 * V |
95 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
96 * | | ^ ^
97 * | +---------------+ |
98 * +---------------------------------------------+
99 *
100 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000101 * +----------------------+
102 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000103 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000104 * | | ^ ^
105 * | +-----------+ |
106 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000107 *
108 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 * +-------------------------+
110 * V |
111 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
112 * | | ^
113 * | +----------------+
114 * +-----------------------------------------------+
115 *
116 *
117 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
118 * | | ^ ^
119 * | +----------------+ |
120 * +--------------------------------+
121 *
122 * +---------+
123 * | V
124 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
125 * | | | | ^ ^
126 * | | | +-----+ |
127 * | | +----------------+ |
128 * | +---------------------------+ |
129 * +------------------------------------------------------+
130 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
132 * one alternative.
133 */
134
135/*
136 * The opcodes are:
137 */
138
139/* definition number opnd? meaning */
140#define END 0 /* End of program or NOMATCH operand. */
141#define BOL 1 /* Match "" at beginning of line. */
142#define EOL 2 /* Match "" at end of line. */
143#define BRANCH 3 /* node Match this alternative, or the
144 * next... */
145#define BACK 4 /* Match "", "next" ptr points backward. */
146#define EXACTLY 5 /* str Match this string. */
147#define NOTHING 6 /* Match empty string. */
148#define STAR 7 /* node Match this (simple) thing 0 or more
149 * times. */
150#define PLUS 8 /* node Match this (simple) thing 1 or more
151 * times. */
152#define MATCH 9 /* node match the operand zero-width */
153#define NOMATCH 10 /* node check for no match with operand */
154#define BEHIND 11 /* node look behind for a match with operand */
155#define NOBEHIND 12 /* node look behind for no match with operand */
156#define SUBPAT 13 /* node match the operand here */
157#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
158 * n times (\{m,n\}). */
159#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
160#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
161#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
162 * and BRACE_COMPLEX. */
163#define NEWL 18 /* Match line-break */
164#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
165
166
/*
 * Character classes: 20-48 are the normal items, 50-78 are the same items
 * that also match a line-break (normal opcode + ADD_NL).
 *
 * FIRST_NL and LAST_NL are fully parenthesized, so that they expand to a
 * single value in any expression (e.g. "op - FIRST_NL"), not only in the
 * comparisons done by WITH_NL().
 */
#define ADD_NL      30
#define FIRST_NL    (ANY + ADD_NL)
#define ANY         20  /*      Match any one character. */
#define ANYOF       21  /* str  Match any character in this string. */
#define ANYBUT      22  /* str  Match any character not in this
                         *      string. */
#define IDENT       23  /*      Match identifier char */
#define SIDENT      24  /*      Match identifier char but no digit */
#define KWORD       25  /*      Match keyword char */
#define SKWORD      26  /*      Match word char but no digit */
#define FNAME       27  /*      Match file name char */
#define SFNAME      28  /*      Match file name char but no digit */
#define PRINT       29  /*      Match printable char */
#define SPRINT      30  /*      Match printable char but no digit */
#define WHITE       31  /*      Match whitespace char */
#define NWHITE      32  /*      Match non-whitespace char */
#define DIGIT       33  /*      Match digit char */
#define NDIGIT      34  /*      Match non-digit char */
#define HEX         35  /*      Match hex char */
#define NHEX        36  /*      Match non-hex char */
#define OCTAL       37  /*      Match octal char */
#define NOCTAL      38  /*      Match non-octal char */
#define WORD        39  /*      Match word char */
#define NWORD       40  /*      Match non-word char */
#define HEAD        41  /*      Match head char */
#define NHEAD       42  /*      Match non-head char */
#define ALPHA       43  /*      Match alpha char */
#define NALPHA      44  /*      Match non-alpha char */
#define LOWER       45  /*      Match lowercase char */
#define NLOWER      46  /*      Match non-lowercase char */
#define UPPER       47  /*      Match uppercase char */
#define NUPPER      48  /*      Match non-uppercase char */
#define LAST_NL     (NUPPER + ADD_NL)
/* TRUE when opcode "op" is one of the classes that include a line-break. */
#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
202
203#define MOPEN 80 /* -89 Mark this point in input as start of
204 * \( subexpr. MOPEN + 0 marks start of
205 * match. */
206#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
207 * end of match. */
208#define BACKREF 100 /* -109 node Match same string again \1-\9 */
209
210#ifdef FEAT_SYN_HL
211# define ZOPEN 110 /* -119 Mark this point in input as start of
212 * \z( subexpr. */
213# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
214# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
215#endif
216
217#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
218
219#define NOPEN 150 /* Mark this point in input as start of
220 \%( subexpr. */
221#define NCLOSE 151 /* Analogous to NOPEN. */
222
223#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
224#define RE_BOF 201 /* Match "" at beginning of file. */
225#define RE_EOF 202 /* Match "" at end of file. */
226#define CURSOR 203 /* Match location of cursor. */
227
228#define RE_LNUM 204 /* nr cmp Match line number */
229#define RE_COL 205 /* nr cmp Match column number */
230#define RE_VCOL 206 /* nr cmp Match virtual column number */
231
232/*
233 * Magic characters have a special meaning, they don't match literally.
234 * Magic characters are negative. This separates them from literal characters
235 * (possibly multi-byte). Only ASCII characters can be Magic.
236 */
237#define Magic(x) ((int)(x) - 256)
238#define un_Magic(x) ((x) + 256)
239#define is_Magic(x) ((x) < 0)
240
241static int no_Magic __ARGS((int x));
242static int toggle_Magic __ARGS((int x));
243
/*
 * Remove the magicness of "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
252
/*
 * Flip the magicness of "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is made Magic.
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
261
262/*
263 * The first byte of the regexp internal "program" is actually this magic
264 * number; the start node begins in the second byte. It's used to catch the
265 * most severe mutilation of the program by the caller.
266 */
267
268#define REGMAGIC 0234
269
270/*
271 * Opcode notes:
272 *
273 * BRANCH The set of branches constituting a single choice are hooked
274 * together with their "next" pointers, since precedence prevents
275 * anything being concatenated to any individual branch. The
276 * "next" pointer of the last BRANCH in a choice points to the
277 * thing following the whole choice. This is also where the
278 * final "next" pointer of each individual branch points; each
279 * branch starts with the operand node of a BRANCH node.
280 *
281 * BACK Normal "next" pointers all implicitly point forward; BACK
282 * exists to make loop structures possible.
283 *
284 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
285 * BRANCH structures using BACK. Simple cases (one character
286 * per match) are implemented with STAR and PLUS for speed
287 * and to minimize recursive plunges.
288 *
289 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
290 * node, and defines the min and max limits to be used for that
291 * node.
292 *
293 * MOPEN,MCLOSE ...are numbered at compile time.
294 * ZOPEN,ZCLOSE ...ditto
295 */
296
297/*
298 * A node is one char of opcode followed by two chars of "next" pointer.
299 * "Next" pointers are stored as two 8-bit bytes, high order first. The
300 * value is a positive offset from the opcode of the node containing it.
301 * An operand, if any, simply follows the node. (Note that much of the
302 * code generation knows about this implicit relationship.)
303 *
304 * Using two bytes for the "next" pointer is vast overkill for most things,
305 * but allows patterns to get big without disasters.
306 */
307#define OP(p) ((int)*(p))
308#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
309#define OPERAND(p) ((p) + 3)
310/* Obtain an operand that was stored as four bytes, MSB first. */
311#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
312 + ((long)(p)[5] << 8) + (long)(p)[6])
313/* Obtain a second operand stored as four bytes. */
314#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
315/* Obtain a second single-byte operand stored after a four bytes operand. */
316#define OPERAND_CMP(p) (p)[7]
317
318/*
319 * Utility definitions.
320 */
321#define UCHARAT(p) ((int)*(char_u *)(p))
322
323/* Used for an error (down from) vim_regcomp(): give the error message, set
324 * rc_did_emsg and return NULL */
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000325#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, NULL)
326#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, NULL)
327#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
329
330#define MAX_LIMIT (32767L << 16L)
331
332static int re_multi_type __ARGS((int));
333static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
334static char_u *cstrchr __ARGS((char_u *, int));
335
336#ifdef DEBUG
337static void regdump __ARGS((char_u *, regprog_T *));
338static char_u *regprop __ARGS((char_u *));
339#endif
340
341#define NOT_MULTI 0
342#define MULTI_ONE 1
343#define MULTI_MULT 2
344/*
345 * Return NOT_MULTI if c is not a "multi" operator.
346 * Return MULTI_ONE if c is a single "multi" operator.
347 * Return MULTI_MULT if c is a multi "multi" operator.
348 */
349 static int
350re_multi_type(c)
351 int c;
352{
353 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
354 return MULTI_ONE;
355 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
356 return MULTI_MULT;
357 return NOT_MULTI;
358}
359
360/*
361 * Flags to be passed up and down.
362 */
363#define HASWIDTH 0x1 /* Known never to match null string. */
364#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
365#define SPSTART 0x4 /* Starts with * or +. */
366#define HASNL 0x8 /* Contains some \n. */
367#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
368#define WORST 0 /* Worst case. */
369
370/*
371 * When regcode is set to this value, code is not emitted and size is computed
372 * instead.
373 */
374#define JUST_CALC_SIZE ((char_u *) -1)
375
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000376static char_u *reg_prev_sub = NULL;
377
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that "reg_prev_sub" points to.
 * The pointer is reset to NULL afterwards, so that the file-level variable
 * does not keep pointing to freed memory.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
    reg_prev_sub = NULL;
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385
386/*
387 * REGEXP_INRANGE contains all characters which are always special in a []
388 * range after '\'.
389 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
390 * These are:
391 * \n - New line (NL).
392 * \r - Carriage Return (CR).
393 * \t - Tab (TAB).
394 * \e - Escape (ESC).
395 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000396 * \d - Character code in decimal, eg \d123
 * \o - Character code in octal, eg \o40
398 * \x - Character code in hex, eg \x4a
399 * \u - Multibyte character code, eg \u20ac
400 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 */
402static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000403static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
405static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000406static int get_char_class __ARGS((char_u **pp));
407static int get_equi_class __ARGS((char_u **pp));
408static void reg_equi_class __ARGS((int c));
409static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410static char_u *skip_anyof __ARGS((char_u *p));
411static void init_class_tab __ARGS((void));
412
413/*
414 * Translate '\x' to its control character, except "\n", which is Magic.
415 */
416 static int
417backslash_trans(c)
418 int c;
419{
420 switch (c)
421 {
422 case 'r': return CAR;
423 case 't': return TAB;
424 case 'e': return ESC;
425 case 'b': return BS;
426 }
427 return c;
428}
429
430/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000431 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000432 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
433 * recognized. Otherwise "pp" is advanced to after the item.
434 */
435 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000436get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437 char_u **pp;
438{
439 static const char *(class_names[]) =
440 {
441 "alnum:]",
442#define CLASS_ALNUM 0
443 "alpha:]",
444#define CLASS_ALPHA 1
445 "blank:]",
446#define CLASS_BLANK 2
447 "cntrl:]",
448#define CLASS_CNTRL 3
449 "digit:]",
450#define CLASS_DIGIT 4
451 "graph:]",
452#define CLASS_GRAPH 5
453 "lower:]",
454#define CLASS_LOWER 6
455 "print:]",
456#define CLASS_PRINT 7
457 "punct:]",
458#define CLASS_PUNCT 8
459 "space:]",
460#define CLASS_SPACE 9
461 "upper:]",
462#define CLASS_UPPER 10
463 "xdigit:]",
464#define CLASS_XDIGIT 11
465 "tab:]",
466#define CLASS_TAB 12
467 "return:]",
468#define CLASS_RETURN 13
469 "backspace:]",
470#define CLASS_BACKSPACE 14
471 "escape:]",
472#define CLASS_ESCAPE 15
473 };
474#define CLASS_NONE 99
475 int i;
476
477 if ((*pp)[1] == ':')
478 {
479 for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
480 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
481 {
482 *pp += STRLEN(class_names[i]) + 2;
483 return i;
484 }
485 }
486 return CLASS_NONE;
487}
488
489/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490 * Specific version of character class functions.
491 * Using a table to keep this fast.
492 */
493static short class_tab[256];
494
495#define RI_DIGIT 0x01
496#define RI_HEX 0x02
497#define RI_OCTAL 0x04
498#define RI_WORD 0x08
499#define RI_HEAD 0x10
500#define RI_ALPHA 0x20
501#define RI_LOWER 0x40
502#define RI_UPPER 0x80
503#define RI_WHITE 0x100
504
505 static void
506init_class_tab()
507{
508 int i;
509 static int done = FALSE;
510
511 if (done)
512 return;
513
514 for (i = 0; i < 256; ++i)
515 {
516 if (i >= '0' && i <= '7')
517 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
518 else if (i >= '8' && i <= '9')
519 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
520 else if (i >= 'a' && i <= 'f')
521 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
522#ifdef EBCDIC
523 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
524 || (i >= 's' && i <= 'z'))
525#else
526 else if (i >= 'g' && i <= 'z')
527#endif
528 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
529 else if (i >= 'A' && i <= 'F')
530 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
531#ifdef EBCDIC
532 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
533 || (i >= 'S' && i <= 'Z'))
534#else
535 else if (i >= 'G' && i <= 'Z')
536#endif
537 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
538 else if (i == '_')
539 class_tab[i] = RI_WORD + RI_HEAD;
540 else
541 class_tab[i] = 0;
542 }
543 class_tab[' '] |= RI_WHITE;
544 class_tab['\t'] |= RI_WHITE;
545 done = TRUE;
546}
547
/*
 * Character class tests that look up the RI_ flags of "c" in class_tab[].
 * The result is non-zero when "c" has the flag, zero otherwise.
 * With FEAT_MBYTE a value of 0x100 or above never has a flag, which keeps
 * the index inside the table.  Note that these variants evaluate "c" twice.
 * The argument is parenthesized, so that an expression such as "x & 0xff"
 * can be passed safely.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[(c)] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[(c)] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[(c)] & RI_DIGIT)
# define ri_hex(c)      (class_tab[(c)] & RI_HEX)
# define ri_octal(c)    (class_tab[(c)] & RI_OCTAL)
# define ri_word(c)     (class_tab[(c)] & RI_WORD)
# define ri_head(c)     (class_tab[(c)] & RI_HEAD)
# define ri_alpha(c)    (class_tab[(c)] & RI_ALPHA)
# define ri_lower(c)    (class_tab[(c)] & RI_LOWER)
# define ri_upper(c)    (class_tab[(c)] & RI_UPPER)
# define ri_white(c)    (class_tab[(c)] & RI_WHITE)
#endif
569
570/* flags for regflags */
571#define RF_ICASE 1 /* ignore case */
572#define RF_NOICASE 2 /* don't ignore case */
573#define RF_HASNL 4 /* can match a NL */
574#define RF_ICOMBINE 8 /* ignore combining characters */
575#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
576
577/*
578 * Global work variables for vim_regcomp().
579 */
580
581static char_u *regparse; /* Input-scan pointer. */
582static int prevchr_len; /* byte length of previous char */
583static int num_complex_braces; /* Complex \{...} count */
584static int regnpar; /* () count. */
585#ifdef FEAT_SYN_HL
586static int regnzpar; /* \z() count. */
587static int re_has_z; /* \z item detected */
588#endif
589static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
590static long regsize; /* Code size. */
591static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
592static unsigned regflags; /* RF_ flags for prog */
593static long brace_min[10]; /* Minimums for complex brace repeats */
594static long brace_max[10]; /* Maximums for complex brace repeats */
595static int brace_count[10]; /* Current counts for complex brace repeats */
596#if defined(FEAT_SYN_HL) || defined(PROTO)
597static int had_eol; /* TRUE when EOL found by vim_regcomp() */
598#endif
599static int one_exactly = FALSE; /* only do one char for EXACTLY */
600
601static int reg_magic; /* magicness of the pattern: */
602#define MAGIC_NONE 1 /* "\V" very unmagic */
603#define MAGIC_OFF 2 /* "\M" or 'magic' off */
604#define MAGIC_ON 3 /* "\m" or 'magic' */
605#define MAGIC_ALL 4 /* "\v" very magic */
606
607static int reg_string; /* matching with a string instead of a buffer
608 line */
609
610/*
611 * META contains all characters that may be magic, except '^' and '$'.
612 */
613
614#ifdef EBCDIC
615static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
616#else
617/* META[] is used often enough to justify turning it into a table. */
618static char_u META_flags[] = {
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
621/* % & ( ) * + . */
622 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
623/* 1 2 3 4 5 6 7 8 9 < = > ? */
624 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
625/* @ A C D F H I K L M O */
626 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
627/* P S U V W X Z [ _ */
628 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
629/* a c d f h i k l m n o */
630 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
631/* p s u v w x z { | ~ */
632 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
633};
634#endif
635
636static int curchr;
637
638/* arguments for reg() */
639#define REG_NOPAREN 0 /* toplevel reg() */
640#define REG_PAREN 1 /* \(\) */
641#define REG_ZPAREN 2 /* \z(\) */
642#define REG_NPAREN 3 /* \%(\) */
643
644/*
645 * Forward declarations for vim_regcomp()'s friends.
646 */
647static void initchr __ARGS((char_u *));
648static int getchr __ARGS((void));
649static void skipchr_keepstart __ARGS((void));
650static int peekchr __ARGS((void));
651static void skipchr __ARGS((void));
652static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000653static int gethexchrs __ARGS((int maxinputlen));
654static int getoctchrs __ARGS((void));
655static int getdecchrs __ARGS((void));
656static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657static void regcomp_start __ARGS((char_u *expr, int flags));
658static char_u *reg __ARGS((int, int *));
659static char_u *regbranch __ARGS((int *flagp));
660static char_u *regconcat __ARGS((int *flagp));
661static char_u *regpiece __ARGS((int *));
662static char_u *regatom __ARGS((int *));
663static char_u *regnode __ARGS((int));
664static int prog_magic_wrong __ARGS((void));
665static char_u *regnext __ARGS((char_u *));
666static void regc __ARGS((int b));
667#ifdef FEAT_MBYTE
668static void regmbc __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000669#else
670# define regmbc(c) regc(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000671#endif
672static void reginsert __ARGS((int, char_u *));
673static void reginsert_limits __ARGS((int, long, long, char_u *));
674static char_u *re_put_long __ARGS((char_u *pr, long_u val));
675static int read_limits __ARGS((long *, long *));
676static void regtail __ARGS((char_u *, char_u *));
677static void regoptail __ARGS((char_u *, char_u *));
678
679/*
680 * Return TRUE if compiled regular expression "prog" can match a line break.
681 */
682 int
683re_multiline(prog)
684 regprog_T *prog;
685{
686 return (prog->regflags & RF_HASNL);
687}
688
689/*
690 * Return TRUE if compiled regular expression "prog" looks before the start
691 * position (pattern contains "\@<=" or "\@<!").
692 */
693 int
694re_lookbehind(prog)
695 regprog_T *prog;
696{
697 return (prog->regflags & RF_LOOKBH);
698}
699
700/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000701 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
702 * Returns a character representing the class. Zero means that no item was
703 * recognized. Otherwise "pp" is advanced to after the item.
704 */
705 static int
706get_equi_class(pp)
707 char_u **pp;
708{
709 int c;
710 int l = 1;
711 char_u *p = *pp;
712
713 if (p[1] == '=')
714 {
715#ifdef FEAT_MBYTE
716 if (has_mbyte)
717 l = mb_ptr2len_check(p + 2);
718#endif
719 if (p[l + 2] == '=' && p[l + 3] == ']')
720 {
721#ifdef FEAT_MBYTE
722 if (has_mbyte)
723 c = mb_ptr2char(p + 2);
724 else
725#endif
726 c = p[2];
727 *pp += l + 4;
728 return c;
729 }
730 }
731 return 0;
732}
733
/*
 * Produce the bytes for equivalence class "c": emit, with regmbc(), the base
 * letter and its accented variants when "c" is one of them, otherwise emit
 * only "c" itself.
 * Currently only handles latin1, latin9 and utf-8.
 *
 * The accented characters are written as numbers (0xc0 - 0xff) and not as
 * character constants like '\300': where plain "char" is signed such a
 * constant has a negative value, which never equals the character value in
 * "c" and is not a valid character to pass to regmbc().
 */
    static void
reg_equi_class(c)
    int         c;
{
#ifdef FEAT_MBYTE
    if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
                                         || STRCMP(p_enc, "latin9") == 0)
#endif
    {
        switch (c)
        {
            case 'A': case 0xc0: case 0xc1: case 0xc2:
            case 0xc3: case 0xc4: case 0xc5:
                      regmbc('A'); regmbc(0xc0); regmbc(0xc1);
                      regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
                      regmbc(0xc5);
                      return;
            case 'C': case 0xc7:
                      regmbc('C'); regmbc(0xc7);
                      return;
            case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
                      regmbc('E'); regmbc(0xc8); regmbc(0xc9);
                      regmbc(0xca); regmbc(0xcb);
                      return;
            case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
                      regmbc('I'); regmbc(0xcc); regmbc(0xcd);
                      regmbc(0xce); regmbc(0xcf);
                      return;
            case 'N': case 0xd1:
                      regmbc('N'); regmbc(0xd1);
                      return;
            case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
            case 0xd6:
                      regmbc('O'); regmbc(0xd2); regmbc(0xd3);
                      regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
                      return;
            case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
                      regmbc('U'); regmbc(0xd9); regmbc(0xda);
                      regmbc(0xdb); regmbc(0xdc);
                      return;
            case 'Y': case 0xdd:
                      regmbc('Y'); regmbc(0xdd);
                      return;
            case 'a': case 0xe0: case 0xe1: case 0xe2:
            case 0xe3: case 0xe4: case 0xe5:
                      regmbc('a'); regmbc(0xe0); regmbc(0xe1);
                      regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
                      regmbc(0xe5);
                      return;
            case 'c': case 0xe7:
                      regmbc('c'); regmbc(0xe7);
                      return;
            case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
                      regmbc('e'); regmbc(0xe8); regmbc(0xe9);
                      regmbc(0xea); regmbc(0xeb);
                      return;
            case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
                      regmbc('i'); regmbc(0xec); regmbc(0xed);
                      regmbc(0xee); regmbc(0xef);
                      return;
            case 'n': case 0xf1:
                      regmbc('n'); regmbc(0xf1);
                      return;
            case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
            case 0xf6:
                      regmbc('o'); regmbc(0xf2); regmbc(0xf3);
                      regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
                      return;
            case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
                      regmbc('u'); regmbc(0xf9); regmbc(0xfa);
                      regmbc(0xfb); regmbc(0xfc);
                      return;
            case 'y': case 0xfd: case 0xff:
                      regmbc('y'); regmbc(0xfd); regmbc(0xff);
                      return;
        }
    }

    /* Not a known class (or not a supported encoding): only "c" itself. */
    regmbc(c);
}
817
818/*
819 * Check for a collating element "[.a.]". "pp" points to the '['.
820 * Returns a character. Zero means that no item was recognized. Otherwise
821 * "pp" is advanced to after the item.
822 * Currently only single characters are recognized!
823 */
824 static int
825get_coll_element(pp)
826 char_u **pp;
827{
828 int c;
829 int l = 1;
830 char_u *p = *pp;
831
832 if (p[1] == '.')
833 {
834#ifdef FEAT_MBYTE
835 if (has_mbyte)
836 l = mb_ptr2len_check(p + 2);
837#endif
838 if (p[l + 2] == '.' && p[l + 3] == ']')
839 {
840#ifdef FEAT_MBYTE
841 if (has_mbyte)
842 c = mb_ptr2char(p + 2);
843 else
844#endif
845 c = p[2];
846 *pp += l + 4;
847 return c;
848 }
849 }
850 return 0;
851}
852
853
854/*
855 * Skip over a "[]" range.
856 * "p" must point to the character after the '['.
857 * The returned pointer is on the matching ']', or the terminating NUL.
858 */
859 static char_u *
860skip_anyof(p)
861 char_u *p;
862{
863 int cpo_lit; /* 'cpoptions' contains 'l' flag */
864 int cpo_bsl; /* 'cpoptions' contains '\' flag */
865#ifdef FEAT_MBYTE
866 int l;
867#endif
868
Bram Moolenaar3b56eb32005-07-11 22:40:32 +0000869 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
870 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000871
872 if (*p == '^') /* Complement of range. */
873 ++p;
874 if (*p == ']' || *p == '-')
875 ++p;
876 while (*p != NUL && *p != ']')
877 {
878#ifdef FEAT_MBYTE
879 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
880 p += l;
881 else
882#endif
883 if (*p == '-')
884 {
885 ++p;
886 if (*p != ']' && *p != NUL)
887 mb_ptr_adv(p);
888 }
889 else if (*p == '\\'
890 && !cpo_bsl
891 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
892 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
893 p += 2;
894 else if (*p == '[')
895 {
896 if (get_char_class(&p) == CLASS_NONE
897 && get_equi_class(&p) == 0
898 && get_coll_element(&p) == 0)
899 ++p; /* It was not a class name */
900 }
901 else
902 ++p;
903 }
904
905 return p;
906}
907
908/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +0000910 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000911 * Take care of characters with a backslash in front of it.
912 * Skip strings inside [ and ].
913 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
914 * expression and change "\?" to "?". If "*newp" is not NULL the expression
915 * is changed in-place.
916 */
917 char_u *
918skip_regexp(startp, dirc, magic, newp)
919 char_u *startp;
920 int dirc;
921 int magic;
922 char_u **newp;
923{
924 int mymagic;
925 char_u *p = startp;
926
927 if (magic)
928 mymagic = MAGIC_ON;
929 else
930 mymagic = MAGIC_OFF;
931
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000932 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000933 {
934 if (p[0] == dirc) /* found end of regexp */
935 break;
936 if ((p[0] == '[' && mymagic >= MAGIC_ON)
937 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
938 {
939 p = skip_anyof(p + 1);
940 if (p[0] == NUL)
941 break;
942 }
943 else if (p[0] == '\\' && p[1] != NUL)
944 {
945 if (dirc == '?' && newp != NULL && p[1] == '?')
946 {
947 /* change "\?" to "?", make a copy first. */
948 if (*newp == NULL)
949 {
950 *newp = vim_strsave(startp);
951 if (*newp != NULL)
952 p = *newp + (p - startp);
953 }
954 if (*newp != NULL)
955 mch_memmove(p, p + 1, STRLEN(p));
956 else
957 ++p;
958 }
959 else
960 ++p; /* skip next character */
961 if (*p == 'v')
962 mymagic = MAGIC_ALL;
963 else if (*p == 'V')
964 mymagic = MAGIC_NONE;
965 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000966 }
967 return p;
968}
969
970/*
Bram Moolenaar86b68352004-12-27 21:59:20 +0000971 * vim_regcomp() - compile a regular expression into internal code
972 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973 *
974 * We can't allocate space until we know how big the compiled form will be,
975 * but we can't compile it (and thus know how big it is) until we've got a
976 * place to put the code. So we cheat: we compile it twice, once with code
977 * generation turned off and size counting turned on, and once "for real".
978 * This also means that we don't allocate space until we are sure that the
979 * thing really will compile successfully, and we never have to move the
980 * code and thus invalidate pointers into it. (Note that it has to be in
981 * one piece because vim_free() must be able to free it all.)
982 *
983 * Whether upper/lower case is to be ignored is decided when executing the
984 * program, it does not matter here.
985 *
986 * Beware that the optimization-preparation code in here knows about some
987 * of the structure of the compiled regexp.
988 * "re_flags": RE_MAGIC and/or RE_STRING.
989 */
990 regprog_T *
991vim_regcomp(expr, re_flags)
992 char_u *expr;
993 int re_flags;
994{
995 regprog_T *r;
996 char_u *scan;
997 char_u *longest;
998 int len;
999 int flags;
1000
1001 if (expr == NULL)
1002 EMSG_RET_NULL(_(e_null));
1003
1004 init_class_tab();
1005
1006 /*
1007 * First pass: determine size, legality.
1008 */
1009 regcomp_start(expr, re_flags);
1010 regcode = JUST_CALC_SIZE;
1011 regc(REGMAGIC);
1012 if (reg(REG_NOPAREN, &flags) == NULL)
1013 return NULL;
1014
1015 /* Small enough for pointer-storage convention? */
1016#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1017 if (regsize >= 65536L - 256L)
1018 EMSG_RET_NULL(_("E339: Pattern too long"));
1019#endif
1020
1021 /* Allocate space. */
1022 r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1023 if (r == NULL)
1024 return NULL;
1025
1026 /*
1027 * Second pass: emit code.
1028 */
1029 regcomp_start(expr, re_flags);
1030 regcode = r->program;
1031 regc(REGMAGIC);
1032 if (reg(REG_NOPAREN, &flags) == NULL)
1033 {
1034 vim_free(r);
1035 return NULL;
1036 }
1037
1038 /* Dig out information for optimizations. */
1039 r->regstart = NUL; /* Worst-case defaults. */
1040 r->reganch = 0;
1041 r->regmust = NULL;
1042 r->regmlen = 0;
1043 r->regflags = regflags;
1044 if (flags & HASNL)
1045 r->regflags |= RF_HASNL;
1046 if (flags & HASLOOKBH)
1047 r->regflags |= RF_LOOKBH;
1048#ifdef FEAT_SYN_HL
1049 /* Remember whether this pattern has any \z specials in it. */
1050 r->reghasz = re_has_z;
1051#endif
1052 scan = r->program + 1; /* First BRANCH. */
1053 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1054 {
1055 scan = OPERAND(scan);
1056
1057 /* Starting-point info. */
1058 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1059 {
1060 r->reganch++;
1061 scan = regnext(scan);
1062 }
1063
1064 if (OP(scan) == EXACTLY)
1065 {
1066#ifdef FEAT_MBYTE
1067 if (has_mbyte)
1068 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1069 else
1070#endif
1071 r->regstart = *OPERAND(scan);
1072 }
1073 else if ((OP(scan) == BOW
1074 || OP(scan) == EOW
1075 || OP(scan) == NOTHING
1076 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1077 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1078 && OP(regnext(scan)) == EXACTLY)
1079 {
1080#ifdef FEAT_MBYTE
1081 if (has_mbyte)
1082 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1083 else
1084#endif
1085 r->regstart = *OPERAND(regnext(scan));
1086 }
1087
1088 /*
1089 * If there's something expensive in the r.e., find the longest
1090 * literal string that must appear and make it the regmust. Resolve
1091 * ties in favor of later strings, since the regstart check works
1092 * with the beginning of the r.e. and avoiding duplication
1093 * strengthens checking. Not a strong reason, but sufficient in the
1094 * absence of others.
1095 */
1096 /*
1097 * When the r.e. starts with BOW, it is faster to look for a regmust
1098 * first. Used a lot for "#" and "*" commands. (Added by mool).
1099 */
1100 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1101 && !(flags & HASNL))
1102 {
1103 longest = NULL;
1104 len = 0;
1105 for (; scan != NULL; scan = regnext(scan))
1106 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1107 {
1108 longest = OPERAND(scan);
1109 len = (int)STRLEN(OPERAND(scan));
1110 }
1111 r->regmust = longest;
1112 r->regmlen = len;
1113 }
1114 }
1115#ifdef DEBUG
1116 regdump(expr, r);
1117#endif
1118 return r;
1119}
1120
1121/*
1122 * Setup to parse the regexp. Used once to get the length and once to do it.
1123 */
1124 static void
1125regcomp_start(expr, re_flags)
1126 char_u *expr;
1127 int re_flags; /* see vim_regcomp() */
1128{
1129 initchr(expr);
1130 if (re_flags & RE_MAGIC)
1131 reg_magic = MAGIC_ON;
1132 else
1133 reg_magic = MAGIC_OFF;
1134 reg_string = (re_flags & RE_STRING);
1135
1136 num_complex_braces = 0;
1137 regnpar = 1;
1138 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1139#ifdef FEAT_SYN_HL
1140 regnzpar = 1;
1141 re_has_z = 0;
1142#endif
1143 regsize = 0L;
1144 regflags = 0;
1145#if defined(FEAT_SYN_HL) || defined(PROTO)
1146 had_eol = FALSE;
1147#endif
1148}
1149
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: TRUE when the EOL item "$" was found while
 * compiling the pattern in the previous call to vim_regcomp().
 * The flag is reset at the start of each compile pass.
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1161
1162/*
1163 * reg - regular expression, i.e. main body or parenthesized thing
1164 *
1165 * Caller must absorb opening parenthesis.
1166 *
1167 * Combining parenthesis handling with the base level of regular expression
1168 * is a trifle forced, but the need to tie the tails of the branches to what
1169 * follows makes it hard to avoid.
1170 */
1171 static char_u *
1172reg(paren, flagp)
1173 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1174 int *flagp;
1175{
1176 char_u *ret;
1177 char_u *br;
1178 char_u *ender;
1179 int parno = 0;
1180 int flags;
1181
1182 *flagp = HASWIDTH; /* Tentatively. */
1183
1184#ifdef FEAT_SYN_HL
1185 if (paren == REG_ZPAREN)
1186 {
1187 /* Make a ZOPEN node. */
1188 if (regnzpar >= NSUBEXP)
1189 EMSG_RET_NULL(_("E50: Too many \\z("));
1190 parno = regnzpar;
1191 regnzpar++;
1192 ret = regnode(ZOPEN + parno);
1193 }
1194 else
1195#endif
1196 if (paren == REG_PAREN)
1197 {
1198 /* Make a MOPEN node. */
1199 if (regnpar >= NSUBEXP)
1200 EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1201 parno = regnpar;
1202 ++regnpar;
1203 ret = regnode(MOPEN + parno);
1204 }
1205 else if (paren == REG_NPAREN)
1206 {
1207 /* Make a NOPEN node. */
1208 ret = regnode(NOPEN);
1209 }
1210 else
1211 ret = NULL;
1212
1213 /* Pick up the branches, linking them together. */
1214 br = regbranch(&flags);
1215 if (br == NULL)
1216 return NULL;
1217 if (ret != NULL)
1218 regtail(ret, br); /* [MZ]OPEN -> first. */
1219 else
1220 ret = br;
1221 /* If one of the branches can be zero-width, the whole thing can.
1222 * If one of the branches has * at start or matches a line-break, the
1223 * whole thing can. */
1224 if (!(flags & HASWIDTH))
1225 *flagp &= ~HASWIDTH;
1226 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1227 while (peekchr() == Magic('|'))
1228 {
1229 skipchr();
1230 br = regbranch(&flags);
1231 if (br == NULL)
1232 return NULL;
1233 regtail(ret, br); /* BRANCH -> BRANCH. */
1234 if (!(flags & HASWIDTH))
1235 *flagp &= ~HASWIDTH;
1236 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1237 }
1238
1239 /* Make a closing node, and hook it on the end. */
1240 ender = regnode(
1241#ifdef FEAT_SYN_HL
1242 paren == REG_ZPAREN ? ZCLOSE + parno :
1243#endif
1244 paren == REG_PAREN ? MCLOSE + parno :
1245 paren == REG_NPAREN ? NCLOSE : END);
1246 regtail(ret, ender);
1247
1248 /* Hook the tails of the branches to the closing node. */
1249 for (br = ret; br != NULL; br = regnext(br))
1250 regoptail(br, ender);
1251
1252 /* Check for proper termination. */
1253 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1254 {
1255#ifdef FEAT_SYN_HL
1256 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001257 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001258 else
1259#endif
1260 if (paren == REG_NPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001261 EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001263 EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 }
1265 else if (paren == REG_NOPAREN && peekchr() != NUL)
1266 {
1267 if (curchr == Magic(')'))
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001268 EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001270 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 /* NOTREACHED */
1272 }
1273 /*
1274 * Here we set the flag allowing back references to this set of
1275 * parentheses.
1276 */
1277 if (paren == REG_PAREN)
1278 had_endbrace[parno] = TRUE; /* have seen the close paren */
1279 return ret;
1280}
1281
1282/*
1283 * regbranch - one alternative of an | operator
1284 *
1285 * Implements the & operator.
1286 */
1287 static char_u *
1288regbranch(flagp)
1289 int *flagp;
1290{
1291 char_u *ret;
1292 char_u *chain = NULL;
1293 char_u *latest;
1294 int flags;
1295
1296 *flagp = WORST | HASNL; /* Tentatively. */
1297
1298 ret = regnode(BRANCH);
1299 for (;;)
1300 {
1301 latest = regconcat(&flags);
1302 if (latest == NULL)
1303 return NULL;
1304 /* If one of the branches has width, the whole thing has. If one of
1305 * the branches anchors at start-of-line, the whole thing does.
1306 * If one of the branches uses look-behind, the whole thing does. */
1307 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1308 /* If one of the branches doesn't match a line-break, the whole thing
1309 * doesn't. */
1310 *flagp &= ~HASNL | (flags & HASNL);
1311 if (chain != NULL)
1312 regtail(chain, latest);
1313 if (peekchr() != Magic('&'))
1314 break;
1315 skipchr();
1316 regtail(latest, regnode(END)); /* operand ends */
1317 reginsert(MATCH, latest);
1318 chain = latest;
1319 }
1320
1321 return ret;
1322}
1323
1324/*
1325 * regbranch - one alternative of an | or & operator
1326 *
1327 * Implements the concatenation operator.
1328 */
1329 static char_u *
1330regconcat(flagp)
1331 int *flagp;
1332{
1333 char_u *first = NULL;
1334 char_u *chain = NULL;
1335 char_u *latest;
1336 int flags;
1337 int cont = TRUE;
1338
1339 *flagp = WORST; /* Tentatively. */
1340
1341 while (cont)
1342 {
1343 switch (peekchr())
1344 {
1345 case NUL:
1346 case Magic('|'):
1347 case Magic('&'):
1348 case Magic(')'):
1349 cont = FALSE;
1350 break;
1351 case Magic('Z'):
1352#ifdef FEAT_MBYTE
1353 regflags |= RF_ICOMBINE;
1354#endif
1355 skipchr_keepstart();
1356 break;
1357 case Magic('c'):
1358 regflags |= RF_ICASE;
1359 skipchr_keepstart();
1360 break;
1361 case Magic('C'):
1362 regflags |= RF_NOICASE;
1363 skipchr_keepstart();
1364 break;
1365 case Magic('v'):
1366 reg_magic = MAGIC_ALL;
1367 skipchr_keepstart();
1368 curchr = -1;
1369 break;
1370 case Magic('m'):
1371 reg_magic = MAGIC_ON;
1372 skipchr_keepstart();
1373 curchr = -1;
1374 break;
1375 case Magic('M'):
1376 reg_magic = MAGIC_OFF;
1377 skipchr_keepstart();
1378 curchr = -1;
1379 break;
1380 case Magic('V'):
1381 reg_magic = MAGIC_NONE;
1382 skipchr_keepstart();
1383 curchr = -1;
1384 break;
1385 default:
1386 latest = regpiece(&flags);
1387 if (latest == NULL)
1388 return NULL;
1389 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1390 if (chain == NULL) /* First piece. */
1391 *flagp |= flags & SPSTART;
1392 else
1393 regtail(chain, latest);
1394 chain = latest;
1395 if (first == NULL)
1396 first = latest;
1397 break;
1398 }
1399 }
1400 if (first == NULL) /* Loop ran zero times. */
1401 first = regnode(NOTHING);
1402 return first;
1403}
1404
1405/*
1406 * regpiece - something followed by possible [*+=]
1407 *
1408 * Note that the branching code sequences used for = and the general cases
1409 * of * and + are somewhat optimized: they use the same NOTHING node as
1410 * both the endmarker for their branch list and the body of the last branch.
1411 * It might seem that this node could be dispensed with entirely, but the
1412 * endmarker role is not redundant.
1413 */
1414 static char_u *
1415regpiece(flagp)
1416 int *flagp;
1417{
1418 char_u *ret;
1419 int op;
1420 char_u *next;
1421 int flags;
1422 long minval;
1423 long maxval;
1424
1425 ret = regatom(&flags);
1426 if (ret == NULL)
1427 return NULL;
1428
1429 op = peekchr();
1430 if (re_multi_type(op) == NOT_MULTI)
1431 {
1432 *flagp = flags;
1433 return ret;
1434 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001435 /* default flags */
1436 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1437
1438 skipchr();
1439 switch (op)
1440 {
1441 case Magic('*'):
1442 if (flags & SIMPLE)
1443 reginsert(STAR, ret);
1444 else
1445 {
1446 /* Emit x* as (x&|), where & means "self". */
1447 reginsert(BRANCH, ret); /* Either x */
1448 regoptail(ret, regnode(BACK)); /* and loop */
1449 regoptail(ret, ret); /* back */
1450 regtail(ret, regnode(BRANCH)); /* or */
1451 regtail(ret, regnode(NOTHING)); /* null. */
1452 }
1453 break;
1454
1455 case Magic('+'):
1456 if (flags & SIMPLE)
1457 reginsert(PLUS, ret);
1458 else
1459 {
1460 /* Emit x+ as x(&|), where & means "self". */
1461 next = regnode(BRANCH); /* Either */
1462 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001463 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001464 regtail(next, regnode(BRANCH)); /* or */
1465 regtail(ret, regnode(NOTHING)); /* null. */
1466 }
1467 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1468 break;
1469
1470 case Magic('@'):
1471 {
1472 int lop = END;
1473
1474 switch (no_Magic(getchr()))
1475 {
1476 case '=': lop = MATCH; break; /* \@= */
1477 case '!': lop = NOMATCH; break; /* \@! */
1478 case '>': lop = SUBPAT; break; /* \@> */
1479 case '<': switch (no_Magic(getchr()))
1480 {
1481 case '=': lop = BEHIND; break; /* \@<= */
1482 case '!': lop = NOBEHIND; break; /* \@<! */
1483 }
1484 }
1485 if (lop == END)
1486 EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1487 reg_magic == MAGIC_ALL);
1488 /* Look behind must match with behind_pos. */
1489 if (lop == BEHIND || lop == NOBEHIND)
1490 {
1491 regtail(ret, regnode(BHPOS));
1492 *flagp |= HASLOOKBH;
1493 }
1494 regtail(ret, regnode(END)); /* operand ends */
1495 reginsert(lop, ret);
1496 break;
1497 }
1498
1499 case Magic('?'):
1500 case Magic('='):
1501 /* Emit x= as (x|) */
1502 reginsert(BRANCH, ret); /* Either x */
1503 regtail(ret, regnode(BRANCH)); /* or */
1504 next = regnode(NOTHING); /* null. */
1505 regtail(ret, next);
1506 regoptail(ret, next);
1507 break;
1508
1509 case Magic('{'):
1510 if (!read_limits(&minval, &maxval))
1511 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512 if (flags & SIMPLE)
1513 {
1514 reginsert(BRACE_SIMPLE, ret);
1515 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1516 }
1517 else
1518 {
1519 if (num_complex_braces >= 10)
1520 EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1521 reg_magic == MAGIC_ALL);
1522 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1523 regoptail(ret, regnode(BACK));
1524 regoptail(ret, ret);
1525 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1526 ++num_complex_braces;
1527 }
1528 if (minval > 0 && maxval > 0)
1529 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1530 break;
1531 }
1532 if (re_multi_type(peekchr()) != NOT_MULTI)
1533 {
1534 /* Can't have a multi follow a multi. */
1535 if (peekchr() == Magic('*'))
1536 sprintf((char *)IObuff, _("E61: Nested %s*"),
1537 reg_magic >= MAGIC_ON ? "" : "\\");
1538 else
1539 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1540 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1541 EMSG_RET_NULL(IObuff);
1542 }
1543
1544 return ret;
1545}
1546
1547/*
1548 * regatom - the lowest level
1549 *
1550 * Optimization: gobbles an entire sequence of ordinary characters so that
1551 * it can turn them into a single node, which is smaller to store and
1552 * faster to run. Don't do this when one_exactly is set.
1553 */
1554 static char_u *
1555regatom(flagp)
1556 int *flagp;
1557{
1558 char_u *ret;
1559 int flags;
1560 int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001561 int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001562 int c;
1563 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1564 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1565 FNAME, SFNAME, PRINT, SPRINT,
1566 WHITE, NWHITE, DIGIT, NDIGIT,
1567 HEX, NHEX, OCTAL, NOCTAL,
1568 WORD, NWORD, HEAD, NHEAD,
1569 ALPHA, NALPHA, LOWER, NLOWER,
1570 UPPER, NUPPER
1571 };
1572 char_u *p;
1573 int extra = 0;
1574
1575 *flagp = WORST; /* Tentatively. */
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001576 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1577 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578
1579 c = getchr();
1580 switch (c)
1581 {
1582 case Magic('^'):
1583 ret = regnode(BOL);
1584 break;
1585
1586 case Magic('$'):
1587 ret = regnode(EOL);
1588#if defined(FEAT_SYN_HL) || defined(PROTO)
1589 had_eol = TRUE;
1590#endif
1591 break;
1592
1593 case Magic('<'):
1594 ret = regnode(BOW);
1595 break;
1596
1597 case Magic('>'):
1598 ret = regnode(EOW);
1599 break;
1600
1601 case Magic('_'):
1602 c = no_Magic(getchr());
1603 if (c == '^') /* "\_^" is start-of-line */
1604 {
1605 ret = regnode(BOL);
1606 break;
1607 }
1608 if (c == '$') /* "\_$" is end-of-line */
1609 {
1610 ret = regnode(EOL);
1611#if defined(FEAT_SYN_HL) || defined(PROTO)
1612 had_eol = TRUE;
1613#endif
1614 break;
1615 }
1616
1617 extra = ADD_NL;
1618 *flagp |= HASNL;
1619
1620 /* "\_[" is character range plus newline */
1621 if (c == '[')
1622 goto collection;
1623
1624 /* "\_x" is character class plus newline */
1625 /*FALLTHROUGH*/
1626
1627 /*
1628 * Character classes.
1629 */
1630 case Magic('.'):
1631 case Magic('i'):
1632 case Magic('I'):
1633 case Magic('k'):
1634 case Magic('K'):
1635 case Magic('f'):
1636 case Magic('F'):
1637 case Magic('p'):
1638 case Magic('P'):
1639 case Magic('s'):
1640 case Magic('S'):
1641 case Magic('d'):
1642 case Magic('D'):
1643 case Magic('x'):
1644 case Magic('X'):
1645 case Magic('o'):
1646 case Magic('O'):
1647 case Magic('w'):
1648 case Magic('W'):
1649 case Magic('h'):
1650 case Magic('H'):
1651 case Magic('a'):
1652 case Magic('A'):
1653 case Magic('l'):
1654 case Magic('L'):
1655 case Magic('u'):
1656 case Magic('U'):
1657 p = vim_strchr(classchars, no_Magic(c));
1658 if (p == NULL)
1659 EMSG_RET_NULL(_("E63: invalid use of \\_"));
1660 ret = regnode(classcodes[p - classchars] + extra);
1661 *flagp |= HASWIDTH | SIMPLE;
1662 break;
1663
1664 case Magic('n'):
1665 if (reg_string)
1666 {
1667 /* In a string "\n" matches a newline character. */
1668 ret = regnode(EXACTLY);
1669 regc(NL);
1670 regc(NUL);
1671 *flagp |= HASWIDTH | SIMPLE;
1672 }
1673 else
1674 {
1675 /* In buffer text "\n" matches the end of a line. */
1676 ret = regnode(NEWL);
1677 *flagp |= HASWIDTH | HASNL;
1678 }
1679 break;
1680
1681 case Magic('('):
1682 if (one_exactly)
1683 EMSG_ONE_RET_NULL;
1684 ret = reg(REG_PAREN, &flags);
1685 if (ret == NULL)
1686 return NULL;
1687 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1688 break;
1689
1690 case NUL:
1691 case Magic('|'):
1692 case Magic('&'):
1693 case Magic(')'):
1694 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
1695 /* NOTREACHED */
1696
1697 case Magic('='):
1698 case Magic('?'):
1699 case Magic('+'):
1700 case Magic('@'):
1701 case Magic('{'):
1702 case Magic('*'):
1703 c = no_Magic(c);
1704 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1705 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1706 ? "" : "\\", c);
1707 EMSG_RET_NULL(IObuff);
1708 /* NOTREACHED */
1709
1710 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00001711 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001712 {
1713 char_u *lp;
1714
1715 ret = regnode(EXACTLY);
1716 lp = reg_prev_sub;
1717 while (*lp != NUL)
1718 regc(*lp++);
1719 regc(NUL);
1720 if (*reg_prev_sub != NUL)
1721 {
1722 *flagp |= HASWIDTH;
1723 if ((lp - reg_prev_sub) == 1)
1724 *flagp |= SIMPLE;
1725 }
1726 }
1727 else
1728 EMSG_RET_NULL(_(e_nopresub));
1729 break;
1730
1731 case Magic('1'):
1732 case Magic('2'):
1733 case Magic('3'):
1734 case Magic('4'):
1735 case Magic('5'):
1736 case Magic('6'):
1737 case Magic('7'):
1738 case Magic('8'):
1739 case Magic('9'):
1740 {
1741 int refnum;
1742
1743 refnum = c - Magic('0');
1744 /*
1745 * Check if the back reference is legal. We must have seen the
1746 * close brace.
1747 * TODO: Should also check that we don't refer to something
1748 * that is repeated (+*=): what instance of the repetition
1749 * should we match?
1750 */
1751 if (!had_endbrace[refnum])
1752 {
1753 /* Trick: check if "@<=" or "@<!" follows, in which case
1754 * the \1 can appear before the referenced match. */
1755 for (p = regparse; *p != NUL; ++p)
1756 if (p[0] == '@' && p[1] == '<'
1757 && (p[2] == '!' || p[2] == '='))
1758 break;
1759 if (*p == NUL)
1760 EMSG_RET_NULL(_("E65: Illegal back reference"));
1761 }
1762 ret = regnode(BACKREF + refnum);
1763 }
1764 break;
1765
1766#ifdef FEAT_SYN_HL
1767 case Magic('z'):
1768 {
1769 c = no_Magic(getchr());
1770 switch (c)
1771 {
1772 case '(': if (reg_do_extmatch != REX_SET)
1773 EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1774 if (one_exactly)
1775 EMSG_ONE_RET_NULL;
1776 ret = reg(REG_ZPAREN, &flags);
1777 if (ret == NULL)
1778 return NULL;
1779 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1780 re_has_z = REX_SET;
1781 break;
1782
1783 case '1':
1784 case '2':
1785 case '3':
1786 case '4':
1787 case '5':
1788 case '6':
1789 case '7':
1790 case '8':
1791 case '9': if (reg_do_extmatch != REX_USE)
1792 EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1793 ret = regnode(ZREF + c - '0');
1794 re_has_z = REX_USE;
1795 break;
1796
1797 case 's': ret = regnode(MOPEN + 0);
1798 break;
1799
1800 case 'e': ret = regnode(MCLOSE + 0);
1801 break;
1802
1803 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1804 }
1805 }
1806 break;
1807#endif
1808
1809 case Magic('%'):
1810 {
1811 c = no_Magic(getchr());
1812 switch (c)
1813 {
1814 /* () without a back reference */
1815 case '(':
1816 if (one_exactly)
1817 EMSG_ONE_RET_NULL;
1818 ret = reg(REG_NPAREN, &flags);
1819 if (ret == NULL)
1820 return NULL;
1821 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1822 break;
1823
1824 /* Catch \%^ and \%$ regardless of where they appear in the
1825 * pattern -- regardless of whether or not it makes sense. */
1826 case '^':
1827 ret = regnode(RE_BOF);
1828 break;
1829
1830 case '$':
1831 ret = regnode(RE_EOF);
1832 break;
1833
1834 case '#':
1835 ret = regnode(CURSOR);
1836 break;
1837
1838 /* \%[abc]: Emit as a list of branches, all ending at the last
1839 * branch which matches nothing. */
1840 case '[':
1841 if (one_exactly) /* doesn't nest */
1842 EMSG_ONE_RET_NULL;
1843 {
1844 char_u *lastbranch;
1845 char_u *lastnode = NULL;
1846 char_u *br;
1847
1848 ret = NULL;
1849 while ((c = getchr()) != ']')
1850 {
1851 if (c == NUL)
1852 EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1853 reg_magic == MAGIC_ALL);
1854 br = regnode(BRANCH);
1855 if (ret == NULL)
1856 ret = br;
1857 else
1858 regtail(lastnode, br);
1859
1860 ungetchr();
1861 one_exactly = TRUE;
1862 lastnode = regatom(flagp);
1863 one_exactly = FALSE;
1864 if (lastnode == NULL)
1865 return NULL;
1866 }
1867 if (ret == NULL)
1868 EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1869 reg_magic == MAGIC_ALL);
1870 lastbranch = regnode(BRANCH);
1871 br = regnode(NOTHING);
1872 if (ret != JUST_CALC_SIZE)
1873 {
1874 regtail(lastnode, br);
1875 regtail(lastbranch, br);
1876 /* connect all branches to the NOTHING
1877 * branch at the end */
1878 for (br = ret; br != lastnode; )
1879 {
1880 if (OP(br) == BRANCH)
1881 {
1882 regtail(br, lastbranch);
1883 br = OPERAND(br);
1884 }
1885 else
1886 br = regnext(br);
1887 }
1888 }
1889 *flagp &= ~HASWIDTH;
1890 break;
1891 }
1892
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001893 case 'd': /* %d123 decimal */
1894 case 'o': /* %o123 octal */
1895 case 'x': /* %xab hex 2 */
1896 case 'u': /* %uabcd hex 4 */
1897 case 'U': /* %U1234abcd hex 8 */
1898 {
1899 int i;
1900
1901 switch (c)
1902 {
1903 case 'd': i = getdecchrs(); break;
1904 case 'o': i = getoctchrs(); break;
1905 case 'x': i = gethexchrs(2); break;
1906 case 'u': i = gethexchrs(4); break;
1907 case 'U': i = gethexchrs(8); break;
1908 default: i = -1; break;
1909 }
1910
1911 if (i < 0)
1912 EMSG_M_RET_NULL(
1913 _("E678: Invalid character after %s%%[dxouU]"),
1914 reg_magic == MAGIC_ALL);
1915 ret = regnode(EXACTLY);
1916 if (i == 0)
1917 regc(0x0a);
1918 else
1919#ifdef FEAT_MBYTE
1920 regmbc(i);
1921#else
1922 regc(i);
1923#endif
1924 regc(NUL);
1925 *flagp |= HASWIDTH;
1926 break;
1927 }
1928
Bram Moolenaar071d4272004-06-13 20:20:40 +00001929 default:
1930 if (VIM_ISDIGIT(c) || c == '<' || c == '>')
1931 {
1932 long_u n = 0;
1933 int cmp;
1934
1935 cmp = c;
1936 if (cmp == '<' || cmp == '>')
1937 c = getchr();
1938 while (VIM_ISDIGIT(c))
1939 {
1940 n = n * 10 + (c - '0');
1941 c = getchr();
1942 }
1943 if (c == 'l' || c == 'c' || c == 'v')
1944 {
1945 if (c == 'l')
1946 ret = regnode(RE_LNUM);
1947 else if (c == 'c')
1948 ret = regnode(RE_COL);
1949 else
1950 ret = regnode(RE_VCOL);
1951 if (ret == JUST_CALC_SIZE)
1952 regsize += 5;
1953 else
1954 {
1955 /* put the number and the optional
1956 * comparator after the opcode */
1957 regcode = re_put_long(regcode, n);
1958 *regcode++ = cmp;
1959 }
1960 break;
1961 }
1962 }
1963
1964 EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1965 reg_magic == MAGIC_ALL);
1966 }
1967 }
1968 break;
1969
1970 case Magic('['):
1971collection:
1972 {
1973 char_u *lp;
1974
1975 /*
1976 * If there is no matching ']', we assume the '[' is a normal
1977 * character. This makes 'incsearch' and ":help [" work.
1978 */
1979 lp = skip_anyof(regparse);
1980 if (*lp == ']') /* there is a matching ']' */
1981 {
1982 int startc = -1; /* > 0 when next '-' is a range */
1983 int endc;
1984
1985 /*
1986 * In a character class, different parsing rules apply.
1987 * Not even \ is special anymore, nothing is.
1988 */
1989 if (*regparse == '^') /* Complement of range. */
1990 {
1991 ret = regnode(ANYBUT + extra);
1992 regparse++;
1993 }
1994 else
1995 ret = regnode(ANYOF + extra);
1996
1997 /* At the start ']' and '-' mean the literal character. */
1998 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001999 {
2000 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002002 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003
2004 while (*regparse != NUL && *regparse != ']')
2005 {
2006 if (*regparse == '-')
2007 {
2008 ++regparse;
2009 /* The '-' is not used for a range at the end and
2010 * after or before a '\n'. */
2011 if (*regparse == ']' || *regparse == NUL
2012 || startc == -1
2013 || (regparse[0] == '\\' && regparse[1] == 'n'))
2014 {
2015 regc('-');
2016 startc = '-'; /* [--x] is a range */
2017 }
2018 else
2019 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002020 /* Also accept "a-[.z.]" */
2021 endc = 0;
2022 if (*regparse == '[')
2023 endc = get_coll_element(&regparse);
2024 if (endc == 0)
2025 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002026#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002027 if (has_mbyte)
2028 endc = mb_ptr2char_adv(&regparse);
2029 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002031 endc = *regparse++;
2032 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002033
2034 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002035 if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002036 endc = coll_get_char();
2037
Bram Moolenaar071d4272004-06-13 20:20:40 +00002038 if (startc > endc)
2039 EMSG_RET_NULL(_(e_invrange));
2040#ifdef FEAT_MBYTE
2041 if (has_mbyte && ((*mb_char2len)(startc) > 1
2042 || (*mb_char2len)(endc) > 1))
2043 {
2044 /* Limit to a range of 256 chars */
2045 if (endc > startc + 256)
2046 EMSG_RET_NULL(_(e_invrange));
2047 while (++startc <= endc)
2048 regmbc(startc);
2049 }
2050 else
2051#endif
2052 {
2053#ifdef EBCDIC
2054 int alpha_only = FALSE;
2055
2056 /* for alphabetical range skip the gaps
2057 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2058 if (isalpha(startc) && isalpha(endc))
2059 alpha_only = TRUE;
2060#endif
2061 while (++startc <= endc)
2062#ifdef EBCDIC
2063 if (!alpha_only || isalpha(startc))
2064#endif
2065 regc(startc);
2066 }
2067 startc = -1;
2068 }
2069 }
2070 /*
2071 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2072 * accepts "\t", "\e", etc., but only when the 'l' flag in
2073 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002074 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002075 */
2076 else if (*regparse == '\\'
Bram Moolenaardf177f62005-02-22 08:39:57 +00002077 && !cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002078 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2079 || (!cpo_lit
2080 && vim_strchr(REGEXP_ABBR,
2081 regparse[1]) != NULL)))
2082 {
2083 regparse++;
2084 if (*regparse == 'n')
2085 {
2086 /* '\n' in range: also match NL */
2087 if (ret != JUST_CALC_SIZE)
2088 {
2089 if (*ret == ANYBUT)
2090 *ret = ANYBUT + ADD_NL;
2091 else if (*ret == ANYOF)
2092 *ret = ANYOF + ADD_NL;
2093 /* else: must have had a \n already */
2094 }
2095 *flagp |= HASNL;
2096 regparse++;
2097 startc = -1;
2098 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002099 else if (*regparse == 'd'
2100 || *regparse == 'o'
2101 || *regparse == 'x'
2102 || *regparse == 'u'
2103 || *regparse == 'U')
2104 {
2105 startc = coll_get_char();
2106 if (startc == 0)
2107 regc(0x0a);
2108 else
2109#ifdef FEAT_MBYTE
2110 regmbc(startc);
2111#else
2112 regc(startc);
2113#endif
2114 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002115 else
2116 {
2117 startc = backslash_trans(*regparse++);
2118 regc(startc);
2119 }
2120 }
2121 else if (*regparse == '[')
2122 {
2123 int c_class;
2124 int cu;
2125
Bram Moolenaardf177f62005-02-22 08:39:57 +00002126 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002127 startc = -1;
2128 /* Characters assumed to be 8 bits! */
2129 switch (c_class)
2130 {
2131 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002132 c_class = get_equi_class(&regparse);
2133 if (c_class != 0)
2134 {
2135 /* produce equivalence class */
2136 reg_equi_class(c_class);
2137 }
2138 else if ((c_class =
2139 get_coll_element(&regparse)) != 0)
2140 {
2141 /* produce a collating element */
2142 regmbc(c_class);
2143 }
2144 else
2145 {
2146 /* literal '[', allow [[-x] as a range */
2147 startc = *regparse++;
2148 regc(startc);
2149 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002150 break;
2151 case CLASS_ALNUM:
2152 for (cu = 1; cu <= 255; cu++)
2153 if (isalnum(cu))
2154 regc(cu);
2155 break;
2156 case CLASS_ALPHA:
2157 for (cu = 1; cu <= 255; cu++)
2158 if (isalpha(cu))
2159 regc(cu);
2160 break;
2161 case CLASS_BLANK:
2162 regc(' ');
2163 regc('\t');
2164 break;
2165 case CLASS_CNTRL:
2166 for (cu = 1; cu <= 255; cu++)
2167 if (iscntrl(cu))
2168 regc(cu);
2169 break;
2170 case CLASS_DIGIT:
2171 for (cu = 1; cu <= 255; cu++)
2172 if (VIM_ISDIGIT(cu))
2173 regc(cu);
2174 break;
2175 case CLASS_GRAPH:
2176 for (cu = 1; cu <= 255; cu++)
2177 if (isgraph(cu))
2178 regc(cu);
2179 break;
2180 case CLASS_LOWER:
2181 for (cu = 1; cu <= 255; cu++)
2182 if (islower(cu))
2183 regc(cu);
2184 break;
2185 case CLASS_PRINT:
2186 for (cu = 1; cu <= 255; cu++)
2187 if (vim_isprintc(cu))
2188 regc(cu);
2189 break;
2190 case CLASS_PUNCT:
2191 for (cu = 1; cu <= 255; cu++)
2192 if (ispunct(cu))
2193 regc(cu);
2194 break;
2195 case CLASS_SPACE:
2196 for (cu = 9; cu <= 13; cu++)
2197 regc(cu);
2198 regc(' ');
2199 break;
2200 case CLASS_UPPER:
2201 for (cu = 1; cu <= 255; cu++)
2202 if (isupper(cu))
2203 regc(cu);
2204 break;
2205 case CLASS_XDIGIT:
2206 for (cu = 1; cu <= 255; cu++)
2207 if (vim_isxdigit(cu))
2208 regc(cu);
2209 break;
2210 case CLASS_TAB:
2211 regc('\t');
2212 break;
2213 case CLASS_RETURN:
2214 regc('\r');
2215 break;
2216 case CLASS_BACKSPACE:
2217 regc('\b');
2218 break;
2219 case CLASS_ESCAPE:
2220 regc('\033');
2221 break;
2222 }
2223 }
2224 else
2225 {
2226#ifdef FEAT_MBYTE
2227 if (has_mbyte)
2228 {
2229 int len;
2230
2231 /* produce a multibyte character, including any
2232 * following composing characters */
2233 startc = mb_ptr2char(regparse);
2234 len = (*mb_ptr2len_check)(regparse);
2235 if (enc_utf8 && utf_char2len(startc) != len)
2236 startc = -1; /* composing chars */
2237 while (--len >= 0)
2238 regc(*regparse++);
2239 }
2240 else
2241#endif
2242 {
2243 startc = *regparse++;
2244 regc(startc);
2245 }
2246 }
2247 }
2248 regc(NUL);
2249 prevchr_len = 1; /* last char was the ']' */
2250 if (*regparse != ']')
2251 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2252 skipchr(); /* let's be friends with the lexer again */
2253 *flagp |= HASWIDTH | SIMPLE;
2254 break;
2255 }
2256 }
2257 /* FALLTHROUGH */
2258
2259 default:
2260 {
2261 int len;
2262
2263#ifdef FEAT_MBYTE
2264 /* A multi-byte character is handled as a separate atom if it's
2265 * before a multi. */
2266 if (has_mbyte && (*mb_char2len)(c) > 1
2267 && re_multi_type(peekchr()) != NOT_MULTI)
2268 {
2269 ret = regnode(MULTIBYTECODE);
2270 regmbc(c);
2271 *flagp |= HASWIDTH | SIMPLE;
2272 break;
2273 }
2274#endif
2275
2276 ret = regnode(EXACTLY);
2277
2278 /*
2279 * Append characters as long as:
2280 * - there is no following multi, we then need the character in
2281 * front of it as a single character operand
2282 * - not running into a Magic character
2283 * - "one_exactly" is not set
2284 * But always emit at least one character. Might be a Multi,
2285 * e.g., a "[" without matching "]".
2286 */
2287 for (len = 0; c != NUL && (len == 0
2288 || (re_multi_type(peekchr()) == NOT_MULTI
2289 && !one_exactly
2290 && !is_Magic(c))); ++len)
2291 {
2292 c = no_Magic(c);
2293#ifdef FEAT_MBYTE
2294 if (has_mbyte)
2295 {
2296 regmbc(c);
2297 if (enc_utf8)
2298 {
2299 int off;
2300 int l;
2301
2302 /* Need to get composing character too, directly
2303 * access regparse for that, because skipchr() skips
2304 * over composing chars. */
2305 ungetchr();
2306 if (*regparse == '\\' && regparse[1] != NUL)
2307 off = 1;
2308 else
2309 off = 0;
2310 for (;;)
2311 {
2312 l = utf_ptr2len_check(regparse + off);
2313 if (!UTF_COMPOSINGLIKE(regparse + off,
2314 regparse + off + l))
2315 break;
2316 off += l;
2317 regmbc(utf_ptr2char(regparse + off));
2318 }
2319 skipchr();
2320 }
2321 }
2322 else
2323#endif
2324 regc(c);
2325 c = getchr();
2326 }
2327 ungetchr();
2328
2329 regc(NUL);
2330 *flagp |= HASWIDTH;
2331 if (len == 1)
2332 *flagp |= SIMPLE;
2333 }
2334 break;
2335 }
2336
2337 return ret;
2338}
2339
2340/*
2341 * emit a node
2342 * Return pointer to generated code.
2343 */
2344 static char_u *
2345regnode(op)
2346 int op;
2347{
2348 char_u *ret;
2349
2350 ret = regcode;
2351 if (ret == JUST_CALC_SIZE)
2352 regsize += 3;
2353 else
2354 {
2355 *regcode++ = op;
2356 *regcode++ = NUL; /* Null "next" pointer. */
2357 *regcode++ = NUL;
2358 }
2359 return ret;
2360}
2361
2362/*
2363 * Emit (if appropriate) a byte of code
2364 */
2365 static void
2366regc(b)
2367 int b;
2368{
2369 if (regcode == JUST_CALC_SIZE)
2370 regsize++;
2371 else
2372 *regcode++ = b;
2373}
2374
#ifdef FEAT_MBYTE
/*
 * Emit the bytes of multi-byte character "c" at "regcode" and advance it by
 * the number of bytes (*mb_char2bytes)() reports.
 * When "regcode" is JUST_CALC_SIZE only "regsize" grows, by
 * (*mb_char2len)(c).
 */
    static void
regmbc(c)
    int         c;
{
    if (regcode != JUST_CALC_SIZE)
    {
        int     nbytes = (*mb_char2bytes)(c, regcode);

        regcode += nbytes;
    }
    else
        regsize += (*mb_char2len)(c);
}
#endif
2389
2390/*
2391 * reginsert - insert an operator in front of already-emitted operand
2392 *
2393 * Means relocating the operand.
2394 */
2395 static void
2396reginsert(op, opnd)
2397 int op;
2398 char_u *opnd;
2399{
2400 char_u *src;
2401 char_u *dst;
2402 char_u *place;
2403
2404 if (regcode == JUST_CALC_SIZE)
2405 {
2406 regsize += 3;
2407 return;
2408 }
2409 src = regcode;
2410 regcode += 3;
2411 dst = regcode;
2412 while (src > opnd)
2413 *--dst = *--src;
2414
2415 place = opnd; /* Op node, where operand used to be. */
2416 *place++ = op;
2417 *place++ = NUL;
2418 *place = NUL;
2419}
2420
2421/*
2422 * reginsert_limits - insert an operator in front of already-emitted operand.
2423 * The operator has the given limit values as operands. Also set next pointer.
2424 *
2425 * Means relocating the operand.
2426 */
2427 static void
2428reginsert_limits(op, minval, maxval, opnd)
2429 int op;
2430 long minval;
2431 long maxval;
2432 char_u *opnd;
2433{
2434 char_u *src;
2435 char_u *dst;
2436 char_u *place;
2437
2438 if (regcode == JUST_CALC_SIZE)
2439 {
2440 regsize += 11;
2441 return;
2442 }
2443 src = regcode;
2444 regcode += 11;
2445 dst = regcode;
2446 while (src > opnd)
2447 *--dst = *--src;
2448
2449 place = opnd; /* Op node, where operand used to be. */
2450 *place++ = op;
2451 *place++ = NUL;
2452 *place++ = NUL;
2453 place = re_put_long(place, (long_u)minval);
2454 place = re_put_long(place, (long_u)maxval);
2455 regtail(opnd, place);
2456}
2457
2458/*
2459 * Write a long as four bytes at "p" and return pointer to the next char.
2460 */
2461 static char_u *
2462re_put_long(p, val)
2463 char_u *p;
2464 long_u val;
2465{
2466 *p++ = (char_u) ((val >> 24) & 0377);
2467 *p++ = (char_u) ((val >> 16) & 0377);
2468 *p++ = (char_u) ((val >> 8) & 0377);
2469 *p++ = (char_u) (val & 0377);
2470 return p;
2471}
2472
2473/*
2474 * regtail - set the next-pointer at the end of a node chain
2475 */
2476 static void
2477regtail(p, val)
2478 char_u *p;
2479 char_u *val;
2480{
2481 char_u *scan;
2482 char_u *temp;
2483 int offset;
2484
2485 if (p == JUST_CALC_SIZE)
2486 return;
2487
2488 /* Find last node. */
2489 scan = p;
2490 for (;;)
2491 {
2492 temp = regnext(scan);
2493 if (temp == NULL)
2494 break;
2495 scan = temp;
2496 }
2497
Bram Moolenaar582fd852005-03-28 20:58:01 +00002498 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 offset = (int)(scan - val);
2500 else
2501 offset = (int)(val - scan);
2502 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2503 *(scan + 2) = (char_u) (offset & 0377);
2504}
2505
2506/*
2507 * regoptail - regtail on item after a BRANCH; nop if none
2508 */
2509 static void
2510regoptail(p, val)
2511 char_u *p;
2512 char_u *val;
2513{
2514 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2515 if (p == NULL || p == JUST_CALC_SIZE
2516 || (OP(p) != BRANCH
2517 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2518 return;
2519 regtail(OPERAND(p), val);
2520}
2521
2522/*
2523 * getchr() - get the next character from the pattern. We know about
2524 * magic and such, so therefore we need a lexical analyzer.
2525 */
2526
/*
 * Lexer state shared by peekchr(), skipchr(), getchr() and ungetchr().
 * initchr() sets the three character variables below to -1.
 */
2527/* static int curchr; */
2528static int prevprevchr; /* skipchr() moves the old "prevchr" here */
2529static int prevchr; /* skipchr() moves the consumed "curchr" here */
2530static int nextchr; /* used for ungetchr() */
2531/*
2532 * Note: prevchr is sometimes -1 when we are not at the start,
2533 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
2534 * taken to be magic -- webb
2535 */
2536static int at_start; /* True when on the first character */
2537static int prev_at_start; /* True when on the second character */
2538
2539 static void
2540initchr(str)
2541 char_u *str;
2542{
2543 regparse = str;
2544 prevchr_len = 0;
2545 curchr = prevprevchr = prevchr = nextchr = -1;
2546 at_start = TRUE;
2547 prev_at_start = FALSE;
2548}
2549
2550 static int
2551peekchr()
2552{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002553 static int after_slash = FALSE;
2554
Bram Moolenaar071d4272004-06-13 20:20:40 +00002555 if (curchr == -1)
2556 {
2557 switch (curchr = regparse[0])
2558 {
2559 case '.':
2560 case '[':
2561 case '~':
2562 /* magic when 'magic' is on */
2563 if (reg_magic >= MAGIC_ON)
2564 curchr = Magic(curchr);
2565 break;
2566 case '(':
2567 case ')':
2568 case '{':
2569 case '%':
2570 case '+':
2571 case '=':
2572 case '?':
2573 case '@':
2574 case '!':
2575 case '&':
2576 case '|':
2577 case '<':
2578 case '>':
2579 case '#': /* future ext. */
2580 case '"': /* future ext. */
2581 case '\'': /* future ext. */
2582 case ',': /* future ext. */
2583 case '-': /* future ext. */
2584 case ':': /* future ext. */
2585 case ';': /* future ext. */
2586 case '`': /* future ext. */
2587 case '/': /* Can't be used in / command */
2588 /* magic only after "\v" */
2589 if (reg_magic == MAGIC_ALL)
2590 curchr = Magic(curchr);
2591 break;
2592 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002593 /* * is not magic as the very first character, eg "?*ptr", when
2594 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2595 * "\(\*" is not magic, thus must be magic if "after_slash" */
2596 if (reg_magic >= MAGIC_ON
2597 && !at_start
2598 && !(prev_at_start && prevchr == Magic('^'))
2599 && (after_slash
2600 || (prevchr != Magic('(')
2601 && prevchr != Magic('&')
2602 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002603 curchr = Magic('*');
2604 break;
2605 case '^':
2606 /* '^' is only magic as the very first character and if it's after
2607 * "\(", "\|", "\&' or "\n" */
2608 if (reg_magic >= MAGIC_OFF
2609 && (at_start
2610 || reg_magic == MAGIC_ALL
2611 || prevchr == Magic('(')
2612 || prevchr == Magic('|')
2613 || prevchr == Magic('&')
2614 || prevchr == Magic('n')
2615 || (no_Magic(prevchr) == '('
2616 && prevprevchr == Magic('%'))))
2617 {
2618 curchr = Magic('^');
2619 at_start = TRUE;
2620 prev_at_start = FALSE;
2621 }
2622 break;
2623 case '$':
2624 /* '$' is only magic as the very last char and if it's in front of
2625 * either "\|", "\)", "\&", or "\n" */
2626 if (reg_magic >= MAGIC_OFF)
2627 {
2628 char_u *p = regparse + 1;
2629
2630 /* ignore \c \C \m and \M after '$' */
2631 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
2632 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
2633 p += 2;
2634 if (p[0] == NUL
2635 || (p[0] == '\\'
2636 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
2637 || p[1] == 'n'))
2638 || reg_magic == MAGIC_ALL)
2639 curchr = Magic('$');
2640 }
2641 break;
2642 case '\\':
2643 {
2644 int c = regparse[1];
2645
2646 if (c == NUL)
2647 curchr = '\\'; /* trailing '\' */
2648 else if (
2649#ifdef EBCDIC
2650 vim_strchr(META, c)
2651#else
2652 c <= '~' && META_flags[c]
2653#endif
2654 )
2655 {
2656 /*
2657 * META contains everything that may be magic sometimes,
2658 * except ^ and $ ("\^" and "\$" are only magic after
2659 * "\v"). We now fetch the next character and toggle its
2660 * magicness. Therefore, \ is so meta-magic that it is
2661 * not in META.
2662 */
2663 curchr = -1;
2664 prev_at_start = at_start;
2665 at_start = FALSE; /* be able to say "/\*ptr" */
2666 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002667 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002668 peekchr();
2669 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002670 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002671 curchr = toggle_Magic(curchr);
2672 }
2673 else if (vim_strchr(REGEXP_ABBR, c))
2674 {
2675 /*
2676 * Handle abbreviations, like "\t" for TAB -- webb
2677 */
2678 curchr = backslash_trans(c);
2679 }
2680 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
2681 curchr = toggle_Magic(c);
2682 else
2683 {
2684 /*
2685 * Next character can never be (made) magic?
2686 * Then backslashing it won't do anything.
2687 */
2688#ifdef FEAT_MBYTE
2689 if (has_mbyte)
2690 curchr = (*mb_ptr2char)(regparse + 1);
2691 else
2692#endif
2693 curchr = c;
2694 }
2695 break;
2696 }
2697
2698#ifdef FEAT_MBYTE
2699 default:
2700 if (has_mbyte)
2701 curchr = (*mb_ptr2char)(regparse);
2702#endif
2703 }
2704 }
2705
2706 return curchr;
2707}
2708
2709/*
2710 * Eat one lexed character. Do this in a way that we can undo it.
2711 */
2712 static void
2713skipchr()
2714{
2715 /* peekchr() eats a backslash, do the same here */
2716 if (*regparse == '\\')
2717 prevchr_len = 1;
2718 else
2719 prevchr_len = 0;
2720 if (regparse[prevchr_len] != NUL)
2721 {
2722#ifdef FEAT_MBYTE
2723 if (has_mbyte)
2724 prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
2725 else
2726#endif
2727 ++prevchr_len;
2728 }
2729 regparse += prevchr_len;
2730 prev_at_start = at_start;
2731 at_start = FALSE;
2732 prevprevchr = prevchr;
2733 prevchr = curchr;
2734 curchr = nextchr; /* use previously unget char, or -1 */
2735 nextchr = -1;
2736}
2737
2738/*
2739 * Skip a character while keeping the value of prev_at_start for at_start.
2740 * prevchr and prevprevchr are also kept.
2741 */
2742 static void
2743skipchr_keepstart()
2744{
2745 int as = prev_at_start;
2746 int pr = prevchr;
2747 int prpr = prevprevchr;
2748
2749 skipchr();
2750 at_start = as;
2751 prevchr = pr;
2752 prevprevchr = prpr;
2753}
2754
/*
 * Return the next lexed character and consume it: peekchr() followed by
 * skipchr().
 */
    static int
getchr()
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
2763
2764/*
2765 * put character back. Works only once!
2766 */
2767 static void
2768ungetchr()
2769{
2770 nextchr = curchr;
2771 curchr = prevchr;
2772 prevchr = prevprevchr;
2773 at_start = prev_at_start;
2774 prev_at_start = FALSE;
2775
2776 /* Backup regparse, so that it's at the same position as before the
2777 * getchr(). */
2778 regparse -= prevchr_len;
2779}
2780
2781/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00002782 * Get and return the value of the hex string at the current position.
2783 * Return -1 if there is no valid hex number.
2784 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002785 * blahblah\%x20asdf
2786 * before-^ ^-after
2787 * The parameter controls the maximum number of input characters. This will be
2788 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2789 */
2790 static int
2791gethexchrs(maxinputlen)
2792 int maxinputlen;
2793{
2794 int nr = 0;
2795 int c;
2796 int i;
2797
2798 for (i = 0; i < maxinputlen; ++i)
2799 {
2800 c = regparse[0];
2801 if (!vim_isxdigit(c))
2802 break;
2803 nr <<= 4;
2804 nr |= hex2nr(c);
2805 ++regparse;
2806 }
2807
2808 if (i == 0)
2809 return -1;
2810 return nr;
2811}
2812
2813/*
2814 * get and return the value of the decimal string immediately after the
2815 * current position. Return -1 for invalid. Consumes all digits.
2816 */
2817 static int
2818getdecchrs()
2819{
2820 int nr = 0;
2821 int c;
2822 int i;
2823
2824 for (i = 0; ; ++i)
2825 {
2826 c = regparse[0];
2827 if (c < '0' || c > '9')
2828 break;
2829 nr *= 10;
2830 nr += c - '0';
2831 ++regparse;
2832 }
2833
2834 if (i == 0)
2835 return -1;
2836 return nr;
2837}
2838
2839/*
2840 * get and return the value of the octal string immediately after the current
2841 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2842 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2843 * treat 8 or 9 as recognised characters. Position is updated:
2844 * blahblah\%o210asdf
2845 * before-^ ^-after
2846 */
2847 static int
2848getoctchrs()
2849{
2850 int nr = 0;
2851 int c;
2852 int i;
2853
2854 for (i = 0; i < 3 && nr < 040; ++i)
2855 {
2856 c = regparse[0];
2857 if (c < '0' || c > '7')
2858 break;
2859 nr <<= 3;
2860 nr |= hex2nr(c);
2861 ++regparse;
2862 }
2863
2864 if (i == 0)
2865 return -1;
2866 return nr;
2867}
2868
2869/*
2870 * Get a number after a backslash that is inside [].
2871 * When nothing is recognized return a backslash.
2872 */
2873 static int
2874coll_get_char()
2875{
2876 int nr = -1;
2877
2878 switch (*regparse++)
2879 {
2880 case 'd': nr = getdecchrs(); break;
2881 case 'o': nr = getoctchrs(); break;
2882 case 'x': nr = gethexchrs(2); break;
2883 case 'u': nr = gethexchrs(4); break;
2884 case 'U': nr = gethexchrs(8); break;
2885 }
2886 if (nr < 0)
2887 {
2888 /* If getting the number fails be backwards compatible: the character
2889 * is a backslash. */
2890 --regparse;
2891 nr = '\\';
2892 }
2893 return nr;
2894}
2895
2896/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00002897 * read_limits - Read two integers to be taken as a minimum and maximum.
2898 * If the first character is '-', then the range is reversed.
2899 * Should end with 'end'. If minval is missing, zero is default, if maxval is
2900 * missing, a very big number is the default.
2901 */
2902 static int
2903read_limits(minval, maxval)
2904 long *minval;
2905 long *maxval;
2906{
2907 int reverse = FALSE;
2908 char_u *first_char;
2909 long tmp;
2910
2911 if (*regparse == '-')
2912 {
2913 /* Starts with '-', so reverse the range later */
2914 regparse++;
2915 reverse = TRUE;
2916 }
2917 first_char = regparse;
2918 *minval = getdigits(&regparse);
2919 if (*regparse == ',') /* There is a comma */
2920 {
2921 if (vim_isdigit(*++regparse))
2922 *maxval = getdigits(&regparse);
2923 else
2924 *maxval = MAX_LIMIT;
2925 }
2926 else if (VIM_ISDIGIT(*first_char))
2927 *maxval = *minval; /* It was \{n} or \{-n} */
2928 else
2929 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
2930 if (*regparse == '\\')
2931 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002932 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002933 {
2934 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2935 reg_magic == MAGIC_ALL ? "" : "\\");
2936 EMSG_RET_FAIL(IObuff);
2937 }
2938
2939 /*
2940 * Reverse the range if there was a '-', or make sure it is in the right
2941 * order otherwise.
2942 */
2943 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2944 {
2945 tmp = *minval;
2946 *minval = *maxval;
2947 *maxval = tmp;
2948 }
2949 skipchr(); /* let's be friends with the lexer again */
2950 return OK;
2951}
2952
2953/*
2954 * vim_regexec and friends
2955 */
2956
2957/*
2958 * Global work variables for vim_regexec().
2959 */
2960
2961/* The current match-position is remembered with these variables: */
2962static linenr_T reglnum; /* line number, relative to first line */
2963static char_u *regline; /* start of current line */
2964static char_u *reginput; /* current input, points into "regline" */
2965
2966static int need_clear_subexpr; /* subexpressions still need to be
2967 * cleared */
2968#ifdef FEAT_SYN_HL
2969static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
2970 * still need to be cleared */
2971#endif
2972
Bram Moolenaar071d4272004-06-13 20:20:40 +00002973/*
2974 * Structure used to save the current input state, when it needs to be
2975 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00002976 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00002977 */
2978typedef struct
2979{
2980 union
2981 {
2982 char_u *ptr; /* reginput pointer, for single-line regexp */
2983 lpos_T pos; /* reginput pos, for multi-line regexp */
2984 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00002985 int rs_len; /* the length of "backpos" mentioned above */
Bram Moolenaar071d4272004-06-13 20:20:40 +00002986} regsave_T;
2987
2988/* struct to save start/end pointer/position in for \(\) */
/* restore_se() reads "pos" when REG_MULTI is set and "ptr" otherwise. */
2989typedef struct
2990{
2991 union
2992 {
2993 char_u *ptr; /* for single-line regexp */
2994 lpos_T pos; /* for multi-line regexp */
2995 } se_u;
2996} save_se_T;
2997
2998static char_u *reg_getline __ARGS((linenr_T lnum));
2999static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
3000static long regtry __ARGS((regprog_T *prog, colnr_T col));
3001static void cleanup_subexpr __ARGS((void));
3002#ifdef FEAT_SYN_HL
3003static void cleanup_zsubexpr __ARGS((void));
3004#endif
3005static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003006static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3007static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003008static int reg_save_equal __ARGS((regsave_T *save));
3009static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3010static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3011
3012/* Save the sub-expressions before attempting a match. */
3013#define save_se(savep, posp, pp) \
3014 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3015
3016/* After a failed match restore the sub-expressions. */
3017#define restore_se(savep, posp, pp) { \
3018 if (REG_MULTI) \
3019 *(posp) = (savep)->se_u.pos; \
3020 else \
3021 *(pp) = (savep)->se_u.ptr; }
3022
3023static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003024static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003025static int regrepeat __ARGS((char_u *p, long maxcount));
3026
/* Only compiled when DEBUG is defined; this file #undef's DEBUG at the top. */
3027#ifdef DEBUG
3028int regnarrate = 0;
3029#endif
3030
3031/*
3032 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3033 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3034 * contains '\c' or '\C' the value is overruled.
3035 */
3036static int ireg_ic;
3037
3038#ifdef FEAT_MBYTE
3039/*
3040 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3041 * in the regexp. Defaults to false, always.
 * vim_regexec(), vim_regexec_nl() and vim_regexec_multi() all reset it to
 * FALSE before matching.
3042 */
3043static int ireg_icombine;
3044#endif
3045
3046/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003047 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3048 * there is no maximum.
 * vim_regexec() and vim_regexec_nl() always set it to zero.
3049 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003050static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003051
3052/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003053 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3054 * slow, we keep one allocated piece of memory and only re-allocate it when
3055 * it's too small. It's freed in vim_regexec_both() when finished.
 * vim_regexec_both() sets reg_tofree to NULL when it starts.
3056 */
3057static char_u *reg_tofree;
3058static unsigned reg_tofreelen;
3059
3060/*
3061 * These variables are set when executing a regexp to speed up the execution.
3062 * Which ones are set depends on whether a single-line or multi-line match is
3063 * done:
3064 * single-line multi-line
3065 * reg_match &regmatch_T NULL
3066 * reg_mmatch NULL &regmmatch_T
3067 * reg_startp reg_match->startp <invalid>
3068 * reg_endp reg_match->endp <invalid>
3069 * reg_startpos <invalid> reg_mmatch->startpos
3070 * reg_endpos <invalid> reg_mmatch->endpos
3071 * reg_win NULL window in which to search
3072 * reg_buf <invalid> buffer in which to search
3073 * reg_firstlnum <invalid> first line in which to search
3074 * reg_maxline 0 last line nr
3075 * reg_line_lbr FALSE or TRUE FALSE
 *
 * The single-line column is set up by vim_regexec() and vim_regexec_nl(),
 * the multi-line column by vim_regexec_multi().
3076 */
3077static regmatch_T *reg_match; /* NULL means multi-line, see REG_MULTI */
3078static regmmatch_T *reg_mmatch;
3079static char_u **reg_startp = NULL;
3080static char_u **reg_endp = NULL;
3081static lpos_T *reg_startpos = NULL;
3082static lpos_T *reg_endpos = NULL;
3083static win_T *reg_win;
3084static buf_T *reg_buf;
3085static linenr_T reg_firstlnum;
3086static linenr_T reg_maxline; /* multi-line: line count of reg_buf minus reg_firstlnum */
3087static int reg_line_lbr; /* "\n" in string is line break */
3088
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003089/* Values for rs_state in regitem_T. */
3090typedef enum regstate_E
3091{
3092 RS_NOPEN = 0 /* NOPEN and NCLOSE */
3093 , RS_MOPEN /* MOPEN + [0-9] */
3094 , RS_MCLOSE /* MCLOSE + [0-9] */
3095#ifdef FEAT_SYN_HL
3096 , RS_ZOPEN /* ZOPEN + [0-9] */
3097 , RS_ZCLOSE /* ZCLOSE + [0-9] */
3098#endif
3099 , RS_BRANCH /* BRANCH */
3100 , RS_BRCPLX_MORE /* BRACE_COMPLEX and trying one more match */
3101 , RS_BRCPLX_LONG /* BRACE_COMPLEX and trying longest match */
3102 , RS_BRCPLX_SHORT /* BRACE_COMPLEX and trying shortest match */
3103 , RS_NOMATCH /* NOMATCH */
3104 , RS_BEHIND1 /* BEHIND / NOBEHIND matching rest */
3105 , RS_BEHIND2 /* BEHIND / NOBEHIND matching behind part */
3106 , RS_STAR_LONG /* STAR/PLUS/BRACE_SIMPLE longest match */
3107 , RS_STAR_SHORT /* STAR/PLUS/BRACE_SIMPLE shortest match */
3108} regstate_T;
3109
3110/*
3111 * When there are alternatives a regitem_T is put on the regstack to remember
3112 * what we are doing.
3113 * Before it may be another type of item, depending on rs_state, to remember
3114 * more things.
3115 */
3116typedef struct regitem_S
3117{
3118 regstate_T rs_state; /* what we are doing, one of RS_ above */
3119 char_u *rs_scan; /* current node in program */
3120 union
3121 {
3122 save_se_T sesave; /* the type save_se() and restore_se() work on */
3123 regsave_T regsave; /* the type reg_save() and reg_restore() work on */
3124 } rs_un; /* room for saving reginput */
3125 short rs_no; /* submatch nr */
3126} regitem_T;
3127
/* Push and pop a regitem_T on the regstack. */
/* NOTE(review): the definitions are not in view here - confirm. */
3128static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3129static void regstack_pop __ARGS((char_u **scan));
3130
3131/* used for BEHIND and NOBEHIND matching */
3132typedef struct regbehind_S
3133{
3134 regsave_T save_after;
3135 regsave_T save_behind;
3136} regbehind_T;
3137
3138/* used for STAR, PLUS and BRACE_SIMPLE matching */
/* NOTE(review): count/minval/maxval presumably hold the current number of
 * repeats and its limits - confirm in regmatch(). */
3139typedef struct regstar_S
3140{
3141 int nextb; /* next byte */
3142 int nextb_ic; /* next byte reverse case */
3143 long count;
3144 long minval;
3145 long maxval;
3146} regstar_T;
3147
3148/* used to store input position when a BACK was encountered, so that we know if
3149 * we made any progress since the last time. */
3150typedef struct backpos_S
3151{
3152 char_u *bp_scan; /* "scan" where BACK was encountered */
3153 regsave_T bp_pos; /* last input position */
3154} backpos_T;
3155
3156/*
3157 * regstack and backpos are used by regmatch(). They are kept over calls to
3158 * avoid invoking malloc() and free() often.
 * vim_regexec_both() initializes both with ga_init2(): regstack with an item
 * size of one byte and a grow size of 10000, backpos with items of
 * sizeof(backpos_T) and a grow size of 10.
3159 */
3160static garray_T regstack; /* stack with regitem_T items, sometimes
3161 preceded by regstar_T or regbehind_T. */
3162static garray_T backpos; /* table with backpos_T for BACK */
3163
Bram Moolenaar071d4272004-06-13 20:20:40 +00003164/*
3165 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3166 */
3167 static char_u *
3168reg_getline(lnum)
3169 linenr_T lnum;
3170{
3171 /* when looking behind for a match/no-match lnum is negative. But we
3172 * can't go before line 1 */
3173 if (reg_firstlnum + lnum < 1)
3174 return NULL;
3175 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3176}
3177
/* NOTE(review): presumably the input position kept for BEHIND/NOBEHIND
 * matching; its users are not in view here - confirm. */
3178static regsave_T behind_pos;
3179
3180#ifdef FEAT_SYN_HL
3181static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3182static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3183static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3184static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3185#endif
3186
3187/* TRUE if using multi-line regexp. */
/* vim_regexec_multi() sets reg_match to NULL; vim_regexec() and
 * vim_regexec_nl() set it to their "rmp" argument. */
3188#define REG_MULTI (reg_match == NULL)
3189
3190/*
3191 * Match a regexp against a string.
3192 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3193 * Uses curbuf for line count and 'iskeyword'.
3194 *
3195 * Return TRUE if there is a match, FALSE if not.
3196 */
3197 int
3198vim_regexec(rmp, line, col)
3199 regmatch_T *rmp;
3200 char_u *line; /* string to match against */
3201 colnr_T col; /* column to start looking for match */
3202{
3203 reg_match = rmp;
3204 reg_mmatch = NULL;
3205 reg_maxline = 0;
3206 reg_line_lbr = FALSE;
3207 reg_win = NULL;
3208 ireg_ic = rmp->rm_ic;
3209#ifdef FEAT_MBYTE
3210 ireg_icombine = FALSE;
3211#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003212 ireg_maxcol = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003213 return (vim_regexec_both(line, col) != 0);
3214}
3215
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break:
 * the only difference in the state set up is that "reg_line_lbr" is TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        nmatched;

    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    ireg_maxcol = 0;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    ireg_ic = rmp->rm_ic;

    nmatched = vim_regexec_both(line, col);
    return nmatched != 0;
}
#endif
3240
3241/*
3242 * Match a regexp against multiple lines.
3243 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3244 * Uses curbuf for line count and 'iskeyword'.
3245 *
3246 * Return zero if there is no match. Return number of lines contained in the
3247 * match otherwise.
3248 */
3249 long
3250vim_regexec_multi(rmp, win, buf, lnum, col)
3251 regmmatch_T *rmp;
3252 win_T *win; /* window in which to search or NULL */
3253 buf_T *buf; /* buffer in which to search */
3254 linenr_T lnum; /* nr of line to start looking for match */
3255 colnr_T col; /* column to start looking for match */
3256{
3257 long r;
3258 buf_T *save_curbuf = curbuf;
3259
3260 reg_match = NULL;
3261 reg_mmatch = rmp;
3262 reg_buf = buf;
3263 reg_win = win;
3264 reg_firstlnum = lnum;
3265 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3266 reg_line_lbr = FALSE;
3267 ireg_ic = rmp->rmm_ic;
3268#ifdef FEAT_MBYTE
3269 ireg_icombine = FALSE;
3270#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003271 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003272
3273 /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3274 curbuf = buf;
3275 r = vim_regexec_both(NULL, col);
3276 curbuf = save_curbuf;
3277
3278 return r;
3279}
3280
3281/*
3282 * Match a regexp against a string ("line" points to the string) or multiple
3283 * lines ("line" is NULL, use reg_getline()).
3284 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003285 static long
3286vim_regexec_both(line, col)
3287 char_u *line;
3288 colnr_T col; /* column to start looking for match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003289{
3290 regprog_T *prog;
3291 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003292 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003293
3294 reg_tofree = NULL;
3295
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003296 /* Init the regstack empty. Use an item size of 1 byte, since we push
3297 * different things onto it. Use a large grow size to avoid reallocating
3298 * it too often. */
3299 ga_init2(&regstack, 1, 10000);
3300
3301 /* Init the backpos table empty. */
3302 ga_init2(&backpos, sizeof(backpos_T), 10);
3303
Bram Moolenaar071d4272004-06-13 20:20:40 +00003304 if (REG_MULTI)
3305 {
3306 prog = reg_mmatch->regprog;
3307 line = reg_getline((linenr_T)0);
3308 reg_startpos = reg_mmatch->startpos;
3309 reg_endpos = reg_mmatch->endpos;
3310 }
3311 else
3312 {
3313 prog = reg_match->regprog;
3314 reg_startp = reg_match->startp;
3315 reg_endp = reg_match->endp;
3316 }
3317
3318 /* Be paranoid... */
3319 if (prog == NULL || line == NULL)
3320 {
3321 EMSG(_(e_null));
3322 goto theend;
3323 }
3324
3325 /* Check validity of program. */
3326 if (prog_magic_wrong())
3327 goto theend;
3328
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003329 /* If the start column is past the maximum column: no need to try. */
3330 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3331 goto theend;
3332
Bram Moolenaar071d4272004-06-13 20:20:40 +00003333 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3334 if (prog->regflags & RF_ICASE)
3335 ireg_ic = TRUE;
3336 else if (prog->regflags & RF_NOICASE)
3337 ireg_ic = FALSE;
3338
3339#ifdef FEAT_MBYTE
3340 /* If pattern contains "\Z" overrule value of ireg_icombine */
3341 if (prog->regflags & RF_ICOMBINE)
3342 ireg_icombine = TRUE;
3343#endif
3344
3345 /* If there is a "must appear" string, look for it. */
3346 if (prog->regmust != NULL)
3347 {
3348 int c;
3349
3350#ifdef FEAT_MBYTE
3351 if (has_mbyte)
3352 c = (*mb_ptr2char)(prog->regmust);
3353 else
3354#endif
3355 c = *prog->regmust;
3356 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003357
3358 /*
3359 * This is used very often, esp. for ":global". Use three versions of
3360 * the loop to avoid overhead of conditions.
3361 */
3362 if (!ireg_ic
3363#ifdef FEAT_MBYTE
3364 && !has_mbyte
3365#endif
3366 )
3367 while ((s = vim_strbyte(s, c)) != NULL)
3368 {
3369 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3370 break; /* Found it. */
3371 ++s;
3372 }
3373#ifdef FEAT_MBYTE
3374 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3375 while ((s = vim_strchr(s, c)) != NULL)
3376 {
3377 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3378 break; /* Found it. */
3379 mb_ptr_adv(s);
3380 }
3381#endif
3382 else
3383 while ((s = cstrchr(s, c)) != NULL)
3384 {
3385 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3386 break; /* Found it. */
3387 mb_ptr_adv(s);
3388 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003389 if (s == NULL) /* Not present. */
3390 goto theend;
3391 }
3392
3393 regline = line;
3394 reglnum = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003395
3396 /* Simplest case: Anchored match need be tried only once. */
3397 if (prog->reganch)
3398 {
3399 int c;
3400
3401#ifdef FEAT_MBYTE
3402 if (has_mbyte)
3403 c = (*mb_ptr2char)(regline + col);
3404 else
3405#endif
3406 c = regline[col];
3407 if (prog->regstart == NUL
3408 || prog->regstart == c
3409 || (ireg_ic && ((
3410#ifdef FEAT_MBYTE
3411 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3412 || (c < 255 && prog->regstart < 255 &&
3413#endif
3414 TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
3415 retval = regtry(prog, col);
3416 else
3417 retval = 0;
3418 }
3419 else
3420 {
3421 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003422 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003423 {
3424 if (prog->regstart != NUL)
3425 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003426 /* Skip until the char we know it must start with.
3427 * Used often, do some work to avoid call overhead. */
3428 if (!ireg_ic
3429#ifdef FEAT_MBYTE
3430 && !has_mbyte
3431#endif
3432 )
3433 s = vim_strbyte(regline + col, prog->regstart);
3434 else
3435 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003436 if (s == NULL)
3437 {
3438 retval = 0;
3439 break;
3440 }
3441 col = (int)(s - regline);
3442 }
3443
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003444 /* Check for maximum column to try. */
3445 if (ireg_maxcol > 0 && col >= ireg_maxcol)
3446 {
3447 retval = 0;
3448 break;
3449 }
3450
Bram Moolenaar071d4272004-06-13 20:20:40 +00003451 retval = regtry(prog, col);
3452 if (retval > 0)
3453 break;
3454
3455 /* if not currently on the first line, get it again */
3456 if (reglnum != 0)
3457 {
3458 regline = reg_getline((linenr_T)0);
3459 reglnum = 0;
3460 }
3461 if (regline[col] == NUL)
3462 break;
3463#ifdef FEAT_MBYTE
3464 if (has_mbyte)
3465 col += (*mb_ptr2len_check)(regline + col);
3466 else
3467#endif
3468 ++col;
3469 }
3470 }
3471
Bram Moolenaar071d4272004-06-13 20:20:40 +00003472theend:
Bram Moolenaar071d4272004-06-13 20:20:40 +00003473 vim_free(reg_tofree);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003474 ga_clear(&regstack);
3475 ga_clear(&backpos);
3476
Bram Moolenaar071d4272004-06-13 20:20:40 +00003477 return retval;
3478}
3479
3480#ifdef FEAT_SYN_HL
3481static reg_extmatch_T *make_extmatch __ARGS((void));
3482
3483/*
3484 * Create a new extmatch and mark it as referenced once.
3485 */
3486 static reg_extmatch_T *
3487make_extmatch()
3488{
3489 reg_extmatch_T *em;
3490
3491 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3492 if (em != NULL)
3493 em->refcnt = 1;
3494 return em;
3495}
3496
3497/*
3498 * Add a reference to an extmatch.
3499 */
3500 reg_extmatch_T *
3501ref_extmatch(em)
3502 reg_extmatch_T *em;
3503{
3504 if (em != NULL)
3505 em->refcnt++;
3506 return em;
3507}
3508
3509/*
3510 * Remove a reference to an extmatch. If there are no references left, free
3511 * the info.
3512 */
3513 void
3514unref_extmatch(em)
3515 reg_extmatch_T *em;
3516{
3517 int i;
3518
3519 if (em != NULL && --em->refcnt <= 0)
3520 {
3521 for (i = 0; i < NSUBEXP; ++i)
3522 vim_free(em->matches[i]);
3523 vim_free(em);
3524 }
3525}
3526#endif
3527
3528/*
3529 * regtry - try match of "prog" with at regline["col"].
3530 * Returns 0 for failure, number of lines contained in the match otherwise.
3531 */
3532 static long
3533regtry(prog, col)
3534 regprog_T *prog;
3535 colnr_T col;
3536{
3537 reginput = regline + col;
3538 need_clear_subexpr = TRUE;
3539#ifdef FEAT_SYN_HL
3540 /* Clear the external match subpointers if necessary. */
3541 if (prog->reghasz == REX_SET)
3542 need_clear_zsubexpr = TRUE;
3543#endif
3544
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003545 if (regmatch(prog->program + 1) == 0)
3546 return 0;
3547
3548 cleanup_subexpr();
3549 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003550 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003551 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003552 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003553 reg_startpos[0].lnum = 0;
3554 reg_startpos[0].col = col;
3555 }
3556 if (reg_endpos[0].lnum < 0)
3557 {
3558 reg_endpos[0].lnum = reglnum;
3559 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003560 }
3561 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003562 /* Use line number of "\ze". */
3563 reglnum = reg_endpos[0].lnum;
3564 }
3565 else
3566 {
3567 if (reg_startp[0] == NULL)
3568 reg_startp[0] = regline + col;
3569 if (reg_endp[0] == NULL)
3570 reg_endp[0] = reginput;
3571 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003572#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003573 /* Package any found \z(...\) matches for export. Default is none. */
3574 unref_extmatch(re_extmatch_out);
3575 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003576
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003577 if (prog->reghasz == REX_SET)
3578 {
3579 int i;
3580
3581 cleanup_zsubexpr();
3582 re_extmatch_out = make_extmatch();
3583 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003584 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003585 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003586 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003587 /* Only accept single line matches. */
3588 if (reg_startzpos[i].lnum >= 0
3589 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3590 re_extmatch_out->matches[i] =
3591 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003592 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003593 reg_endzpos[i].col - reg_startzpos[i].col);
3594 }
3595 else
3596 {
3597 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3598 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00003599 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003600 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003601 }
3602 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003603 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003604#endif
3605 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003606}
3607
3608#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00003609static int reg_prev_class __ARGS((void));
3610
Bram Moolenaar071d4272004-06-13 20:20:40 +00003611/*
3612 * Get class of previous character.
3613 */
3614 static int
3615reg_prev_class()
3616{
3617 if (reginput > regline)
3618 return mb_get_class(reginput - 1
3619 - (*mb_head_off)(regline, reginput - 1));
3620 return -1;
3621}
3622
Bram Moolenaar071d4272004-06-13 20:20:40 +00003623#endif
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00003624#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003625
3626/*
3627 * The arguments from BRACE_LIMITS are stored here. They are actually local
3628 * to regmatch(), but they are here to reduce the amount of stack space used
3629 * (it can be called recursively many times).
3630 */
3631static long bl_minval;
3632static long bl_maxval;
3633
3634/*
3635 * regmatch - main matching routine
3636 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003637 * Conceptually the strategy is simple: Check to see whether the current node
3638 * matches, push an item onto the regstack and loop to see whether the rest
3639 * matches, and then act accordingly. In practice we make some effort to
3640 * avoid using the regstack, in particular by going through "ordinary" nodes
3641 * (that don't need to know whether the rest of the match failed) by a nested
3642 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003643 *
3644 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
3645 * the last matched character.
3646 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
3647 * undefined state!
3648 */
3649 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003650regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003651 char_u *scan; /* Current node. */
3652{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003653 char_u *next; /* Next node. */
3654 int op;
3655 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003656 regitem_T *rp;
3657 int no;
3658 int status; /* one of the RA_ values: */
3659#define RA_FAIL 1 /* something failed, abort */
3660#define RA_CONT 2 /* continue in inner loop */
3661#define RA_BREAK 3 /* break inner loop */
3662#define RA_MATCH 4 /* successful match */
3663#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003664
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003665 /* Init the regstack and backpos table empty. They are initialized and
3666 * freed in vim_regexec_both() to reduce malloc()/free() calls. */
3667 regstack.ga_len = 0;
3668 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003669
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003670 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003671 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003672 */
3673 for (;;)
3674 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003675 /* Some patterns may cause a long time to match, even though they are not
3676 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
3677 fast_breakcheck();
3678
3679#ifdef DEBUG
3680 if (scan != NULL && regnarrate)
3681 {
3682 mch_errmsg(regprop(scan));
3683 mch_errmsg("(\n");
3684 }
3685#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003686
3687 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003688 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003689 * regstack.
3690 */
3691 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003692 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003693 if (got_int || scan == NULL)
3694 {
3695 status = RA_FAIL;
3696 break;
3697 }
3698 status = RA_CONT;
3699
Bram Moolenaar071d4272004-06-13 20:20:40 +00003700#ifdef DEBUG
3701 if (regnarrate)
3702 {
3703 mch_errmsg(regprop(scan));
3704 mch_errmsg("...\n");
3705# ifdef FEAT_SYN_HL
3706 if (re_extmatch_in != NULL)
3707 {
3708 int i;
3709
3710 mch_errmsg(_("External submatches:\n"));
3711 for (i = 0; i < NSUBEXP; i++)
3712 {
3713 mch_errmsg(" \"");
3714 if (re_extmatch_in->matches[i] != NULL)
3715 mch_errmsg(re_extmatch_in->matches[i]);
3716 mch_errmsg("\"\n");
3717 }
3718 }
3719# endif
3720 }
3721#endif
3722 next = regnext(scan);
3723
3724 op = OP(scan);
3725 /* Check for character class with NL added. */
3726 if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
3727 {
3728 reg_nextline();
3729 }
3730 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3731 {
3732 ADVANCE_REGINPUT();
3733 }
3734 else
3735 {
3736 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003737 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003738#ifdef FEAT_MBYTE
3739 if (has_mbyte)
3740 c = (*mb_ptr2char)(reginput);
3741 else
3742#endif
3743 c = *reginput;
3744 switch (op)
3745 {
3746 case BOL:
3747 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003748 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003749 break;
3750
3751 case EOL:
3752 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003753 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003754 break;
3755
3756 case RE_BOF:
3757 /* Passing -1 to the getline() function provided for the search
3758 * should always return NULL if the current line is the first
3759 * line of the file. */
3760 if (reglnum != 0 || reginput != regline
3761 || (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003762 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003763 break;
3764
3765 case RE_EOF:
3766 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003767 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003768 break;
3769
3770 case CURSOR:
3771 /* Check if the buffer is in a window and compare the
3772 * reg_win->w_cursor position to the match position. */
3773 if (reg_win == NULL
3774 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3775 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003776 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003777 break;
3778
3779 case RE_LNUM:
3780 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3781 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003782 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003783 break;
3784
3785 case RE_COL:
3786 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003787 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003788 break;
3789
3790 case RE_VCOL:
3791 if (!re_num_cmp((long_u)win_linetabsize(
3792 reg_win == NULL ? curwin : reg_win,
3793 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003794 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003795 break;
3796
3797 case BOW: /* \<word; reginput points to w */
3798 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003799 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003800#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003801 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003802 {
3803 int this_class;
3804
3805 /* Get class of current and previous char (if it exists). */
3806 this_class = mb_get_class(reginput);
3807 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003808 status = RA_NOMATCH; /* not on a word at all */
3809 else if (reg_prev_class() == this_class)
3810 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003811 }
3812#endif
3813 else
3814 {
3815 if (!vim_iswordc(c)
3816 || (reginput > regline && vim_iswordc(reginput[-1])))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003817 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003818 }
3819 break;
3820
3821 case EOW: /* word\>; reginput points after d */
3822 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003823 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003824#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003825 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003826 {
3827 int this_class, prev_class;
3828
3829 /* Get class of current and previous char (if it exists). */
3830 this_class = mb_get_class(reginput);
3831 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003832 if (this_class == prev_class
3833 || prev_class == 0 || prev_class == 1)
3834 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003835 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003836#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003837 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003838 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003839 if (!vim_iswordc(reginput[-1])
3840 || (reginput[0] != NUL && vim_iswordc(c)))
3841 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003842 }
3843 break; /* Matched with EOW */
3844
3845 case ANY:
3846 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003847 status = RA_NOMATCH;
3848 else
3849 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003850 break;
3851
3852 case IDENT:
3853 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003854 status = RA_NOMATCH;
3855 else
3856 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003857 break;
3858
3859 case SIDENT:
3860 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003861 status = RA_NOMATCH;
3862 else
3863 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003864 break;
3865
3866 case KWORD:
3867 if (!vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003868 status = RA_NOMATCH;
3869 else
3870 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003871 break;
3872
3873 case SKWORD:
3874 if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003875 status = RA_NOMATCH;
3876 else
3877 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003878 break;
3879
3880 case FNAME:
3881 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003882 status = RA_NOMATCH;
3883 else
3884 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003885 break;
3886
3887 case SFNAME:
3888 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003889 status = RA_NOMATCH;
3890 else
3891 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003892 break;
3893
3894 case PRINT:
3895 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003896 status = RA_NOMATCH;
3897 else
3898 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003899 break;
3900
3901 case SPRINT:
3902 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003903 status = RA_NOMATCH;
3904 else
3905 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003906 break;
3907
3908 case WHITE:
3909 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003910 status = RA_NOMATCH;
3911 else
3912 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003913 break;
3914
3915 case NWHITE:
3916 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003917 status = RA_NOMATCH;
3918 else
3919 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003920 break;
3921
3922 case DIGIT:
3923 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003924 status = RA_NOMATCH;
3925 else
3926 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003927 break;
3928
3929 case NDIGIT:
3930 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003931 status = RA_NOMATCH;
3932 else
3933 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003934 break;
3935
3936 case HEX:
3937 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003938 status = RA_NOMATCH;
3939 else
3940 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003941 break;
3942
3943 case NHEX:
3944 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003945 status = RA_NOMATCH;
3946 else
3947 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003948 break;
3949
3950 case OCTAL:
3951 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003952 status = RA_NOMATCH;
3953 else
3954 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003955 break;
3956
3957 case NOCTAL:
3958 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003959 status = RA_NOMATCH;
3960 else
3961 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003962 break;
3963
3964 case WORD:
3965 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003966 status = RA_NOMATCH;
3967 else
3968 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003969 break;
3970
3971 case NWORD:
3972 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003973 status = RA_NOMATCH;
3974 else
3975 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003976 break;
3977
3978 case HEAD:
3979 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003980 status = RA_NOMATCH;
3981 else
3982 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003983 break;
3984
3985 case NHEAD:
3986 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003987 status = RA_NOMATCH;
3988 else
3989 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003990 break;
3991
3992 case ALPHA:
3993 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003994 status = RA_NOMATCH;
3995 else
3996 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003997 break;
3998
3999 case NALPHA:
4000 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004001 status = RA_NOMATCH;
4002 else
4003 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004004 break;
4005
4006 case LOWER:
4007 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004008 status = RA_NOMATCH;
4009 else
4010 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004011 break;
4012
4013 case NLOWER:
4014 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004015 status = RA_NOMATCH;
4016 else
4017 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004018 break;
4019
4020 case UPPER:
4021 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004022 status = RA_NOMATCH;
4023 else
4024 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004025 break;
4026
4027 case NUPPER:
4028 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004029 status = RA_NOMATCH;
4030 else
4031 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004032 break;
4033
4034 case EXACTLY:
4035 {
4036 int len;
4037 char_u *opnd;
4038
4039 opnd = OPERAND(scan);
4040 /* Inline the first byte, for speed. */
4041 if (*opnd != *reginput
4042 && (!ireg_ic || (
4043#ifdef FEAT_MBYTE
4044 !enc_utf8 &&
4045#endif
4046 TOLOWER_LOC(*opnd) != TOLOWER_LOC(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004047 status = RA_NOMATCH;
4048 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004049 {
4050 /* match empty string always works; happens when "~" is
4051 * empty. */
4052 }
4053 else if (opnd[1] == NUL
4054#ifdef FEAT_MBYTE
4055 && !(enc_utf8 && ireg_ic)
4056#endif
4057 )
4058 ++reginput; /* matched a single char */
4059 else
4060 {
4061 len = (int)STRLEN(opnd);
4062 /* Need to match first byte again for multi-byte. */
4063 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004064 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004065#ifdef FEAT_MBYTE
4066 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004067 else if (enc_utf8
4068 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004069 {
4070 /* raaron: This code makes a composing character get
4071 * ignored, which is the correct behavior (sometimes)
4072 * for voweled Hebrew texts. */
4073 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004074 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004075 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004076#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004077 else
4078 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004079 }
4080 }
4081 break;
4082
4083 case ANYOF:
4084 case ANYBUT:
4085 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004086 status = RA_NOMATCH;
4087 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4088 status = RA_NOMATCH;
4089 else
4090 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004091 break;
4092
4093#ifdef FEAT_MBYTE
4094 case MULTIBYTECODE:
4095 if (has_mbyte)
4096 {
4097 int i, len;
4098 char_u *opnd;
4099
4100 opnd = OPERAND(scan);
4101 /* Safety check (just in case 'encoding' was changed since
4102 * compiling the program). */
4103 if ((len = (*mb_ptr2len_check)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004104 {
4105 status = RA_NOMATCH;
4106 break;
4107 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004108 for (i = 0; i < len; ++i)
4109 if (opnd[i] != reginput[i])
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004110 {
4111 status = RA_NOMATCH;
4112 break;
4113 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004114 reginput += len;
4115 }
4116 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004117 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004118 break;
4119#endif
4120
4121 case NOTHING:
4122 break;
4123
4124 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004125 {
4126 int i;
4127 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004128
Bram Moolenaar582fd852005-03-28 20:58:01 +00004129 /*
4130 * When we run into BACK we need to check if we don't keep
4131 * looping without matching any input. The second and later
4132 * times a BACK is encountered it fails if the input is still
4133 * at the same position as the previous time.
4134 * The positions are stored in "backpos" and found by the
4135 * current value of "scan", the position in the RE program.
4136 */
4137 bp = (backpos_T *)backpos.ga_data;
4138 for (i = 0; i < backpos.ga_len; ++i)
4139 if (bp[i].bp_scan == scan)
4140 break;
4141 if (i == backpos.ga_len)
4142 {
4143 /* First time at this BACK, make room to store the pos. */
4144 if (ga_grow(&backpos, 1) == FAIL)
4145 status = RA_FAIL;
4146 else
4147 {
4148 /* get "ga_data" again, it may have changed */
4149 bp = (backpos_T *)backpos.ga_data;
4150 bp[i].bp_scan = scan;
4151 ++backpos.ga_len;
4152 }
4153 }
4154 else if (reg_save_equal(&bp[i].bp_pos))
4155 /* Still at same position as last time, fail. */
4156 status = RA_NOMATCH;
4157
4158 if (status != RA_FAIL && status != RA_NOMATCH)
4159 reg_save(&bp[i].bp_pos, &backpos);
4160 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004161 break;
4162
Bram Moolenaar071d4272004-06-13 20:20:40 +00004163 case MOPEN + 0: /* Match start: \zs */
4164 case MOPEN + 1: /* \( */
4165 case MOPEN + 2:
4166 case MOPEN + 3:
4167 case MOPEN + 4:
4168 case MOPEN + 5:
4169 case MOPEN + 6:
4170 case MOPEN + 7:
4171 case MOPEN + 8:
4172 case MOPEN + 9:
4173 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004174 no = op - MOPEN;
4175 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004176 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004177 if (rp == NULL)
4178 status = RA_FAIL;
4179 else
4180 {
4181 rp->rs_no = no;
4182 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4183 &reg_startp[no]);
4184 /* We simply continue and handle the result when done. */
4185 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004186 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004187 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004188
4189 case NOPEN: /* \%( */
4190 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004191 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004192 status = RA_FAIL;
4193 /* We simply continue and handle the result when done. */
4194 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004195
4196#ifdef FEAT_SYN_HL
4197 case ZOPEN + 1:
4198 case ZOPEN + 2:
4199 case ZOPEN + 3:
4200 case ZOPEN + 4:
4201 case ZOPEN + 5:
4202 case ZOPEN + 6:
4203 case ZOPEN + 7:
4204 case ZOPEN + 8:
4205 case ZOPEN + 9:
4206 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004207 no = op - ZOPEN;
4208 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004209 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004210 if (rp == NULL)
4211 status = RA_FAIL;
4212 else
4213 {
4214 rp->rs_no = no;
4215 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4216 &reg_startzp[no]);
4217 /* We simply continue and handle the result when done. */
4218 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004219 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004220 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004221#endif
4222
4223 case MCLOSE + 0: /* Match end: \ze */
4224 case MCLOSE + 1: /* \) */
4225 case MCLOSE + 2:
4226 case MCLOSE + 3:
4227 case MCLOSE + 4:
4228 case MCLOSE + 5:
4229 case MCLOSE + 6:
4230 case MCLOSE + 7:
4231 case MCLOSE + 8:
4232 case MCLOSE + 9:
4233 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004234 no = op - MCLOSE;
4235 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004236 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004237 if (rp == NULL)
4238 status = RA_FAIL;
4239 else
4240 {
4241 rp->rs_no = no;
4242 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4243 /* We simply continue and handle the result when done. */
4244 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004245 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004246 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004247
4248#ifdef FEAT_SYN_HL
4249 case ZCLOSE + 1: /* \) after \z( */
4250 case ZCLOSE + 2:
4251 case ZCLOSE + 3:
4252 case ZCLOSE + 4:
4253 case ZCLOSE + 5:
4254 case ZCLOSE + 6:
4255 case ZCLOSE + 7:
4256 case ZCLOSE + 8:
4257 case ZCLOSE + 9:
4258 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004259 no = op - ZCLOSE;
4260 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004261 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004262 if (rp == NULL)
4263 status = RA_FAIL;
4264 else
4265 {
4266 rp->rs_no = no;
4267 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4268 &reg_endzp[no]);
4269 /* We simply continue and handle the result when done. */
4270 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004271 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004272 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004273#endif
4274
4275 case BACKREF + 1:
4276 case BACKREF + 2:
4277 case BACKREF + 3:
4278 case BACKREF + 4:
4279 case BACKREF + 5:
4280 case BACKREF + 6:
4281 case BACKREF + 7:
4282 case BACKREF + 8:
4283 case BACKREF + 9:
4284 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004285 int len;
4286 linenr_T clnum;
4287 colnr_T ccol;
4288 char_u *p;
4289
4290 no = op - BACKREF;
4291 cleanup_subexpr();
4292 if (!REG_MULTI) /* Single-line regexp */
4293 {
4294 if (reg_endp[no] == NULL)
4295 {
4296 /* Backref was not set: Match an empty string. */
4297 len = 0;
4298 }
4299 else
4300 {
4301 /* Compare current input with back-ref in the same
4302 * line. */
4303 len = (int)(reg_endp[no] - reg_startp[no]);
4304 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004305 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004306 }
4307 }
4308 else /* Multi-line regexp */
4309 {
4310 if (reg_endpos[no].lnum < 0)
4311 {
4312 /* Backref was not set: Match an empty string. */
4313 len = 0;
4314 }
4315 else
4316 {
4317 if (reg_startpos[no].lnum == reglnum
4318 && reg_endpos[no].lnum == reglnum)
4319 {
4320 /* Compare back-ref within the current line. */
4321 len = reg_endpos[no].col - reg_startpos[no].col;
4322 if (cstrncmp(regline + reg_startpos[no].col,
4323 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004324 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004325 }
4326 else
4327 {
4328 /* Messy situation: Need to compare between two
4329 * lines. */
4330 ccol = reg_startpos[no].col;
4331 clnum = reg_startpos[no].lnum;
4332 for (;;)
4333 {
4334 /* Since getting one line may invalidate
4335 * the other, need to make copy. Slow! */
4336 if (regline != reg_tofree)
4337 {
4338 len = (int)STRLEN(regline);
4339 if (reg_tofree == NULL
4340 || len >= (int)reg_tofreelen)
4341 {
4342 len += 50; /* get some extra */
4343 vim_free(reg_tofree);
4344 reg_tofree = alloc(len);
4345 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004346 {
4347 status = RA_FAIL; /* outof memory!*/
4348 break;
4349 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004350 reg_tofreelen = len;
4351 }
4352 STRCPY(reg_tofree, regline);
4353 reginput = reg_tofree
4354 + (reginput - regline);
4355 regline = reg_tofree;
4356 }
4357
4358 /* Get the line to compare with. */
4359 p = reg_getline(clnum);
4360 if (clnum == reg_endpos[no].lnum)
4361 len = reg_endpos[no].col - ccol;
4362 else
4363 len = (int)STRLEN(p + ccol);
4364
4365 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004366 {
4367 status = RA_NOMATCH; /* doesn't match */
4368 break;
4369 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004370 if (clnum == reg_endpos[no].lnum)
4371 break; /* match and at end! */
4372 if (reglnum == reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004373 {
4374 status = RA_NOMATCH; /* text too short */
4375 break;
4376 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004377
4378 /* Advance to next line. */
4379 reg_nextline();
4380 ++clnum;
4381 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004382 if (got_int)
4383 {
4384 status = RA_FAIL;
4385 break;
4386 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004387 }
4388
4389 /* found a match! Note that regline may now point
4390 * to a copy of the line, that should not matter. */
4391 }
4392 }
4393 }
4394
4395 /* Matched the backref, skip over it. */
4396 reginput += len;
4397 }
4398 break;
4399
4400#ifdef FEAT_SYN_HL
4401 case ZREF + 1:
4402 case ZREF + 2:
4403 case ZREF + 3:
4404 case ZREF + 4:
4405 case ZREF + 5:
4406 case ZREF + 6:
4407 case ZREF + 7:
4408 case ZREF + 8:
4409 case ZREF + 9:
4410 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004411 int len;
4412
4413 cleanup_zsubexpr();
4414 no = op - ZREF;
4415 if (re_extmatch_in != NULL
4416 && re_extmatch_in->matches[no] != NULL)
4417 {
4418 len = (int)STRLEN(re_extmatch_in->matches[no]);
4419 if (cstrncmp(re_extmatch_in->matches[no],
4420 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004421 status = RA_NOMATCH;
4422 else
4423 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004424 }
4425 else
4426 {
4427 /* Backref was not set: Match an empty string. */
4428 }
4429 }
4430 break;
4431#endif
4432
4433 case BRANCH:
4434 {
4435 if (OP(next) != BRANCH) /* No choice. */
4436 next = OPERAND(scan); /* Avoid recursion. */
4437 else
4438 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004439 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004440 if (rp == NULL)
4441 status = RA_FAIL;
4442 else
4443 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004444 }
4445 }
4446 break;
4447
4448 case BRACE_LIMITS:
4449 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004450 if (OP(next) == BRACE_SIMPLE)
4451 {
4452 bl_minval = OPERAND_MIN(scan);
4453 bl_maxval = OPERAND_MAX(scan);
4454 }
4455 else if (OP(next) >= BRACE_COMPLEX
4456 && OP(next) < BRACE_COMPLEX + 10)
4457 {
4458 no = OP(next) - BRACE_COMPLEX;
4459 brace_min[no] = OPERAND_MIN(scan);
4460 brace_max[no] = OPERAND_MAX(scan);
4461 brace_count[no] = 0;
4462 }
4463 else
4464 {
4465 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004466 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004467 }
4468 }
4469 break;
4470
4471 case BRACE_COMPLEX + 0:
4472 case BRACE_COMPLEX + 1:
4473 case BRACE_COMPLEX + 2:
4474 case BRACE_COMPLEX + 3:
4475 case BRACE_COMPLEX + 4:
4476 case BRACE_COMPLEX + 5:
4477 case BRACE_COMPLEX + 6:
4478 case BRACE_COMPLEX + 7:
4479 case BRACE_COMPLEX + 8:
4480 case BRACE_COMPLEX + 9:
4481 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004482 no = op - BRACE_COMPLEX;
4483 ++brace_count[no];
4484
4485 /* If not matched enough times yet, try one more */
4486 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004487 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004488 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004489 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004490 if (rp == NULL)
4491 status = RA_FAIL;
4492 else
4493 {
4494 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004495 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004496 next = OPERAND(scan);
4497 /* We continue and handle the result when done. */
4498 }
4499 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004500 }
4501
4502 /* If matched enough times, may try matching some more */
4503 if (brace_min[no] <= brace_max[no])
4504 {
4505 /* Range is the normal way around, use longest match */
4506 if (brace_count[no] <= brace_max[no])
4507 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004508 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004509 if (rp == NULL)
4510 status = RA_FAIL;
4511 else
4512 {
4513 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004514 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004515 next = OPERAND(scan);
4516 /* We continue and handle the result when done. */
4517 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004518 }
4519 }
4520 else
4521 {
4522 /* Range is backwards, use shortest match first */
4523 if (brace_count[no] <= brace_min[no])
4524 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004525 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004526 if (rp == NULL)
4527 status = RA_FAIL;
4528 else
4529 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004530 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004531 /* We continue and handle the result when done. */
4532 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004533 }
4534 }
4535 }
4536 break;
4537
4538 case BRACE_SIMPLE:
4539 case STAR:
4540 case PLUS:
4541 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004542 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004543
4544 /*
4545 * Lookahead to avoid useless match attempts when we know
4546 * what character comes next.
4547 */
4548 if (OP(next) == EXACTLY)
4549 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004550 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004551 if (ireg_ic)
4552 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004553 if (isupper(rst.nextb))
4554 rst.nextb_ic = TOLOWER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004555 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004556 rst.nextb_ic = TOUPPER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004557 }
4558 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004559 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 }
4561 else
4562 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004563 rst.nextb = NUL;
4564 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 }
4566 if (op != BRACE_SIMPLE)
4567 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004568 rst.minval = (op == STAR) ? 0 : 1;
4569 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004570 }
4571 else
4572 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004573 rst.minval = bl_minval;
4574 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004575 }
4576
4577 /*
4578 * When maxval > minval, try matching as much as possible, up
4579 * to maxval. When maxval < minval, try matching at least the
4580 * minimal number (since the range is backwards, that's also
4581 * maxval!).
4582 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004583 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004584 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004585 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004586 status = RA_FAIL;
4587 break;
4588 }
4589 if (rst.minval <= rst.maxval
4590 ? rst.count >= rst.minval : rst.count >= rst.maxval)
4591 {
4592 /* It could match. Prepare for trying to match what
4593 * follows. The code is below. Parameters are stored in
4594 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004595 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004596 {
4597 EMSG(_(e_maxmempat));
4598 status = RA_FAIL;
4599 }
4600 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004601 status = RA_FAIL;
4602 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004603 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004604 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004605 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00004606 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004607 if (rp == NULL)
4608 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004610 {
4611 *(((regstar_T *)rp) - 1) = rst;
4612 status = RA_BREAK; /* skip the restore bits */
4613 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004614 }
4615 }
4616 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004617 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004618
Bram Moolenaar071d4272004-06-13 20:20:40 +00004619 }
4620 break;
4621
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004622 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 case MATCH:
4624 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004625 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004626 if (rp == NULL)
4627 status = RA_FAIL;
4628 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004629 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004630 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004631 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004632 next = OPERAND(scan);
4633 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004634 }
4635 break;
4636
4637 case BEHIND:
4638 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004639 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004640 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004641 {
4642 EMSG(_(e_maxmempat));
4643 status = RA_FAIL;
4644 }
4645 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004646 status = RA_FAIL;
4647 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004648 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004649 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004650 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004651 if (rp == NULL)
4652 status = RA_FAIL;
4653 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004654 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004655 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004656 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004657 /* First try if what follows matches. If it does then we
4658 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004659 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004660 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004661 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004662
4663 case BHPOS:
4664 if (REG_MULTI)
4665 {
4666 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4667 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004668 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004669 }
4670 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004671 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004672 break;
4673
4674 case NEWL:
4675 if ((c != NUL || reglnum == reg_maxline)
4676 && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004677 status = RA_NOMATCH;
4678 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004679 ADVANCE_REGINPUT();
4680 else
4681 reg_nextline();
4682 break;
4683
4684 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004685 status = RA_MATCH; /* Success! */
4686 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004687
4688 default:
4689 EMSG(_(e_re_corr));
4690#ifdef DEBUG
4691 printf("Illegal op code %d\n", op);
4692#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004693 status = RA_FAIL;
4694 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004695 }
4696 }
4697
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004698 /* If we can't continue sequentially, break the inner loop. */
4699 if (status != RA_CONT)
4700 break;
4701
4702 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004703 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004704
4705 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004706
4707 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004708 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00004709 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004710 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004711 while (regstack.ga_len > 0 && status != RA_FAIL)
4712 {
4713 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4714 switch (rp->rs_state)
4715 {
4716 case RS_NOPEN:
4717 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004718 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004719 break;
4720
4721 case RS_MOPEN:
4722 /* Pop the state. Restore pointers when there is no match. */
4723 if (status == RA_NOMATCH)
4724 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4725 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004726 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004727 break;
4728
4729#ifdef FEAT_SYN_HL
4730 case RS_ZOPEN:
4731 /* Pop the state. Restore pointers when there is no match. */
4732 if (status == RA_NOMATCH)
4733 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4734 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004735 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004736 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004737#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004738
4739 case RS_MCLOSE:
4740 /* Pop the state. Restore pointers when there is no match. */
4741 if (status == RA_NOMATCH)
4742 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4743 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004744 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004745 break;
4746
4747#ifdef FEAT_SYN_HL
4748 case RS_ZCLOSE:
4749 /* Pop the state. Restore pointers when there is no match. */
4750 if (status == RA_NOMATCH)
4751 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4752 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004753 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004754 break;
4755#endif
4756
4757 case RS_BRANCH:
4758 if (status == RA_MATCH)
4759 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004760 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004761 else
4762 {
4763 if (status != RA_BREAK)
4764 {
4765 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004766 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004767 scan = rp->rs_scan;
4768 }
4769 if (scan == NULL || OP(scan) != BRANCH)
4770 {
4771 /* no more branches, didn't find a match */
4772 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004773 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004774 }
4775 else
4776 {
4777 /* Prepare to try a branch. */
4778 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00004779 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004780 scan = OPERAND(scan);
4781 }
4782 }
4783 break;
4784
4785 case RS_BRCPLX_MORE:
4786 /* Pop the state. Restore pointers when there is no match. */
4787 if (status == RA_NOMATCH)
4788 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004789 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004790 --brace_count[rp->rs_no]; /* decrement match count */
4791 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004792 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004793 break;
4794
4795 case RS_BRCPLX_LONG:
4796 /* Pop the state. Restore pointers when there is no match. */
4797 if (status == RA_NOMATCH)
4798 {
4799 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004800 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004801 --brace_count[rp->rs_no];
4802 /* continue with the items after "\{}" */
4803 status = RA_CONT;
4804 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004805 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004806 if (status == RA_CONT)
4807 scan = regnext(scan);
4808 break;
4809
4810 case RS_BRCPLX_SHORT:
4811 /* Pop the state. Restore pointers when there is no match. */
4812 if (status == RA_NOMATCH)
4813 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004814 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004815 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004816 if (status == RA_NOMATCH)
4817 {
4818 scan = OPERAND(scan);
4819 status = RA_CONT;
4820 }
4821 break;
4822
4823 case RS_NOMATCH:
4824 /* Pop the state. If the operand matches for NOMATCH or
4825 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
4826 * except for SUBPAT, and continue with the next item. */
4827 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4828 status = RA_NOMATCH;
4829 else
4830 {
4831 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004832 if (rp->rs_no != SUBPAT) /* zero-width */
4833 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004834 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004835 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004836 if (status == RA_CONT)
4837 scan = regnext(scan);
4838 break;
4839
4840 case RS_BEHIND1:
4841 if (status == RA_NOMATCH)
4842 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004843 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004844 regstack.ga_len -= sizeof(regbehind_T);
4845 }
4846 else
4847 {
4848 /* The stuff after BEHIND/NOBEHIND matches. Now try if
4849 * the behind part does (not) match before the current
4850 * position in the input. This must be done at every
4851 * position in the input and checking if the match ends at
4852 * the current position. */
4853
4854 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004855 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004856
4857 /* start looking for a match with operand at the current
4858 * position. Go back one character until we find the
4859 * result, hitting the start of the line or the previous
4860 * line (for multi-line matching).
4861 * Set behind_pos to where the match should end, BHPOS
4862 * will match it. Save the current value. */
4863 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4864 behind_pos = rp->rs_un.regsave;
4865
4866 rp->rs_state = RS_BEHIND2;
4867
Bram Moolenaar582fd852005-03-28 20:58:01 +00004868 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004869 scan = OPERAND(rp->rs_scan);
4870 }
4871 break;
4872
4873 case RS_BEHIND2:
4874 /*
4875 * Looping for BEHIND / NOBEHIND match.
4876 */
4877 if (status == RA_MATCH && reg_save_equal(&behind_pos))
4878 {
4879 /* found a match that ends where "next" started */
4880 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4881 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004882 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4883 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004884 else
4885 /* But we didn't want a match. */
4886 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004887 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004888 regstack.ga_len -= sizeof(regbehind_T);
4889 }
4890 else
4891 {
4892 /* No match: Go back one character. May go to previous
4893 * line once. */
4894 no = OK;
4895 if (REG_MULTI)
4896 {
4897 if (rp->rs_un.regsave.rs_u.pos.col == 0)
4898 {
4899 if (rp->rs_un.regsave.rs_u.pos.lnum
4900 < behind_pos.rs_u.pos.lnum
4901 || reg_getline(
4902 --rp->rs_un.regsave.rs_u.pos.lnum)
4903 == NULL)
4904 no = FAIL;
4905 else
4906 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004907 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004908 rp->rs_un.regsave.rs_u.pos.col =
4909 (colnr_T)STRLEN(regline);
4910 }
4911 }
4912 else
4913 --rp->rs_un.regsave.rs_u.pos.col;
4914 }
4915 else
4916 {
4917 if (rp->rs_un.regsave.rs_u.ptr == regline)
4918 no = FAIL;
4919 else
4920 --rp->rs_un.regsave.rs_u.ptr;
4921 }
4922 if (no == OK)
4923 {
4924 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004925 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004926 scan = OPERAND(rp->rs_scan);
4927 }
4928 else
4929 {
4930 /* Can't advance. For NOBEHIND that's a match. */
4931 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4932 if (rp->rs_no == NOBEHIND)
4933 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004934 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4935 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004936 status = RA_MATCH;
4937 }
4938 else
4939 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004940 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004941 regstack.ga_len -= sizeof(regbehind_T);
4942 }
4943 }
4944 break;
4945
4946 case RS_STAR_LONG:
4947 case RS_STAR_SHORT:
4948 {
4949 regstar_T *rst = ((regstar_T *)rp) - 1;
4950
4951 if (status == RA_MATCH)
4952 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004953 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004954 regstack.ga_len -= sizeof(regstar_T);
4955 break;
4956 }
4957
4958 /* Tried once already, restore input pointers. */
4959 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004960 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004961
4962 /* Repeat until we found a position where it could match. */
4963 for (;;)
4964 {
4965 if (status != RA_BREAK)
4966 {
4967 /* Tried first position already, advance. */
4968 if (rp->rs_state == RS_STAR_LONG)
4969 {
4970 /* Trying for longest match, but couldn't or didn't
4971 * match -- back up one char. */
4972 if (--rst->count < rst->minval)
4973 break;
4974 if (reginput == regline)
4975 {
4976 /* backup to last char of previous line */
4977 --reglnum;
4978 regline = reg_getline(reglnum);
4979 /* Just in case regrepeat() didn't count
4980 * right. */
4981 if (regline == NULL)
4982 break;
4983 reginput = regline + STRLEN(regline);
4984 fast_breakcheck();
4985 }
4986 else
4987 mb_ptr_back(regline, reginput);
4988 }
4989 else
4990 {
4991 /* Range is backwards, use shortest match first.
4992 * Careful: maxval and minval are exchanged!
4993 * Couldn't or didn't match: try advancing one
4994 * char. */
4995 if (rst->count == rst->minval
4996 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
4997 break;
4998 ++rst->count;
4999 }
5000 if (got_int)
5001 break;
5002 }
5003 else
5004 status = RA_NOMATCH;
5005
5006 /* If it could match, try it. */
5007 if (rst->nextb == NUL || *reginput == rst->nextb
5008 || *reginput == rst->nextb_ic)
5009 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005010 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005011 scan = regnext(rp->rs_scan);
5012 status = RA_CONT;
5013 break;
5014 }
5015 }
5016 if (status != RA_CONT)
5017 {
5018 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005019 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005020 regstack.ga_len -= sizeof(regstar_T);
5021 status = RA_NOMATCH;
5022 }
5023 }
5024 break;
5025 }
5026
5027 /* If we want to continue the inner loop or didn't pop a state continue
5028 * matching loop */
5029 if (status == RA_CONT || rp == (regitem_T *)
5030 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5031 break;
5032 }
5033
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005034 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005035 if (status == RA_CONT)
5036 continue;
5037
5038 /*
5039 * If the regstack is empty or something failed we are done.
5040 */
5041 if (regstack.ga_len == 0 || status == RA_FAIL)
5042 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005043 if (scan == NULL)
5044 {
5045 /*
5046 * We get here only if there's trouble -- normally "case END" is
5047 * the terminating point.
5048 */
5049 EMSG(_(e_re_corr));
5050#ifdef DEBUG
5051 printf("Premature EOL\n");
5052#endif
5053 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005054 if (status == RA_FAIL)
5055 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005056 return (status == RA_MATCH);
5057 }
5058
5059 } /* End of loop until the regstack is empty. */
5060
5061 /* NOTREACHED */
5062}
5063
5064/*
5065 * Push an item onto the regstack.
5066 * Returns pointer to new item. Returns NULL when out of memory.
5067 */
5068 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005069regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005070 regstate_T state;
5071 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005072{
5073 regitem_T *rp;
5074
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005075 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005076 {
5077 EMSG(_(e_maxmempat));
5078 return NULL;
5079 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005080 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005081 return NULL;
5082
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005083 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005084 rp->rs_state = state;
5085 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005086
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005087 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005088 return rp;
5089}
5090
5091/*
5092 * Pop an item from the regstack.
5093 */
5094 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005095regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005096 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005097{
5098 regitem_T *rp;
5099
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005100 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005101 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005102
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005103 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005104}
5105
/*
 * regrepeat - repeatedly match something simple, return how many.
 * Advances reginput (and reglnum) to just after the matched chars.
 *
 * "p" is the node to repeat: OP(p) selects what is matched and OPERAND(p)
 * supplies the character(s) for EXACTLY, MULTIBYTECODE, ANYOF and ANYBUT.
 * No more than "maxcount" matches are consumed.
 * For the "+ ADD_NL" variants a line break also counts as one match: either
 * the end of the line (moving on with reg_nextline() until reg_maxline is
 * reached) or a '\n' byte when reg_line_lbr is set.
 * An opcode that is not handled here reports e_re_corr; "count" is then
 * still zero.
 */
    static int
regrepeat(p, maxcount)
    char_u	*p;
    long	maxcount;   /* maximum number of matches allowed */
{
    long	count = 0;
    char_u	*scan;
    char_u	*opnd;
    int		mask;
    int		testval = 0;	/* stays zero for the "S..." and negated
				 * ("N...") opcodes and for ANYBUT */

    scan = reginput;	/* Make local copy of reginput for speed. */
    opnd = OPERAND(p);
    switch (OP(p))
    {
      case ANY:
      case ANY + ADD_NL:
        while (count < maxcount)
        {
            /* Matching anything means we continue until end-of-line (or
             * end-of-file for ANY + ADD_NL), only limited by maxcount. */
            while (*scan != NUL && count < maxcount)
            {
                ++count;
                mb_ptr_adv(scan);
            }
            if (!WITH_NL(OP(p)) || reglnum == reg_maxline || count == maxcount)
                break;
            ++count;		/* count the line-break */
            reg_nextline();
            scan = reginput;
            if (got_int)
                break;
        }
        break;

      /*
       * IDENT, KWORD, FNAME and PRINT share one pattern: the plain opcode
       * sets "testval" to TRUE so that digits are accepted, the "S" variant
       * leaves it zero and thereby rejects digits.
       */
      case IDENT:
      case IDENT + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SIDENT:
      case SIDENT + ADD_NL:
        while (count < maxcount)
        {
            if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                /* End of line: only continue when line breaks may match
                 * and there is a next line. */
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case KWORD:
      case KWORD + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SKWORD:
      case SKWORD + ADD_NL:
        while (count < maxcount)
        {
            if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case FNAME:
      case FNAME + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SFNAME:
      case SFNAME + ADD_NL:
        while (count < maxcount)
        {
            if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case PRINT:
      case PRINT + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SPRINT:
      case SPRINT + ADD_NL:
        while (count < maxcount)
        {
            /* Unlike the loops above, the end of the line is checked
             * before the character test. */
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      /*
       * Character classes looked up in class_tab[].  Every class opcode sets
       * "mask" to its RI_ bit and jumps to do_class.  The class itself also
       * sets "testval" to that bit, its negation ("N...") leaves "testval"
       * zero, so "(class_tab[*scan] & mask) == testval" tests for a member
       * or a non-member respectively.
       */
      case WHITE:
      case WHITE + ADD_NL:
        testval = mask = RI_WHITE;
do_class:
        while (count < maxcount)
        {
#ifdef FEAT_MBYTE
            int		l;
#endif
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
#ifdef FEAT_MBYTE
            else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
            {
                /* A multi-byte character is not looked up in class_tab[]:
                 * it stops a positive class and is consumed by a negated
                 * one. */
                if (testval != 0)
                    break;
                scan += l;
            }
#endif
            else if ((class_tab[*scan] & mask) == testval)
                ++scan;
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case NWHITE:
      case NWHITE + ADD_NL:
        mask = RI_WHITE;
        goto do_class;
      case DIGIT:
      case DIGIT + ADD_NL:
        testval = mask = RI_DIGIT;
        goto do_class;
      case NDIGIT:
      case NDIGIT + ADD_NL:
        mask = RI_DIGIT;
        goto do_class;
      case HEX:
      case HEX + ADD_NL:
        testval = mask = RI_HEX;
        goto do_class;
      case NHEX:
      case NHEX + ADD_NL:
        mask = RI_HEX;
        goto do_class;
      case OCTAL:
      case OCTAL + ADD_NL:
        testval = mask = RI_OCTAL;
        goto do_class;
      case NOCTAL:
      case NOCTAL + ADD_NL:
        mask = RI_OCTAL;
        goto do_class;
      case WORD:
      case WORD + ADD_NL:
        testval = mask = RI_WORD;
        goto do_class;
      case NWORD:
      case NWORD + ADD_NL:
        mask = RI_WORD;
        goto do_class;
      case HEAD:
      case HEAD + ADD_NL:
        testval = mask = RI_HEAD;
        goto do_class;
      case NHEAD:
      case NHEAD + ADD_NL:
        mask = RI_HEAD;
        goto do_class;
      case ALPHA:
      case ALPHA + ADD_NL:
        testval = mask = RI_ALPHA;
        goto do_class;
      case NALPHA:
      case NALPHA + ADD_NL:
        mask = RI_ALPHA;
        goto do_class;
      case LOWER:
      case LOWER + ADD_NL:
        testval = mask = RI_LOWER;
        goto do_class;
      case NLOWER:
      case NLOWER + ADD_NL:
        mask = RI_LOWER;
        goto do_class;
      case UPPER:
      case UPPER + ADD_NL:
        testval = mask = RI_UPPER;
        goto do_class;
      case NUPPER:
      case NUPPER + ADD_NL:
        mask = RI_UPPER;
        goto do_class;

      case EXACTLY:
        {
            int	    cu, cl;

            /* This doesn't do a multi-byte character, because a MULTIBYTECODE
             * would have been used for it. */
            if (ireg_ic)
            {
                /* Ignoring case: accept both the upper and the lower case
                 * form of the first operand byte. */
                cu = TOUPPER_LOC(*opnd);
                cl = TOLOWER_LOC(*opnd);
                while (count < maxcount && (*scan == cu || *scan == cl))
                {
                    count++;
                    scan++;
                }
            }
            else
            {
                cu = *opnd;
                while (count < maxcount && *scan == cu)
                {
                    count++;
                    scan++;
                }
            }
            break;
        }

#ifdef FEAT_MBYTE
      case MULTIBYTECODE:
        {
            int		i, len, cf = 0;

            /* Safety check (just in case 'encoding' was changed since
             * compiling the program). */
            if ((len = (*mb_ptr2len_check)(opnd)) > 1)
            {
                if (ireg_ic && enc_utf8)
                    cf = utf_fold(utf_ptr2char(opnd));
                while (count < maxcount)
                {
                    /* Compare the bytes of the operand character; when they
                     * differ the folded characters still match if case is
                     * ignored and the encoding is UTF-8. */
                    for (i = 0; i < len; ++i)
                        if (opnd[i] != scan[i])
                            break;
                    if (i < len && (!ireg_ic || !enc_utf8
                                    || utf_fold(utf_ptr2char(scan)) != cf))
                        break;
                    scan += len;
                    ++count;
                }
            }
        }
        break;
#endif

      /*
       * ANYOF sets "testval" to TRUE and stops at a character that
       * cstrchr() does not find in the operand; ANYBUT leaves "testval"
       * zero and stops at a character that is found.
       */
      case ANYOF:
      case ANYOF + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/

      case ANYBUT:
      case ANYBUT + ADD_NL:
        while (count < maxcount)
        {
#ifdef FEAT_MBYTE
            int len;
#endif
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
#ifdef FEAT_MBYTE
            else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
            {
                if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
                    break;
                scan += len;
            }
#endif
            else
            {
                if ((cstrchr(opnd, *scan) == NULL) == testval)
                    break;
                ++scan;
            }
            ++count;
        }
        break;

      case NEWL:
        /* Matches line breaks only: the end of a line that is followed by
         * another line, or a '\n' byte when reg_line_lbr is set. */
        while (count < maxcount
                && ((*scan == NUL && reglnum < reg_maxline)
                    || (*scan == '\n' && reg_line_lbr)))
        {
            count++;
            if (reg_line_lbr)
                ADVANCE_REGINPUT();
            else
                reg_nextline();
            scan = reginput;
            if (got_int)
                break;
        }
        break;

      default:		/* Oh dear.  Called inappropriately. */
        EMSG(_(e_re_corr));
#ifdef DEBUG
        printf("Called regrepeat with op code %d\n", OP(p));
#endif
        break;
    }

    /* Publish the position reached through the local copy. */
    reginput = scan;

    return (int)count;
}
5491
5492/*
5493 * regnext - dig the "next" pointer out of a node
5494 */
5495 static char_u *
5496regnext(p)
5497 char_u *p;
5498{
5499 int offset;
5500
5501 if (p == JUST_CALC_SIZE)
5502 return NULL;
5503
5504 offset = NEXT(p);
5505 if (offset == 0)
5506 return NULL;
5507
Bram Moolenaar582fd852005-03-28 20:58:01 +00005508 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005509 return p - offset;
5510 else
5511 return p + offset;
5512}
5513
5514/*
5515 * Check the regexp program for its magic number.
5516 * Return TRUE if it's wrong.
5517 */
5518 static int
5519prog_magic_wrong()
5520{
5521 if (UCHARAT(REG_MULTI
5522 ? reg_mmatch->regprog->program
5523 : reg_match->regprog->program) != REGMAGIC)
5524 {
5525 EMSG(_(e_re_corr));
5526 return TRUE;
5527 }
5528 return FALSE;
5529}
5530
5531/*
5532 * Cleanup the subexpressions, if this wasn't done yet.
5533 * This construction is used to clear the subexpressions only when they are
5534 * used (to increase speed).
5535 */
5536 static void
5537cleanup_subexpr()
5538{
5539 if (need_clear_subexpr)
5540 {
5541 if (REG_MULTI)
5542 {
5543 /* Use 0xff to set lnum to -1 */
5544 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5545 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5546 }
5547 else
5548 {
5549 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5550 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5551 }
5552 need_clear_subexpr = FALSE;
5553 }
5554}
5555
5556#ifdef FEAT_SYN_HL
5557 static void
5558cleanup_zsubexpr()
5559{
5560 if (need_clear_zsubexpr)
5561 {
5562 if (REG_MULTI)
5563 {
5564 /* Use 0xff to set lnum to -1 */
5565 vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5566 vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5567 }
5568 else
5569 {
5570 vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
5571 vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
5572 }
5573 need_clear_zsubexpr = FALSE;
5574 }
5575}
5576#endif
5577
5578/*
5579 * Advance reglnum, regline and reginput to the next line.
5580 */
5581 static void
5582reg_nextline()
5583{
5584 regline = reg_getline(++reglnum);
5585 reginput = regline;
5586 fast_breakcheck();
5587}
5588
5589/*
5590 * Save the input line and position in a regsave_T.
5591 */
5592 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005593reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005594 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005595 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005596{
5597 if (REG_MULTI)
5598 {
5599 save->rs_u.pos.col = (colnr_T)(reginput - regline);
5600 save->rs_u.pos.lnum = reglnum;
5601 }
5602 else
5603 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005604 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005605}
5606
5607/*
5608 * Restore the input line and position from a regsave_T.
5609 */
5610 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005611reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005612 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005613 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005614{
5615 if (REG_MULTI)
5616 {
5617 if (reglnum != save->rs_u.pos.lnum)
5618 {
5619 /* only call reg_getline() when the line number changed to save
5620 * a bit of time */
5621 reglnum = save->rs_u.pos.lnum;
5622 regline = reg_getline(reglnum);
5623 }
5624 reginput = regline + save->rs_u.pos.col;
5625 }
5626 else
5627 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005628 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005629}
5630
5631/*
5632 * Return TRUE if current position is equal to saved position.
5633 */
5634 static int
5635reg_save_equal(save)
5636 regsave_T *save;
5637{
5638 if (REG_MULTI)
5639 return reglnum == save->rs_u.pos.lnum
5640 && reginput == regline + save->rs_u.pos.col;
5641 return reginput == save->rs_u.ptr;
5642}
5643
5644/*
5645 * Tentatively set the sub-expression start to the current position (after
5646 * calling regmatch() they will have changed). Need to save the existing
5647 * values for when there is no match.
5648 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5649 * depending on REG_MULTI.
5650 */
5651 static void
5652save_se_multi(savep, posp)
5653 save_se_T *savep;
5654 lpos_T *posp;
5655{
5656 savep->se_u.pos = *posp;
5657 posp->lnum = reglnum;
5658 posp->col = (colnr_T)(reginput - regline);
5659}
5660
5661 static void
5662save_se_one(savep, pp)
5663 save_se_T *savep;
5664 char_u **pp;
5665{
5666 savep->se_u.ptr = *pp;
5667 *pp = reginput;
5668}
5669
5670/*
5671 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5672 */
5673 static int
5674re_num_cmp(val, scan)
5675 long_u val;
5676 char_u *scan;
5677{
5678 long_u n = OPERAND_MIN(scan);
5679
5680 if (OPERAND_CMP(scan) == '>')
5681 return val > n;
5682 if (OPERAND_CMP(scan) == '<')
5683 return val < n;
5684 return val == n;
5685}
5686
5687
5688#ifdef DEBUG
5689
/*
 * regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * "pattern" is only printed in the heading.  "r" is the compiled program:
 * every node is listed with its offset in r->program, its name from
 * regprop() and the offset of its "next" node ("(0)" when there is none).
 * After the nodes the regstart, reganch and regmust header fields are shown
 * when they are set.
 */
    static void
regdump(pattern, r)
    char_u	*pattern;
    regprog_T	*r;
{
    char_u  *s;
    int	    op = EXACTLY;	/* Arbitrary non-END op. */
    char_u  *next;
    char_u  *end = NULL;	/* furthest "next" node seen so far */

    printf("\r\nregcomp(%s):\r\n", pattern);

    /* Skip the first byte of the program; prog_magic_wrong() expects the
     * magic number there. */
    s = r->program + 1;
    /*
     * Loop until we find the END that isn't before a referred next (an END
     * can also appear in a NOMATCH operand).
     */
    while (op != END || s <= end)
    {
        op = OP(s);
        printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
        next = regnext(s);
        if (next == NULL)	/* Next ptr. */
            printf("(0)");
        else
            printf("(%d)", (int)((s - r->program) + (next - s)));
        if (end < next)
            end = next;
        if (op == BRACE_LIMITS)
        {
            /* The minimum and maximum are printed as longs and the operand
             * is skipped as 8 bytes. */
            printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
            s += 8;
        }
        s += 3;		/* skip the node header */
        if (op == ANYOF || op == ANYOF + ADD_NL
                || op == ANYBUT || op == ANYBUT + ADD_NL
                || op == EXACTLY)
        {
            /* Literal string, where present. */
            while (*s != NUL)
                printf("%c", *s++);
            s++;	/* skip the terminating NUL */
        }
        printf("\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
        printf("start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        printf("anchored; ");
    if (r->regmust != NULL)
        printf("must have \"%s\"", r->regmust);
    printf("\r\n");
}
5751
/*
 * regprop - printable representation of opcode
 *
 * "op" points to a node; the name is chosen from OP(op).
 * Returns a pointer to a static buffer holding ":" followed by the name, so
 * the result is overwritten by the next call.
 * Numbered opcodes (MOPEN1, BACKREF3, ...) get their number appended and an
 * opcode that is not known here is shown as "corrupt <number>".
 */
    static char_u *
regprop(op)
    char_u	   *op;
{
    char_u	    *p;		/* fixed name to append, NULL when the name
                                 * was already written into buf */
    static char_u   buf[50];

    (void) strcpy(buf, ":");

    switch (OP(op))
    {
      case BOL:
        p = "BOL";
        break;
      case EOL:
        p = "EOL";
        break;
      case RE_BOF:
        p = "BOF";
        break;
      case RE_EOF:
        p = "EOF";
        break;
      case CURSOR:
        p = "CURSOR";
        break;
      case RE_LNUM:
        p = "RE_LNUM";
        break;
      case RE_COL:
        p = "RE_COL";
        break;
      case RE_VCOL:
        p = "RE_VCOL";
        break;
      case BOW:
        p = "BOW";
        break;
      case EOW:
        p = "EOW";
        break;
      case ANY:
        p = "ANY";
        break;
      case ANY + ADD_NL:
        p = "ANY+NL";
        break;
      case ANYOF:
        p = "ANYOF";
        break;
      case ANYOF + ADD_NL:
        p = "ANYOF+NL";
        break;
      case ANYBUT:
        p = "ANYBUT";
        break;
      case ANYBUT + ADD_NL:
        p = "ANYBUT+NL";
        break;
      case IDENT:
        p = "IDENT";
        break;
      case IDENT + ADD_NL:
        p = "IDENT+NL";
        break;
      case SIDENT:
        p = "SIDENT";
        break;
      case SIDENT + ADD_NL:
        p = "SIDENT+NL";
        break;
      case KWORD:
        p = "KWORD";
        break;
      case KWORD + ADD_NL:
        p = "KWORD+NL";
        break;
      case SKWORD:
        p = "SKWORD";
        break;
      case SKWORD + ADD_NL:
        p = "SKWORD+NL";
        break;
      case FNAME:
        p = "FNAME";
        break;
      case FNAME + ADD_NL:
        p = "FNAME+NL";
        break;
      case SFNAME:
        p = "SFNAME";
        break;
      case SFNAME + ADD_NL:
        p = "SFNAME+NL";
        break;
      case PRINT:
        p = "PRINT";
        break;
      case PRINT + ADD_NL:
        p = "PRINT+NL";
        break;
      case SPRINT:
        p = "SPRINT";
        break;
      case SPRINT + ADD_NL:
        p = "SPRINT+NL";
        break;
      case WHITE:
        p = "WHITE";
        break;
      case WHITE + ADD_NL:
        p = "WHITE+NL";
        break;
      case NWHITE:
        p = "NWHITE";
        break;
      case NWHITE + ADD_NL:
        p = "NWHITE+NL";
        break;
      case DIGIT:
        p = "DIGIT";
        break;
      case DIGIT + ADD_NL:
        p = "DIGIT+NL";
        break;
      case NDIGIT:
        p = "NDIGIT";
        break;
      case NDIGIT + ADD_NL:
        p = "NDIGIT+NL";
        break;
      case HEX:
        p = "HEX";
        break;
      case HEX + ADD_NL:
        p = "HEX+NL";
        break;
      case NHEX:
        p = "NHEX";
        break;
      case NHEX + ADD_NL:
        p = "NHEX+NL";
        break;
      case OCTAL:
        p = "OCTAL";
        break;
      case OCTAL + ADD_NL:
        p = "OCTAL+NL";
        break;
      case NOCTAL:
        p = "NOCTAL";
        break;
      case NOCTAL + ADD_NL:
        p = "NOCTAL+NL";
        break;
      case WORD:
        p = "WORD";
        break;
      case WORD + ADD_NL:
        p = "WORD+NL";
        break;
      case NWORD:
        p = "NWORD";
        break;
      case NWORD + ADD_NL:
        p = "NWORD+NL";
        break;
      case HEAD:
        p = "HEAD";
        break;
      case HEAD + ADD_NL:
        p = "HEAD+NL";
        break;
      case NHEAD:
        p = "NHEAD";
        break;
      case NHEAD + ADD_NL:
        p = "NHEAD+NL";
        break;
      case ALPHA:
        p = "ALPHA";
        break;
      case ALPHA + ADD_NL:
        p = "ALPHA+NL";
        break;
      case NALPHA:
        p = "NALPHA";
        break;
      case NALPHA + ADD_NL:
        p = "NALPHA+NL";
        break;
      case LOWER:
        p = "LOWER";
        break;
      case LOWER + ADD_NL:
        p = "LOWER+NL";
        break;
      case NLOWER:
        p = "NLOWER";
        break;
      case NLOWER + ADD_NL:
        p = "NLOWER+NL";
        break;
      case UPPER:
        p = "UPPER";
        break;
      case UPPER + ADD_NL:
        p = "UPPER+NL";
        break;
      case NUPPER:
        p = "NUPPER";
        break;
      case NUPPER + ADD_NL:
        p = "NUPPER+NL";
        break;
      case BRANCH:
        p = "BRANCH";
        break;
      case EXACTLY:
        p = "EXACTLY";
        break;
      case NOTHING:
        p = "NOTHING";
        break;
      case BACK:
        p = "BACK";
        break;
      case END:
        p = "END";
        break;
      case MOPEN + 0:
        p = "MATCH START";
        break;
      case MOPEN + 1:
      case MOPEN + 2:
      case MOPEN + 3:
      case MOPEN + 4:
      case MOPEN + 5:
      case MOPEN + 6:
      case MOPEN + 7:
      case MOPEN + 8:
      case MOPEN + 9:
        /* Numbered opcode: write name and number after the ":". */
        sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
        p = NULL;
        break;
      case MCLOSE + 0:
        p = "MATCH END";
        break;
      case MCLOSE + 1:
      case MCLOSE + 2:
      case MCLOSE + 3:
      case MCLOSE + 4:
      case MCLOSE + 5:
      case MCLOSE + 6:
      case MCLOSE + 7:
      case MCLOSE + 8:
      case MCLOSE + 9:
        sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
        p = NULL;
        break;
      case BACKREF + 1:
      case BACKREF + 2:
      case BACKREF + 3:
      case BACKREF + 4:
      case BACKREF + 5:
      case BACKREF + 6:
      case BACKREF + 7:
      case BACKREF + 8:
      case BACKREF + 9:
        sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
        p = NULL;
        break;
      case NOPEN:
        p = "NOPEN";
        break;
      case NCLOSE:
        p = "NCLOSE";
        break;
#ifdef FEAT_SYN_HL
      case ZOPEN + 1:
      case ZOPEN + 2:
      case ZOPEN + 3:
      case ZOPEN + 4:
      case ZOPEN + 5:
      case ZOPEN + 6:
      case ZOPEN + 7:
      case ZOPEN + 8:
      case ZOPEN + 9:
        sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
        p = NULL;
        break;
      case ZCLOSE + 1:
      case ZCLOSE + 2:
      case ZCLOSE + 3:
      case ZCLOSE + 4:
      case ZCLOSE + 5:
      case ZCLOSE + 6:
      case ZCLOSE + 7:
      case ZCLOSE + 8:
      case ZCLOSE + 9:
        sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
        p = NULL;
        break;
      case ZREF + 1:
      case ZREF + 2:
      case ZREF + 3:
      case ZREF + 4:
      case ZREF + 5:
      case ZREF + 6:
      case ZREF + 7:
      case ZREF + 8:
      case ZREF + 9:
        sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
        p = NULL;
        break;
#endif
      case STAR:
        p = "STAR";
        break;
      case PLUS:
        p = "PLUS";
        break;
      case NOMATCH:
        p = "NOMATCH";
        break;
      case MATCH:
        p = "MATCH";
        break;
      case BEHIND:
        p = "BEHIND";
        break;
      case NOBEHIND:
        p = "NOBEHIND";
        break;
      case SUBPAT:
        p = "SUBPAT";
        break;
      case BRACE_LIMITS:
        p = "BRACE_LIMITS";
        break;
      case BRACE_SIMPLE:
        p = "BRACE_SIMPLE";
        break;
      case BRACE_COMPLEX + 0:
      case BRACE_COMPLEX + 1:
      case BRACE_COMPLEX + 2:
      case BRACE_COMPLEX + 3:
      case BRACE_COMPLEX + 4:
      case BRACE_COMPLEX + 5:
      case BRACE_COMPLEX + 6:
      case BRACE_COMPLEX + 7:
      case BRACE_COMPLEX + 8:
      case BRACE_COMPLEX + 9:
        sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
        p = NULL;
        break;
#ifdef FEAT_MBYTE
      case MULTIBYTECODE:
        p = "MULTIBYTECODE";
        break;
#endif
      case NEWL:
        p = "NEWL";
        break;
      default:
        /* Not an opcode this function knows: show its number. */
        sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
        p = NULL;
        break;
    }
    /* Append the fixed name, unless sprintf() already wrote one. */
    if (p != NULL)
        (void) strcat(buf, p);
    return buf;
}
6128#endif
6129
6130#ifdef FEAT_MBYTE
6131static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6132
/* One decomposition: up to three component characters, unused ones are 0. */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the characters 0xfb20 - 0xfb4f.
 * Entry "i" belongs to character 0xfb20 + i.  The entries marked UNUSED
 * hold the character itself.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};

/*
 * Split character "c" into its component characters.
 * For 0xfb20 - 0xfb4f the components come from decomp_table[] and are
 * stored in "*c1", "*c2" and "*c3"; components that are not used are zero.
 * Every other character is returned unchanged in "*c1", with "*c2" and
 * "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int		c, *c1, *c2, *c3;
{
    decomp_T	d;

    /* The lower bound must be the first character of the table: anything
     * below 0xfb20 would give a negative index and read before the start of
     * decomp_table[]. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6211#endif
6212
/*
 * Compare two strings, ignore case if ireg_ic set.
 * Return 0 if strings match, non-zero otherwise.
 * Correct the length "*n" when composing characters are ignored.
 *
 * "*n" is the number of bytes to compare.  The plain comparison is tried
 * first.  Only when that fails, the encoding is UTF-8 and ireg_icombine is
 * set, the strings are compared again character by character, comparing
 * only the first component that mb_decompose() gives for each character.
 * When that succeeds "*n" becomes the number of bytes used from "s2".
 */
    static int
cstrncmp(s1, s2, n)
    char_u	*s1, *s2;
    int		*n;
{
    int		result;

    if (!ireg_ic)
        result = STRNCMP(s1, s2, *n);
    else
        result = MB_STRNICMP(s1, s2, *n);

#ifdef FEAT_MBYTE
    /* if it failed and it's utf8 and we want to combineignore: */
    if (result != 0 && enc_utf8 && ireg_icombine)
    {
        char_u	*str1, *str2;
        int	c1, c2, c11, c12;
        int	ix;
        int	junk;	/* receives the components that are not compared */

        /* we have to handle the strcmp ourselves, since it is necessary to
         * deal with the composing characters by ignoring them: */
        str1 = s1;
        str2 = s2;
        c1 = c2 = 0;
        for (ix = 0; ix < *n; )
        {
            c1 = mb_ptr2char_adv(&str1);
            c2 = mb_ptr2char_adv(&str2);
            /* "ix" counts bytes of "s1": the length of "c1". */
            ix += utf_char2len(c1);

            /* decompose the character if necessary, into 'base' characters
             * because I don't care about Arabic, I will hard-code the Hebrew
             * which I *do* care about!  So sue me... */
            if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
            {
                /* decomposition necessary? */
                mb_decompose(c1, &c11, &junk, &junk);
                mb_decompose(c2, &c12, &junk, &junk);
                c1 = c11;
                c2 = c12;
                /* Still different after decomposing: no match.  "c1" and
                 * "c2" now hold the first components, they decide the
                 * result below. */
                if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
                    break;
            }
        }
        result = c2 - c1;
        if (result == 0)
            *n = (int)(str2 - s2);
    }
#endif

    return result;
}
6272
/*
 * cstrchr: This function is used a lot for simple searches, keep it fast!
 *
 * Find character "c" in the NUL-terminated string "s", ignoring case when
 * ireg_ic is set.
 * Returns a pointer to the first matching position, or NULL when the loops
 * below find no match.  When case does not matter for "c" the result is
 * whatever vim_strchr() returns.
 */
    static char_u *
cstrchr(s, c)
    char_u	*s;
    int		c;
{
    char_u	*p;
    int		cc;	/* "c" in the other case, or folded for UTF-8 */

    /* Not ignoring case, or a multi-byte character in an encoding other
     * than UTF-8: plain search. */
    if (!ireg_ic
#ifdef FEAT_MBYTE
            || (!enc_utf8 && mb_char2len(c) > 1)
#endif
            )
        return vim_strchr(s, c);

    /* tolower() and toupper() can be slow, comparing twice should be a lot
     * faster (esp. when using MS Visual C++!).
     * For UTF-8 need to use folded case. */
#ifdef FEAT_MBYTE
    if (enc_utf8 && c > 0x80)
        cc = utf_fold(c);
    else
#endif
         if (isupper(c))
        cc = TOLOWER_LOC(c);
    else if (islower(c))
        cc = TOUPPER_LOC(c);
    else
        /* "c" has no other case: plain search. */
        return vim_strchr(s, c);

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        /* Step over whole characters, so that a byte inside a multi-byte
         * character is never compared. */
        for (p = s; *p != NUL; p += (*mb_ptr2len_check)(p))
        {
            if (enc_utf8 && c > 0x80)
            {
                /* "cc" is the folded "c": compare folded characters. */
                if (utf_fold(utf_ptr2char(p)) == cc)
                    return p;
            }
            else if (*p == c || *p == cc)
                return p;
        }
    }
    else
#endif
        /* Faster version for when there are no multi-byte characters. */
        for (p = s; *p != NUL; ++p)
            if (*p == c || *p == cc)
                return p;

    return NULL;
}
6329
6330/***************************************************************
6331 * regsub stuff *
6332 ***************************************************************/
6333
6334/* This stuff below really confuses cc on an SGI -- webb */
6335#ifdef __sgi
6336# undef __ARGS
6337# define __ARGS(x) ()
6338#endif
6339
6340/*
 * We should define fptr as a pointer to a function returning a pointer to
6342 * a function returning a pointer to a function ...
6343 * This is impossible, so we declare a pointer to a function returning a
6344 * pointer to a function returning void. This should work for all compilers.
6345 */
6346typedef void (*(*fptr) __ARGS((char_u *, int)))();
6347
6348static fptr do_upper __ARGS((char_u *, int));
6349static fptr do_Upper __ARGS((char_u *, int));
6350static fptr do_lower __ARGS((char_u *, int));
6351static fptr do_Lower __ARGS((char_u *, int));
6352
6353static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6354
6355 static fptr
6356do_upper(d, c)
6357 char_u *d;
6358 int c;
6359{
6360 *d = TOUPPER_LOC(c);
6361
6362 return (fptr)NULL;
6363}
6364
6365 static fptr
6366do_Upper(d, c)
6367 char_u *d;
6368 int c;
6369{
6370 *d = TOUPPER_LOC(c);
6371
6372 return (fptr)do_Upper;
6373}
6374
6375 static fptr
6376do_lower(d, c)
6377 char_u *d;
6378 int c;
6379{
6380 *d = TOLOWER_LOC(c);
6381
6382 return (fptr)NULL;
6383}
6384
6385 static fptr
6386do_Lower(d, c)
6387 char_u *d;
6388 int c;
6389{
6390 *d = TOLOWER_LOC(c);
6391
6392 return (fptr)do_Lower;
6393}
6394
6395/*
6396 * regtilde(): Replace tildes in the pattern by the old pattern.
6397 *
6398 * Short explanation of the tilde: It stands for the previous replacement
6399 * pattern. If that previous pattern also contains a ~ we should go back a
6400 * step further... But we insert the previous pattern into the current one
6401 * and remember that.
6402 * This still does not handle the case where "magic" changes. TODO?
6403 *
6404 * The tildes are parsed once before the first call to vim_regsub().
6405 */
6406 char_u *
6407regtilde(source, magic)
6408 char_u *source;
6409 int magic;
6410{
6411 char_u *newsub = source;
6412 char_u *tmpsub;
6413 char_u *p;
6414 int len;
6415 int prevlen;
6416
6417 for (p = newsub; *p; ++p)
6418 {
6419 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6420 {
6421 if (reg_prev_sub != NULL)
6422 {
6423 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6424 prevlen = (int)STRLEN(reg_prev_sub);
6425 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6426 if (tmpsub != NULL)
6427 {
6428 /* copy prefix */
6429 len = (int)(p - newsub); /* not including ~ */
6430 mch_memmove(tmpsub, newsub, (size_t)len);
6431 /* interpretate tilde */
6432 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6433 /* copy postfix */
6434 if (!magic)
6435 ++p; /* back off \ */
6436 STRCPY(tmpsub + len + prevlen, p + 1);
6437
6438 if (newsub != source) /* already allocated newsub */
6439 vim_free(newsub);
6440 newsub = tmpsub;
6441 p = newsub + len + prevlen;
6442 }
6443 }
6444 else if (magic)
6445 STRCPY(p, p + 1); /* remove '~' */
6446 else
6447 STRCPY(p, p + 2); /* remove '\~' */
6448 --p;
6449 }
6450 else
6451 {
6452 if (*p == '\\' && p[1]) /* skip escaped characters */
6453 ++p;
6454#ifdef FEAT_MBYTE
6455 if (has_mbyte)
6456 p += (*mb_ptr2len_check)(p) - 1;
6457#endif
6458 }
6459 }
6460
6461 vim_free(reg_prev_sub);
6462 if (newsub != source) /* newsub was allocated, just keep it */
6463 reg_prev_sub = newsub;
6464 else /* no ~ found, need to save newsub */
6465 reg_prev_sub = vim_strsave(newsub);
6466 return newsub;
6467}
6468
6469#ifdef FEAT_EVAL
6470static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
6471
6472/* These pointers are used instead of reg_match and reg_mmatch for
6473 * reg_submatch(). Needed for when the substitution string is an expression
6474 * that contains a call to substitute() and submatch(). */
6475static regmatch_T *submatch_match;
6476static regmmatch_T *submatch_mmatch;
6477#endif
6478
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
/*
 * vim_regsub() - expand the substitute string "source" after a match was
 * found with vim_regexec().  "rmp" holds that match.
 *
 * "copy" TRUE:  the result is really written into "dest".
 * "copy" FALSE: nothing is written, the call only computes how long the
 *               result is going to be.
 *
 * "backslash" TRUE: a backslash will be removed later on, thus backslashes
 * are doubled to keep them and a backslash is put before a CR so that it is
 * not turned into a line break later.
 *
 * Note: The text that was matched must stay untouched between the call to
 * vim_regexec() and the call to vim_regsub(), otherwise the back references
 * no longer point to valid text!
 *
 * Returns the size of the replacement, including the terminating NUL.
 */
    int
vim_regsub(rmp, source, dest, copy, magic, backslash)
    regmatch_T  *rmp;
    char_u      *source;
    char_u      *dest;
    int         copy;
    int         magic;
    int         backslash;
{
    int         sublen;

    /* Work from the regmatch_T result: no multi-line match, no extra lines. */
    reg_maxline = 0;
    reg_mmatch = NULL;
    reg_match = rmp;

    sublen = vim_regsub_both(source, dest, copy, magic, backslash);
    return sublen;
}
#endif
6513
6514 int
6515vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6516 regmmatch_T *rmp;
6517 linenr_T lnum;
6518 char_u *source;
6519 char_u *dest;
6520 int copy;
6521 int magic;
6522 int backslash;
6523{
6524 reg_match = NULL;
6525 reg_mmatch = rmp;
6526 reg_buf = curbuf; /* always works on the current buffer! */
6527 reg_firstlnum = lnum;
6528 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6529 return vim_regsub_both(source, dest, copy, magic, backslash);
6530}
6531
/*
 * Common worker for vim_regsub() and vim_regsub_multi(), which store the
 * match in reg_match or reg_mmatch before calling this function.
 *
 * Expands the substitute string "source" into "dest":
 * - When "source" starts with "\=" (and FEAT_EVAL is defined and we are not
 *   already evaluating such an expression) the rest is evaluated with
 *   eval_to_string() and the result is used, with NL changed to CR.
 * - Otherwise "source" is scanned: '&' (when "magic" is TRUE) or "\&" (when
 *   "magic" is FALSE) and "\0" to "\9" insert the text of a submatch;
 *   "\u", "\U", "\l", "\L", "\e" and "\E" select the function used to change
 *   case; "\r", "\n", "\t" and "\b" produce CAR, NL, TAB and Ctrl_H; other
 *   characters are copied.
 *
 * When "copy" is FALSE nothing is written to "dest", only the length is
 * computed.  When "backslash" is TRUE a backslash is kept in front of a
 * backslashed ordinary character and inserted before each CR and backslash
 * taken from the matched text.
 *
 * Returns the size of the result including the terminating NUL, or 0 when
 * "source" or "dest" is NULL or prog_magic_wrong() returns non-zero.
 */
6532 static int
6533vim_regsub_both(source, dest, copy, magic, backslash)
6534 char_u *source;
6535 char_u *dest;
6536 int copy;
6537 int magic;
6538 int backslash;
6539{
6540 char_u *src;
6541 char_u *dst;
6542 char_u *s;
6543 int c;
6544 int no = -1; /* submatch to insert, -1 for an ordinary character */
6545 fptr func = (fptr)NULL; /* case-changing function, NULL: just copy */
6546 linenr_T clnum = 0; /* init for GCC */
6547 int len = 0; /* init for GCC */
6548 #ifdef FEAT_EVAL
6549 static char_u *eval_result = NULL;
6550 #endif
6551 #ifdef FEAT_MBYTE
6552 int l;
6553 #endif
6554
6555
6556 /* Be paranoid... */
6557 if (source == NULL || dest == NULL)
6558 {
6559 EMSG(_(e_null));
6560 return 0;
6561 }
6562 if (prog_magic_wrong())
6563 return 0;
6564 src = source;
6565 dst = dest;
6566
6567 /*
6568 * When the substitute part starts with "\=" evaluate it as an expression.
6569 */
6570 if (source[0] == '\\' && source[1] == '='
6571 #ifdef FEAT_EVAL
6572 && !can_f_submatch /* can't do this recursively */
6573 #endif
6574 )
6575 {
6576 #ifdef FEAT_EVAL
6577 /* To make sure that the length doesn't change between checking the
6578 * length and copying the string, and to speed up things, the
6579 * resulting string is saved from the call with "copy" == FALSE to the
6580 * call with "copy" == TRUE. */
6581 if (copy)
6582 {
     /* Use the result saved by the previous call with "copy" == FALSE
      * and release it. */
6583 if (eval_result != NULL)
6584 {
6585 STRCPY(dest, eval_result);
6586 dst += STRLEN(eval_result);
6587 vim_free(eval_result);
6588 eval_result = NULL;
6589 }
6590 }
6591 else
6592 {
6593 linenr_T save_reg_maxline;
6594 win_T *save_reg_win;
6595 int save_ireg_ic;
6596
     /* Drop a result that was never copied. */
6597 vim_free(eval_result);
6598
6599 /* The expression may contain substitute(), which calls us
6600 * recursively. Make sure submatch() gets the text from the first
6601 * level. Don't need to save "reg_buf", because
6602 * vim_regexec_multi() can't be called recursively. */
6603 submatch_match = reg_match;
6604 submatch_mmatch = reg_mmatch;
6605 save_reg_maxline = reg_maxline;
6606 save_reg_win = reg_win;
6607 save_ireg_ic = ireg_ic;
6608 can_f_submatch = TRUE;
6609
     /* Evaluate what follows the "\=". */
6610 eval_result = eval_to_string(source + 2, NULL);
6611 if (eval_result != NULL)
6612 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00006613 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006614 {
6615 /* Change NL to CR, so that it becomes a line break.
6616 * Skip over a backslashed character. */
6617 if (*s == NL)
6618 *s = CAR;
6619 else if (*s == '\\' && s[1] != NUL)
6620 ++s;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006621 }
6622
     /* Only the length is counted here, the text is copied by the
      * later call with "copy" == TRUE. */
6623 dst += STRLEN(eval_result);
6624 }
6625
     /* Restore what the evaluation may have changed. */
6626 reg_match = submatch_match;
6627 reg_mmatch = submatch_mmatch;
6628 reg_maxline = save_reg_maxline;
6629 reg_win = save_reg_win;
6630 ireg_ic = save_ireg_ic;
6631 can_f_submatch = FALSE;
6632 }
6633 #endif
6634 }
6635 else
6636 while ((c = *src++) != NUL)
6637 {
     /* First find out if this item inserts a submatch ("no" becomes
      * >= 0) or changes "func"; otherwise "no" stays -1. */
6638 if (c == '&' && magic)
6639 no = 0;
6640 else if (c == '\\' && *src != NUL)
6641 {
6642 if (*src == '&' && !magic)
6643 {
6644 ++src;
6645 no = 0;
6646 }
6647 else if ('0' <= *src && *src <= '9')
6648 {
6649 no = *src++ - '0';
6650 }
6651 else if (vim_strchr((char_u *)"uUlLeE", *src))
6652 {
     /* Only select the case-changing function; "continue" goes to
      * the next item of "source" without producing output. */
6653 switch (*src++)
6654 {
6655 case 'u': func = (fptr)do_upper;
6656 continue;
6657 case 'U': func = (fptr)do_Upper;
6658 continue;
6659 case 'l': func = (fptr)do_lower;
6660 continue;
6661 case 'L': func = (fptr)do_Lower;
6662 continue;
6663 case 'e':
6664 case 'E': func = (fptr)NULL;
6665 continue;
6666 }
6667 }
6668 }
6669 if (no < 0) /* Ordinary character. */
6670 {
6671 if (c == '\\' && *src != NUL)
6672 {
6673 /* Check for abbreviations -- webb */
6674 switch (*src)
6675 {
6676 case 'r': c = CAR; ++src; break;
6677 case 'n': c = NL; ++src; break;
6678 case 't': c = TAB; ++src; break;
6679 /* Oh no! \e already has meaning in subst pat :-( */
6680 /* case 'e': c = ESC; ++src; break; */
6681 case 'b': c = Ctrl_H; ++src; break;
6682
6683 /* If "backslash" is TRUE the backslash will be removed
6684 * later. Used to insert a literal CR. */
6685 default: if (backslash)
6686 {
6687 if (copy)
6688 *dst = '\\';
6689 ++dst;
6690 }
6691 c = *src++;
6692 }
6693 }
6694
6695 /* Write to buffer, if copy is set. */
6696 #ifdef FEAT_MBYTE
     /* "src - 1" is the byte that was last consumed; when it starts a
      * character of more than one byte copy the whole character. */
6697 if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
6698 {
6699 /* TODO: should use "func" here. */
6700 if (copy)
6701 mch_memmove(dst, src - 1, l);
     /* Advance one less than "l", the "dst++" below and the "src++"
      * done earlier account for the first byte. */
6702 dst += l - 1;
6703 src += l - 1;
6704 }
6705 else
6706 {
6707 #endif
6708 if (copy)
6709 {
6710 if (func == (fptr)NULL) /* just copy */
6711 *dst = c;
6712 else /* change case */
     /* The function stores the character in "dst"; its return
      * value is the function to use from now on. */
6713 func = (fptr)(func(dst, c));
6714 /* Turbo C complains without the typecast */
6715 }
6716 #ifdef FEAT_MBYTE
6717 }
6718 #endif
6719 dst++;
6720 }
6721 else
6722 {
     /* Insert submatch "no": find its start "s" and the number of
      * bytes "len" to take from the first line.  "s" is NULL when the
      * submatch has no start or end. */
6723 if (REG_MULTI)
6724 {
6725 clnum = reg_mmatch->startpos[no].lnum;
6726 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6727 s = NULL;
6728 else
6729 {
6730 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6731 if (reg_mmatch->endpos[no].lnum == clnum)
6732 len = reg_mmatch->endpos[no].col
6733 - reg_mmatch->startpos[no].col;
6734 else
6735 len = (int)STRLEN(s);
6736 }
6737 }
6738 else
6739 {
6740 s = reg_match->startp[no];
6741 if (reg_match->endp[no] == NULL)
6742 s = NULL;
6743 else
6744 len = (int)(reg_match->endp[no] - s);
6745 }
6746 if (s != NULL)
6747 {
6748 for (;;)
6749 {
6750 if (len == 0)
6751 {
     /* Done with this line.  For a multi-line match that continues
      * on a later line: add a CR and go on with the next line. */
6752 if (REG_MULTI)
6753 {
6754 if (reg_mmatch->endpos[no].lnum == clnum)
6755 break;
6756 if (copy)
6757 *dst = CAR;
6758 ++dst;
6759 s = reg_getline(++clnum);
6760 if (reg_mmatch->endpos[no].lnum == clnum)
6761 len = reg_mmatch->endpos[no].col;
6762 else
6763 len = (int)STRLEN(s);
6764 }
6765 else
6766 break;
6767 }
6768 else if (*s == NUL) /* we hit NUL. */
6769 {
     /* The text ends before "len" bytes were copied.  Note that
      * "dest" does not get a terminating NUL on this path. */
6770 if (copy)
6771 EMSG(_(e_re_damg));
6772 goto exit;
6773 }
6774 else
6775 {
6776 if (backslash && (*s == CAR || *s == '\\'))
6777 {
6778 /*
6779 * Insert a backslash in front of a CR, otherwise
6780 * it will be replaced by a line break.
6781 * Number of backslashes will be halved later,
6782 * double them here.
6783 */
6784 if (copy)
6785 {
6786 dst[0] = '\\';
6787 dst[1] = *s;
6788 }
6789 dst += 2;
6790 }
6791 #ifdef FEAT_MBYTE
6792 else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
6793 {
6794 /* TODO: should use "func" here. */
6795 if (copy)
6796 mch_memmove(dst, s, l);
6797 dst += l;
     /* One less than "l", the "++s" and "--len" below do the rest. */
6798 s += l - 1;
6799 len -= l - 1;
6800 }
6801 #endif
6802 else
6803 {
6804 if (copy)
6805 {
6806 if (func == (fptr)NULL) /* just copy */
6807 *dst = *s;
6808 else /* change case */
6809 func = (fptr)(func(dst, *s));
6810 /* Turbo C complains without the typecast */
6811 }
6812 ++dst;
6813 }
6814 ++s;
6815 --len;
6816 }
6817 }
6818 }
     /* Back to handling ordinary characters. */
6819 no = -1;
6820 }
6821 }
6822 if (copy)
6823 *dst = NUL;
6824
6825exit:
     /* Number of bytes produced plus one for the terminating NUL. */
6826 return (int)((dst - dest) + 1);
6827}
6828
#ifdef FEAT_EVAL
/*
 * Used for the submatch() function: get the string from the n'th submatch in
 * allocated memory.
 *
 * "no" is used directly as an index in the saved match, it is not checked
 * here.  When the submatch spans several buffer lines, the lines are
 * separated with '\n' in the result.
 *
 * Returns NULL when not in a ":s" command (can_f_submatch is FALSE), for a
 * non-existing submatch and when allocating the result fails.
 */
    char_u *
reg_submatch(no)
    int         no;
{
    char_u      *retval = NULL;
    char_u      *s;
    int         len;
    int         round;
    linenr_T    lnum;

    if (!can_f_submatch)
        return NULL;

    if (submatch_match == NULL)
    {
        /*
         * First round: compute the length and allocate memory.
         * Second round: copy the text.
         */
        for (round = 1; round <= 2; ++round)
        {
            lnum = submatch_mmatch->startpos[no].lnum;
            if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
                return NULL;

            /* Anti-crash check, cannot happen?  Must be done before adding
             * the start column, afterwards the pointer is never NULL. */
            s = reg_getline(lnum);
            if (s == NULL)
            {
                /* In the second round "retval" was already allocated but
                 * not filled; vim_free() accepts NULL in the first round. */
                vim_free(retval);
                return NULL;
            }
            s += submatch_mmatch->startpos[no].col;

            if (submatch_mmatch->endpos[no].lnum == lnum)
            {
                /* Within one line: take from start to end col. */
                len = submatch_mmatch->endpos[no].col
                                          - submatch_mmatch->startpos[no].col;
                if (round == 2)
                    vim_strncpy(retval, s, len);
                ++len;          /* count the terminating NUL */
            }
            else
            {
                /* Multiple lines: take start line from start col, middle
                 * lines completely and end line up to end col. */
                len = (int)STRLEN(s);
                if (round == 2)
                {
                    STRCPY(retval, s);
                    retval[len] = '\n';
                }
                ++len;          /* count the '\n' */
                ++lnum;
                while (lnum < submatch_mmatch->endpos[no].lnum)
                {
                    s = reg_getline(lnum++);
                    if (round == 2)
                        STRCPY(retval + len, s);
                    len += (int)STRLEN(s);
                    if (round == 2)
                        retval[len] = '\n';
                    ++len;      /* count the '\n' */
                }
                if (round == 2)
                    STRNCPY(retval + len, reg_getline(lnum),
                                             submatch_mmatch->endpos[no].col);
                len += submatch_mmatch->endpos[no].col;
                if (round == 2)
                    retval[len] = NUL;
                ++len;          /* count the terminating NUL */
            }

            if (round == 1)
            {
                /* Must check the result of the allocation, not "s": the
                 * second round writes into "retval". */
                retval = lalloc((long_u)len, TRUE);
                if (retval == NULL)
                    return NULL;
            }
        }
    }
    else
    {
        if (submatch_match->endp[no] == NULL)
            retval = NULL;
        else
        {
            s = submatch_match->startp[no];
            retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
        }
    }

    return retval;
}
#endif