blob: dd0b1530a55ebeeb9113f4ecfa6d71e9d0c1f5ac [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
41#include "vim.h"
42
43#undef DEBUG
44
45/*
46 * The "internal use only" fields in regexp.h are present to pass info from
47 * compile to execute that permits the execute phase to run lots faster on
48 * simple cases. They are:
49 *
50 * regstart char that must begin a match; NUL if none obvious; Can be a
51 * multi-byte character.
52 * reganch is the match anchored (at beginning-of-line only)?
53 * regmust string (pointer into program) that match must include, or NULL
54 * regmlen length of regmust string
55 * regflags RF_ values or'ed together
56 *
57 * Regstart and reganch permit very fast decisions on suitable starting points
58 * for a match, cutting down the work a lot. Regmust permits fast rejection
59 * of lines that cannot possibly match. The regmust tests are costly enough
60 * that vim_regcomp() supplies a regmust only if the r.e. contains something
61 * potentially expensive (at present, the only such thing detected is * or +
62 * at the start of the r.e., which can involve a lot of backup). Regmlen is
63 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
64 * computing it anyway.
65 */
66
67/*
68 * Structure for regexp "program". This is essentially a linear encoding
69 * of a nondeterministic finite-state machine (aka syntax charts or
70 * "railroad normal form" in parsing technology). Each node is an opcode
71 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000077 * node points to the node after the stuff to be repeated.
78 * The operand of some types of node is a literal string; for others, it is a
79 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
80 * is the first node of the branch.
81 * (NB this is *not* a tree structure: the tail of the branch connects to the
82 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000083 *
84 * pattern is coded like:
85 *
86 * +-----------------+
87 * | V
88 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
89 * | ^ | ^
90 * +------+ +----------+
91 *
92 *
93 * +------------------+
94 * V |
95 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
96 * | | ^ ^
97 * | +---------------+ |
98 * +---------------------------------------------+
99 *
100 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000101 * +----------------------+
102 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000103 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000104 * | | ^ ^
105 * | +-----------+ |
106 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000107 *
108 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 * +-------------------------+
110 * V |
111 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
112 * | | ^
113 * | +----------------+
114 * +-----------------------------------------------+
115 *
116 *
117 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
118 * | | ^ ^
119 * | +----------------+ |
120 * +--------------------------------+
121 *
122 * +---------+
123 * | V
124 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
125 * | | | | ^ ^
126 * | | | +-----+ |
127 * | | +----------------+ |
128 * | +---------------------------+ |
129 * +------------------------------------------------------+
130 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
132 * one alternative.
133 */
134
135/*
136 * The opcodes are:
137 */
138
139/* definition number opnd? meaning */
140#define END 0 /* End of program or NOMATCH operand. */
141#define BOL 1 /* Match "" at beginning of line. */
142#define EOL 2 /* Match "" at end of line. */
143#define BRANCH 3 /* node Match this alternative, or the
144 * next... */
145#define BACK 4 /* Match "", "next" ptr points backward. */
146#define EXACTLY 5 /* str Match this string. */
147#define NOTHING 6 /* Match empty string. */
148#define STAR 7 /* node Match this (simple) thing 0 or more
149 * times. */
150#define PLUS 8 /* node Match this (simple) thing 1 or more
151 * times. */
152#define MATCH 9 /* node match the operand zero-width */
153#define NOMATCH 10 /* node check for no match with operand */
154#define BEHIND 11 /* node look behind for a match with operand */
155#define NOBEHIND 12 /* node look behind for no match with operand */
156#define SUBPAT 13 /* node match the operand here */
157#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
158 * n times (\{m,n\}). */
159#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
160#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
161#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
162 * and BRACE_COMPLEX. */
163#define NEWL 18 /* Match line-break */
164#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
165
166
/*
 * Character classes: 20-48 normal, 50-78 include a line-break.
 * The line-break variant of a class opcode is the opcode plus ADD_NL.
 * FIRST_NL and LAST_NL are fully parenthesized so that they can be used
 * inside any larger expression (e.g. "op - FIRST_NL") without surprises.
 */
#define ADD_NL      30
#define FIRST_NL    (ANY + ADD_NL)
#define ANY         20  /*      Match any one character. */
#define ANYOF       21  /* str  Match any character in this string. */
#define ANYBUT      22  /* str  Match any character not in this
                         *      string. */
#define IDENT       23  /*      Match identifier char */
#define SIDENT      24  /*      Match identifier char but no digit */
#define KWORD       25  /*      Match keyword char */
#define SKWORD      26  /*      Match word char but no digit */
#define FNAME       27  /*      Match file name char */
#define SFNAME      28  /*      Match file name char but no digit */
#define PRINT       29  /*      Match printable char */
#define SPRINT      30  /*      Match printable char but no digit */
#define WHITE       31  /*      Match whitespace char */
#define NWHITE      32  /*      Match non-whitespace char */
#define DIGIT       33  /*      Match digit char */
#define NDIGIT      34  /*      Match non-digit char */
#define HEX         35  /*      Match hex char */
#define NHEX        36  /*      Match non-hex char */
#define OCTAL       37  /*      Match octal char */
#define NOCTAL      38  /*      Match non-octal char */
#define WORD        39  /*      Match word char */
#define NWORD       40  /*      Match non-word char */
#define HEAD        41  /*      Match head char */
#define NHEAD       42  /*      Match non-head char */
#define ALPHA       43  /*      Match alpha char */
#define NALPHA      44  /*      Match non-alpha char */
#define LOWER       45  /*      Match lowercase char */
#define NLOWER      46  /*      Match non-lowercase char */
#define UPPER       47  /*      Match uppercase char */
#define NUPPER      48  /*      Match non-uppercase char */
#define LAST_NL     (NUPPER + ADD_NL)
/* TRUE when "op" is one of the class opcodes that also matches a line-break */
#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
202
203#define MOPEN 80 /* -89 Mark this point in input as start of
204 * \( subexpr. MOPEN + 0 marks start of
205 * match. */
206#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
207 * end of match. */
208#define BACKREF 100 /* -109 node Match same string again \1-\9 */
209
210#ifdef FEAT_SYN_HL
211# define ZOPEN 110 /* -119 Mark this point in input as start of
212 * \z( subexpr. */
213# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
214# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
215#endif
216
217#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
218
219#define NOPEN 150 /* Mark this point in input as start of
220 \%( subexpr. */
221#define NCLOSE 151 /* Analogous to NOPEN. */
222
223#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
224#define RE_BOF 201 /* Match "" at beginning of file. */
225#define RE_EOF 202 /* Match "" at end of file. */
226#define CURSOR 203 /* Match location of cursor. */
227
228#define RE_LNUM 204 /* nr cmp Match line number */
229#define RE_COL 205 /* nr cmp Match column number */
230#define RE_VCOL 206 /* nr cmp Match virtual column number */
231
232/*
233 * Magic characters have a special meaning, they don't match literally.
234 * Magic characters are negative. This separates them from literal characters
235 * (possibly multi-byte). Only ASCII characters can be Magic.
236 */
237#define Magic(x) ((int)(x) - 256)
238#define un_Magic(x) ((x) + 256)
239#define is_Magic(x) ((x) < 0)
240
241static int no_Magic __ARGS((int x));
242static int toggle_Magic __ARGS((int x));
243
/*
 * Strip the Magic-ness from "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
252
/*
 * Flip the Magic-ness of "x": a Magic (negative) value is converted back
 * with un_Magic(), any other value is made Magic.
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
261
262/*
263 * The first byte of the regexp internal "program" is actually this magic
264 * number; the start node begins in the second byte. It's used to catch the
265 * most severe mutilation of the program by the caller.
266 */
267
268#define REGMAGIC 0234
269
270/*
271 * Opcode notes:
272 *
273 * BRANCH The set of branches constituting a single choice are hooked
274 * together with their "next" pointers, since precedence prevents
275 * anything being concatenated to any individual branch. The
276 * "next" pointer of the last BRANCH in a choice points to the
277 * thing following the whole choice. This is also where the
278 * final "next" pointer of each individual branch points; each
279 * branch starts with the operand node of a BRANCH node.
280 *
281 * BACK Normal "next" pointers all implicitly point forward; BACK
282 * exists to make loop structures possible.
283 *
284 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
285 * BRANCH structures using BACK. Simple cases (one character
286 * per match) are implemented with STAR and PLUS for speed
287 * and to minimize recursive plunges.
288 *
289 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
290 * node, and defines the min and max limits to be used for that
291 * node.
292 *
293 * MOPEN,MCLOSE ...are numbered at compile time.
294 * ZOPEN,ZCLOSE ...ditto
295 */
296
297/*
298 * A node is one char of opcode followed by two chars of "next" pointer.
299 * "Next" pointers are stored as two 8-bit bytes, high order first. The
300 * value is a positive offset from the opcode of the node containing it.
301 * An operand, if any, simply follows the node. (Note that much of the
302 * code generation knows about this implicit relationship.)
303 *
304 * Using two bytes for the "next" pointer is vast overkill for most things,
305 * but allows patterns to get big without disasters.
306 */
307#define OP(p) ((int)*(p))
308#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
309#define OPERAND(p) ((p) + 3)
310/* Obtain an operand that was stored as four bytes, MSB first. */
311#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
312 + ((long)(p)[5] << 8) + (long)(p)[6])
313/* Obtain a second operand stored as four bytes. */
314#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
315/* Obtain a second single-byte operand stored after a four bytes operand. */
316#define OPERAND_CMP(p) (p)[7]
317
318/*
319 * Utility definitions.
320 */
321#define UCHARAT(p) ((int)*(char_u *)(p))
322
323/* Used for an error (down from) vim_regcomp(): give the error message, set
324 * rc_did_emsg and return NULL */
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000325#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, NULL)
326#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, NULL)
327#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
329
330#define MAX_LIMIT (32767L << 16L)
331
332static int re_multi_type __ARGS((int));
333static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
334static char_u *cstrchr __ARGS((char_u *, int));
335
336#ifdef DEBUG
337static void regdump __ARGS((char_u *, regprog_T *));
338static char_u *regprop __ARGS((char_u *));
339#endif
340
341#define NOT_MULTI 0
342#define MULTI_ONE 1
343#define MULTI_MULT 2
344/*
345 * Return NOT_MULTI if c is not a "multi" operator.
346 * Return MULTI_ONE if c is a single "multi" operator.
347 * Return MULTI_MULT if c is a multi "multi" operator.
348 */
349 static int
350re_multi_type(c)
351 int c;
352{
353 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
354 return MULTI_ONE;
355 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
356 return MULTI_MULT;
357 return NOT_MULTI;
358}
359
360/*
361 * Flags to be passed up and down.
362 */
363#define HASWIDTH 0x1 /* Known never to match null string. */
364#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
365#define SPSTART 0x4 /* Starts with * or +. */
366#define HASNL 0x8 /* Contains some \n. */
367#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
368#define WORST 0 /* Worst case. */
369
370/*
371 * When regcode is set to this value, code is not emitted and size is computed
372 * instead.
373 */
374#define JUST_CALC_SIZE ((char_u *) -1)
375
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000376static char_u *reg_prev_sub = NULL;
377
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the file-static pointer "reg_prev_sub" refers to.
 * Only compiled when EXITFREE (or PROTO) is defined.
 * The pointer is reset to NULL afterwards so that it does not dangle and a
 * second call is harmless.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
    reg_prev_sub = NULL;
}
#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385
386/*
387 * REGEXP_INRANGE contains all characters which are always special in a []
388 * range after '\'.
389 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
390 * These are:
391 * \n - New line (NL).
392 * \r - Carriage Return (CR).
393 * \t - Tab (TAB).
394 * \e - Escape (ESC).
395 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000396 * \d - Character code in decimal, eg \d123
 * \o - Character code in octal, eg \o40
398 * \x - Character code in hex, eg \x4a
399 * \u - Multibyte character code, eg \u20ac
400 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 */
402static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000403static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
405static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000406static int get_char_class __ARGS((char_u **pp));
407static int get_equi_class __ARGS((char_u **pp));
408static void reg_equi_class __ARGS((int c));
409static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410static char_u *skip_anyof __ARGS((char_u *p));
411static void init_class_tab __ARGS((void));
412
413/*
414 * Translate '\x' to its control character, except "\n", which is Magic.
415 */
416 static int
417backslash_trans(c)
418 int c;
419{
420 switch (c)
421 {
422 case 'r': return CAR;
423 case 't': return TAB;
424 case 'e': return ESC;
425 case 'b': return BS;
426 }
427 return c;
428}
429
430/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000431 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000432 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
433 * recognized. Otherwise "pp" is advanced to after the item.
434 */
435 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000436get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437 char_u **pp;
438{
439 static const char *(class_names[]) =
440 {
441 "alnum:]",
442#define CLASS_ALNUM 0
443 "alpha:]",
444#define CLASS_ALPHA 1
445 "blank:]",
446#define CLASS_BLANK 2
447 "cntrl:]",
448#define CLASS_CNTRL 3
449 "digit:]",
450#define CLASS_DIGIT 4
451 "graph:]",
452#define CLASS_GRAPH 5
453 "lower:]",
454#define CLASS_LOWER 6
455 "print:]",
456#define CLASS_PRINT 7
457 "punct:]",
458#define CLASS_PUNCT 8
459 "space:]",
460#define CLASS_SPACE 9
461 "upper:]",
462#define CLASS_UPPER 10
463 "xdigit:]",
464#define CLASS_XDIGIT 11
465 "tab:]",
466#define CLASS_TAB 12
467 "return:]",
468#define CLASS_RETURN 13
469 "backspace:]",
470#define CLASS_BACKSPACE 14
471 "escape:]",
472#define CLASS_ESCAPE 15
473 };
474#define CLASS_NONE 99
475 int i;
476
477 if ((*pp)[1] == ':')
478 {
479 for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
480 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
481 {
482 *pp += STRLEN(class_names[i]) + 2;
483 return i;
484 }
485 }
486 return CLASS_NONE;
487}
488
489/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490 * Specific version of character class functions.
491 * Using a table to keep this fast.
492 */
493static short class_tab[256];
494
495#define RI_DIGIT 0x01
496#define RI_HEX 0x02
497#define RI_OCTAL 0x04
498#define RI_WORD 0x08
499#define RI_HEAD 0x10
500#define RI_ALPHA 0x20
501#define RI_LOWER 0x40
502#define RI_UPPER 0x80
503#define RI_WHITE 0x100
504
505 static void
506init_class_tab()
507{
508 int i;
509 static int done = FALSE;
510
511 if (done)
512 return;
513
514 for (i = 0; i < 256; ++i)
515 {
516 if (i >= '0' && i <= '7')
517 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
518 else if (i >= '8' && i <= '9')
519 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
520 else if (i >= 'a' && i <= 'f')
521 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
522#ifdef EBCDIC
523 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
524 || (i >= 's' && i <= 'z'))
525#else
526 else if (i >= 'g' && i <= 'z')
527#endif
528 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
529 else if (i >= 'A' && i <= 'F')
530 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
531#ifdef EBCDIC
532 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
533 || (i >= 'S' && i <= 'Z'))
534#else
535 else if (i >= 'G' && i <= 'Z')
536#endif
537 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
538 else if (i == '_')
539 class_tab[i] = RI_WORD + RI_HEAD;
540 else
541 class_tab[i] = 0;
542 }
543 class_tab[' '] |= RI_WHITE;
544 class_tab['\t'] |= RI_WHITE;
545 done = TRUE;
546}
547
/*
 * Character class tests using class_tab[].
 * With FEAT_MBYTE "c" may be above 255; such characters are never in one of
 * these classes.
 * The argument is parenthesized in the range test so that an expression such
 * as "x & 0xff" is handled properly.  Note that with FEAT_MBYTE "c" is
 * evaluated twice: do not pass an argument with side effects.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
569
570/* flags for regflags */
571#define RF_ICASE 1 /* ignore case */
572#define RF_NOICASE 2 /* don't ignore case */
573#define RF_HASNL 4 /* can match a NL */
574#define RF_ICOMBINE 8 /* ignore combining characters */
575#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
576
577/*
578 * Global work variables for vim_regcomp().
579 */
580
581static char_u *regparse; /* Input-scan pointer. */
582static int prevchr_len; /* byte length of previous char */
583static int num_complex_braces; /* Complex \{...} count */
584static int regnpar; /* () count. */
585#ifdef FEAT_SYN_HL
586static int regnzpar; /* \z() count. */
587static int re_has_z; /* \z item detected */
588#endif
589static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
590static long regsize; /* Code size. */
591static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
592static unsigned regflags; /* RF_ flags for prog */
593static long brace_min[10]; /* Minimums for complex brace repeats */
594static long brace_max[10]; /* Maximums for complex brace repeats */
595static int brace_count[10]; /* Current counts for complex brace repeats */
596#if defined(FEAT_SYN_HL) || defined(PROTO)
597static int had_eol; /* TRUE when EOL found by vim_regcomp() */
598#endif
599static int one_exactly = FALSE; /* only do one char for EXACTLY */
600
601static int reg_magic; /* magicness of the pattern: */
602#define MAGIC_NONE 1 /* "\V" very unmagic */
603#define MAGIC_OFF 2 /* "\M" or 'magic' off */
604#define MAGIC_ON 3 /* "\m" or 'magic' */
605#define MAGIC_ALL 4 /* "\v" very magic */
606
607static int reg_string; /* matching with a string instead of a buffer
608 line */
609
610/*
611 * META contains all characters that may be magic, except '^' and '$'.
612 */
613
614#ifdef EBCDIC
615static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
616#else
617/* META[] is used often enough to justify turning it into a table. */
618static char_u META_flags[] = {
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
621/* % & ( ) * + . */
622 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
623/* 1 2 3 4 5 6 7 8 9 < = > ? */
624 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
625/* @ A C D F H I K L M O */
626 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
627/* P S U V W X Z [ _ */
628 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
629/* a c d f h i k l m n o */
630 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
631/* p s u v w x z { | ~ */
632 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
633};
634#endif
635
636static int curchr;
637
638/* arguments for reg() */
639#define REG_NOPAREN 0 /* toplevel reg() */
640#define REG_PAREN 1 /* \(\) */
641#define REG_ZPAREN 2 /* \z(\) */
642#define REG_NPAREN 3 /* \%(\) */
643
644/*
645 * Forward declarations for vim_regcomp()'s friends.
646 */
647static void initchr __ARGS((char_u *));
648static int getchr __ARGS((void));
649static void skipchr_keepstart __ARGS((void));
650static int peekchr __ARGS((void));
651static void skipchr __ARGS((void));
652static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000653static int gethexchrs __ARGS((int maxinputlen));
654static int getoctchrs __ARGS((void));
655static int getdecchrs __ARGS((void));
656static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000657static void regcomp_start __ARGS((char_u *expr, int flags));
658static char_u *reg __ARGS((int, int *));
659static char_u *regbranch __ARGS((int *flagp));
660static char_u *regconcat __ARGS((int *flagp));
661static char_u *regpiece __ARGS((int *));
662static char_u *regatom __ARGS((int *));
663static char_u *regnode __ARGS((int));
664static int prog_magic_wrong __ARGS((void));
665static char_u *regnext __ARGS((char_u *));
666static void regc __ARGS((int b));
667#ifdef FEAT_MBYTE
668static void regmbc __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000669#else
670# define regmbc(c) regc(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000671#endif
672static void reginsert __ARGS((int, char_u *));
673static void reginsert_limits __ARGS((int, long, long, char_u *));
674static char_u *re_put_long __ARGS((char_u *pr, long_u val));
675static int read_limits __ARGS((long *, long *));
676static void regtail __ARGS((char_u *, char_u *));
677static void regoptail __ARGS((char_u *, char_u *));
678
679/*
680 * Return TRUE if compiled regular expression "prog" can match a line break.
681 */
682 int
683re_multiline(prog)
684 regprog_T *prog;
685{
686 return (prog->regflags & RF_HASNL);
687}
688
689/*
690 * Return TRUE if compiled regular expression "prog" looks before the start
691 * position (pattern contains "\@<=" or "\@<!").
692 */
693 int
694re_lookbehind(prog)
695 regprog_T *prog;
696{
697 return (prog->regflags & RF_LOOKBH);
698}
699
700/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000701 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
702 * Returns a character representing the class. Zero means that no item was
703 * recognized. Otherwise "pp" is advanced to after the item.
704 */
705 static int
706get_equi_class(pp)
707 char_u **pp;
708{
709 int c;
710 int l = 1;
711 char_u *p = *pp;
712
713 if (p[1] == '=')
714 {
715#ifdef FEAT_MBYTE
716 if (has_mbyte)
717 l = mb_ptr2len_check(p + 2);
718#endif
719 if (p[l + 2] == '=' && p[l + 3] == ']')
720 {
721#ifdef FEAT_MBYTE
722 if (has_mbyte)
723 c = mb_ptr2char(p + 2);
724 else
725#endif
726 c = p[2];
727 *pp += l + 4;
728 return c;
729 }
730 }
731 return 0;
732}
733
/*
 * Produce the bytes for equivalence class "c".
 * Currently only handles latin1, latin9 and utf-8.
 * For a character that is in one of the groups below every member of the
 * group is emitted with regmbc(); any other character is emitted by itself.
 *
 * The accented characters are written as integer constants (0300) and not
 * as character constants ('\300'): where plain "char" is signed the latter
 * have a negative value, which never equals the non-negative "c" and is not
 * a valid character to pass to regmbc().
 */
    static void
reg_equi_class(c)
    int     c;
{
#ifdef FEAT_MBYTE
    if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
                                         || STRCMP(p_enc, "latin9") == 0)
#endif
    {
        switch (c)
        {
            case 'A': case 0300: case 0301: case 0302:
            case 0303: case 0304: case 0305:
                      regmbc('A'); regmbc(0300); regmbc(0301);
                      regmbc(0302); regmbc(0303); regmbc(0304);
                      regmbc(0305);
                      return;
            case 'C': case 0307:
                      regmbc('C'); regmbc(0307);
                      return;
            case 'E': case 0310: case 0311: case 0312: case 0313:
                      regmbc('E'); regmbc(0310); regmbc(0311);
                      regmbc(0312); regmbc(0313);
                      return;
            case 'I': case 0314: case 0315: case 0316: case 0317:
                      regmbc('I'); regmbc(0314); regmbc(0315);
                      regmbc(0316); regmbc(0317);
                      return;
            case 'N': case 0321:
                      regmbc('N'); regmbc(0321);
                      return;
            case 'O': case 0322: case 0323: case 0324: case 0325:
            case 0326:
                      regmbc('O'); regmbc(0322); regmbc(0323);
                      regmbc(0324); regmbc(0325); regmbc(0326);
                      return;
            case 'U': case 0331: case 0332: case 0333: case 0334:
                      regmbc('U'); regmbc(0331); regmbc(0332);
                      regmbc(0333); regmbc(0334);
                      return;
            case 'Y': case 0335:
                      regmbc('Y'); regmbc(0335);
                      return;
            case 'a': case 0340: case 0341: case 0342:
            case 0343: case 0344: case 0345:
                      regmbc('a'); regmbc(0340); regmbc(0341);
                      regmbc(0342); regmbc(0343); regmbc(0344);
                      regmbc(0345);
                      return;
            case 'c': case 0347:
                      regmbc('c'); regmbc(0347);
                      return;
            case 'e': case 0350: case 0351: case 0352: case 0353:
                      regmbc('e'); regmbc(0350); regmbc(0351);
                      regmbc(0352); regmbc(0353);
                      return;
            case 'i': case 0354: case 0355: case 0356: case 0357:
                      regmbc('i'); regmbc(0354); regmbc(0355);
                      regmbc(0356); regmbc(0357);
                      return;
            case 'n': case 0361:
                      regmbc('n'); regmbc(0361);
                      return;
            case 'o': case 0362: case 0363: case 0364: case 0365:
            case 0366:
                      regmbc('o'); regmbc(0362); regmbc(0363);
                      regmbc(0364); regmbc(0365); regmbc(0366);
                      return;
            case 'u': case 0371: case 0372: case 0373: case 0374:
                      regmbc('u'); regmbc(0371); regmbc(0372);
                      regmbc(0373); regmbc(0374);
                      return;
            case 'y': case 0375: case 0377:
                      regmbc('y'); regmbc(0375); regmbc(0377);
                      return;
        }
    }

    /* Not in a known group (or unsupported encoding): only "c" itself. */
    regmbc(c);
}
817
818/*
819 * Check for a collating element "[.a.]". "pp" points to the '['.
820 * Returns a character. Zero means that no item was recognized. Otherwise
821 * "pp" is advanced to after the item.
822 * Currently only single characters are recognized!
823 */
824 static int
825get_coll_element(pp)
826 char_u **pp;
827{
828 int c;
829 int l = 1;
830 char_u *p = *pp;
831
832 if (p[1] == '.')
833 {
834#ifdef FEAT_MBYTE
835 if (has_mbyte)
836 l = mb_ptr2len_check(p + 2);
837#endif
838 if (p[l + 2] == '.' && p[l + 3] == ']')
839 {
840#ifdef FEAT_MBYTE
841 if (has_mbyte)
842 c = mb_ptr2char(p + 2);
843 else
844#endif
845 c = p[2];
846 *pp += l + 4;
847 return c;
848 }
849 }
850 return 0;
851}
852
853
854/*
855 * Skip over a "[]" range.
856 * "p" must point to the character after the '['.
857 * The returned pointer is on the matching ']', or the terminating NUL.
858 */
859 static char_u *
860skip_anyof(p)
861 char_u *p;
862{
863 int cpo_lit; /* 'cpoptions' contains 'l' flag */
864 int cpo_bsl; /* 'cpoptions' contains '\' flag */
865#ifdef FEAT_MBYTE
866 int l;
867#endif
868
869 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
870 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
871
872 if (*p == '^') /* Complement of range. */
873 ++p;
874 if (*p == ']' || *p == '-')
875 ++p;
876 while (*p != NUL && *p != ']')
877 {
878#ifdef FEAT_MBYTE
879 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
880 p += l;
881 else
882#endif
883 if (*p == '-')
884 {
885 ++p;
886 if (*p != ']' && *p != NUL)
887 mb_ptr_adv(p);
888 }
889 else if (*p == '\\'
890 && !cpo_bsl
891 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
892 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
893 p += 2;
894 else if (*p == '[')
895 {
896 if (get_char_class(&p) == CLASS_NONE
897 && get_equi_class(&p) == 0
898 && get_coll_element(&p) == 0)
899 ++p; /* It was not a class name */
900 }
901 else
902 ++p;
903 }
904
905 return p;
906}
907
908/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000909 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +0000910 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000911 * Take care of characters with a backslash in front of it.
912 * Skip strings inside [ and ].
913 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
914 * expression and change "\?" to "?". If "*newp" is not NULL the expression
915 * is changed in-place.
916 */
917 char_u *
918skip_regexp(startp, dirc, magic, newp)
919 char_u *startp;
920 int dirc;
921 int magic;
922 char_u **newp;
923{
924 int mymagic;
925 char_u *p = startp;
926
927 if (magic)
928 mymagic = MAGIC_ON;
929 else
930 mymagic = MAGIC_OFF;
931
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000932 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000933 {
934 if (p[0] == dirc) /* found end of regexp */
935 break;
936 if ((p[0] == '[' && mymagic >= MAGIC_ON)
937 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
938 {
939 p = skip_anyof(p + 1);
940 if (p[0] == NUL)
941 break;
942 }
943 else if (p[0] == '\\' && p[1] != NUL)
944 {
945 if (dirc == '?' && newp != NULL && p[1] == '?')
946 {
947 /* change "\?" to "?", make a copy first. */
948 if (*newp == NULL)
949 {
950 *newp = vim_strsave(startp);
951 if (*newp != NULL)
952 p = *newp + (p - startp);
953 }
954 if (*newp != NULL)
955 mch_memmove(p, p + 1, STRLEN(p));
956 else
957 ++p;
958 }
959 else
960 ++p; /* skip next character */
961 if (*p == 'v')
962 mymagic = MAGIC_ALL;
963 else if (*p == 'V')
964 mymagic = MAGIC_NONE;
965 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000966 }
967 return p;
968}
969
970/*
Bram Moolenaar86b68352004-12-27 21:59:20 +0000971 * vim_regcomp() - compile a regular expression into internal code
972 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000973 *
974 * We can't allocate space until we know how big the compiled form will be,
975 * but we can't compile it (and thus know how big it is) until we've got a
976 * place to put the code. So we cheat: we compile it twice, once with code
977 * generation turned off and size counting turned on, and once "for real".
978 * This also means that we don't allocate space until we are sure that the
979 * thing really will compile successfully, and we never have to move the
980 * code and thus invalidate pointers into it. (Note that it has to be in
981 * one piece because vim_free() must be able to free it all.)
982 *
983 * Whether upper/lower case is to be ignored is decided when executing the
984 * program, it does not matter here.
985 *
986 * Beware that the optimization-preparation code in here knows about some
987 * of the structure of the compiled regexp.
988 * "re_flags": RE_MAGIC and/or RE_STRING.
989 */
990 regprog_T *
991vim_regcomp(expr, re_flags)
992 char_u *expr;
993 int re_flags;
994{
995 regprog_T *r;
996 char_u *scan;
997 char_u *longest;
998 int len;
999 int flags;
1000
1001 if (expr == NULL)
1002 EMSG_RET_NULL(_(e_null));
1003
1004 init_class_tab();
1005
1006 /*
1007 * First pass: determine size, legality.
1008 */
1009 regcomp_start(expr, re_flags);
1010 regcode = JUST_CALC_SIZE;
1011 regc(REGMAGIC);
1012 if (reg(REG_NOPAREN, &flags) == NULL)
1013 return NULL;
1014
1015 /* Small enough for pointer-storage convention? */
1016#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1017 if (regsize >= 65536L - 256L)
1018 EMSG_RET_NULL(_("E339: Pattern too long"));
1019#endif
1020
1021 /* Allocate space. */
1022 r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1023 if (r == NULL)
1024 return NULL;
1025
1026 /*
1027 * Second pass: emit code.
1028 */
1029 regcomp_start(expr, re_flags);
1030 regcode = r->program;
1031 regc(REGMAGIC);
1032 if (reg(REG_NOPAREN, &flags) == NULL)
1033 {
1034 vim_free(r);
1035 return NULL;
1036 }
1037
1038 /* Dig out information for optimizations. */
1039 r->regstart = NUL; /* Worst-case defaults. */
1040 r->reganch = 0;
1041 r->regmust = NULL;
1042 r->regmlen = 0;
1043 r->regflags = regflags;
1044 if (flags & HASNL)
1045 r->regflags |= RF_HASNL;
1046 if (flags & HASLOOKBH)
1047 r->regflags |= RF_LOOKBH;
1048#ifdef FEAT_SYN_HL
1049 /* Remember whether this pattern has any \z specials in it. */
1050 r->reghasz = re_has_z;
1051#endif
1052 scan = r->program + 1; /* First BRANCH. */
1053 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1054 {
1055 scan = OPERAND(scan);
1056
1057 /* Starting-point info. */
1058 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1059 {
1060 r->reganch++;
1061 scan = regnext(scan);
1062 }
1063
1064 if (OP(scan) == EXACTLY)
1065 {
1066#ifdef FEAT_MBYTE
1067 if (has_mbyte)
1068 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1069 else
1070#endif
1071 r->regstart = *OPERAND(scan);
1072 }
1073 else if ((OP(scan) == BOW
1074 || OP(scan) == EOW
1075 || OP(scan) == NOTHING
1076 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1077 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1078 && OP(regnext(scan)) == EXACTLY)
1079 {
1080#ifdef FEAT_MBYTE
1081 if (has_mbyte)
1082 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1083 else
1084#endif
1085 r->regstart = *OPERAND(regnext(scan));
1086 }
1087
1088 /*
1089 * If there's something expensive in the r.e., find the longest
1090 * literal string that must appear and make it the regmust. Resolve
1091 * ties in favor of later strings, since the regstart check works
1092 * with the beginning of the r.e. and avoiding duplication
1093 * strengthens checking. Not a strong reason, but sufficient in the
1094 * absence of others.
1095 */
1096 /*
1097 * When the r.e. starts with BOW, it is faster to look for a regmust
1098 * first. Used a lot for "#" and "*" commands. (Added by mool).
1099 */
1100 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1101 && !(flags & HASNL))
1102 {
1103 longest = NULL;
1104 len = 0;
1105 for (; scan != NULL; scan = regnext(scan))
1106 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1107 {
1108 longest = OPERAND(scan);
1109 len = (int)STRLEN(OPERAND(scan));
1110 }
1111 r->regmust = longest;
1112 r->regmlen = len;
1113 }
1114 }
1115#ifdef DEBUG
1116 regdump(expr, r);
1117#endif
1118 return r;
1119}
1120
1121/*
1122 * Setup to parse the regexp. Used once to get the length and once to do it.
1123 */
1124 static void
1125regcomp_start(expr, re_flags)
1126 char_u *expr;
1127 int re_flags; /* see vim_regcomp() */
1128{
1129 initchr(expr);
1130 if (re_flags & RE_MAGIC)
1131 reg_magic = MAGIC_ON;
1132 else
1133 reg_magic = MAGIC_OFF;
1134 reg_string = (re_flags & RE_STRING);
1135
1136 num_complex_braces = 0;
1137 regnpar = 1;
1138 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1139#ifdef FEAT_SYN_HL
1140 regnzpar = 1;
1141 re_has_z = 0;
1142#endif
1143 regsize = 0L;
1144 regflags = 0;
1145#if defined(FEAT_SYN_HL) || defined(PROTO)
1146 had_eol = FALSE;
1147#endif
1148}
1149
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag.  It is reset at the start of every compile
 * pass and set when the pattern contains an EOL item ("$"), thus it tells
 * whether the most recently compiled pattern had one.
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1161
1162/*
1163 * reg - regular expression, i.e. main body or parenthesized thing
1164 *
1165 * Caller must absorb opening parenthesis.
1166 *
1167 * Combining parenthesis handling with the base level of regular expression
1168 * is a trifle forced, but the need to tie the tails of the branches to what
1169 * follows makes it hard to avoid.
1170 */
1171 static char_u *
1172reg(paren, flagp)
1173 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1174 int *flagp;
1175{
1176 char_u *ret;
1177 char_u *br;
1178 char_u *ender;
1179 int parno = 0;
1180 int flags;
1181
1182 *flagp = HASWIDTH; /* Tentatively. */
1183
1184#ifdef FEAT_SYN_HL
1185 if (paren == REG_ZPAREN)
1186 {
1187 /* Make a ZOPEN node. */
1188 if (regnzpar >= NSUBEXP)
1189 EMSG_RET_NULL(_("E50: Too many \\z("));
1190 parno = regnzpar;
1191 regnzpar++;
1192 ret = regnode(ZOPEN + parno);
1193 }
1194 else
1195#endif
1196 if (paren == REG_PAREN)
1197 {
1198 /* Make a MOPEN node. */
1199 if (regnpar >= NSUBEXP)
1200 EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1201 parno = regnpar;
1202 ++regnpar;
1203 ret = regnode(MOPEN + parno);
1204 }
1205 else if (paren == REG_NPAREN)
1206 {
1207 /* Make a NOPEN node. */
1208 ret = regnode(NOPEN);
1209 }
1210 else
1211 ret = NULL;
1212
1213 /* Pick up the branches, linking them together. */
1214 br = regbranch(&flags);
1215 if (br == NULL)
1216 return NULL;
1217 if (ret != NULL)
1218 regtail(ret, br); /* [MZ]OPEN -> first. */
1219 else
1220 ret = br;
1221 /* If one of the branches can be zero-width, the whole thing can.
1222 * If one of the branches has * at start or matches a line-break, the
1223 * whole thing can. */
1224 if (!(flags & HASWIDTH))
1225 *flagp &= ~HASWIDTH;
1226 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1227 while (peekchr() == Magic('|'))
1228 {
1229 skipchr();
1230 br = regbranch(&flags);
1231 if (br == NULL)
1232 return NULL;
1233 regtail(ret, br); /* BRANCH -> BRANCH. */
1234 if (!(flags & HASWIDTH))
1235 *flagp &= ~HASWIDTH;
1236 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1237 }
1238
1239 /* Make a closing node, and hook it on the end. */
1240 ender = regnode(
1241#ifdef FEAT_SYN_HL
1242 paren == REG_ZPAREN ? ZCLOSE + parno :
1243#endif
1244 paren == REG_PAREN ? MCLOSE + parno :
1245 paren == REG_NPAREN ? NCLOSE : END);
1246 regtail(ret, ender);
1247
1248 /* Hook the tails of the branches to the closing node. */
1249 for (br = ret; br != NULL; br = regnext(br))
1250 regoptail(br, ender);
1251
1252 /* Check for proper termination. */
1253 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1254 {
1255#ifdef FEAT_SYN_HL
1256 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001257 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001258 else
1259#endif
1260 if (paren == REG_NPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001261 EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001262 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001263 EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 }
1265 else if (paren == REG_NOPAREN && peekchr() != NUL)
1266 {
1267 if (curchr == Magic(')'))
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001268 EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001269 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001270 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 /* NOTREACHED */
1272 }
1273 /*
1274 * Here we set the flag allowing back references to this set of
1275 * parentheses.
1276 */
1277 if (paren == REG_PAREN)
1278 had_endbrace[parno] = TRUE; /* have seen the close paren */
1279 return ret;
1280}
1281
1282/*
1283 * regbranch - one alternative of an | operator
1284 *
1285 * Implements the & operator.
1286 */
1287 static char_u *
1288regbranch(flagp)
1289 int *flagp;
1290{
1291 char_u *ret;
1292 char_u *chain = NULL;
1293 char_u *latest;
1294 int flags;
1295
1296 *flagp = WORST | HASNL; /* Tentatively. */
1297
1298 ret = regnode(BRANCH);
1299 for (;;)
1300 {
1301 latest = regconcat(&flags);
1302 if (latest == NULL)
1303 return NULL;
1304 /* If one of the branches has width, the whole thing has. If one of
1305 * the branches anchors at start-of-line, the whole thing does.
1306 * If one of the branches uses look-behind, the whole thing does. */
1307 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1308 /* If one of the branches doesn't match a line-break, the whole thing
1309 * doesn't. */
1310 *flagp &= ~HASNL | (flags & HASNL);
1311 if (chain != NULL)
1312 regtail(chain, latest);
1313 if (peekchr() != Magic('&'))
1314 break;
1315 skipchr();
1316 regtail(latest, regnode(END)); /* operand ends */
1317 reginsert(MATCH, latest);
1318 chain = latest;
1319 }
1320
1321 return ret;
1322}
1323
1324/*
1325 * regbranch - one alternative of an | or & operator
1326 *
1327 * Implements the concatenation operator.
1328 */
1329 static char_u *
1330regconcat(flagp)
1331 int *flagp;
1332{
1333 char_u *first = NULL;
1334 char_u *chain = NULL;
1335 char_u *latest;
1336 int flags;
1337 int cont = TRUE;
1338
1339 *flagp = WORST; /* Tentatively. */
1340
1341 while (cont)
1342 {
1343 switch (peekchr())
1344 {
1345 case NUL:
1346 case Magic('|'):
1347 case Magic('&'):
1348 case Magic(')'):
1349 cont = FALSE;
1350 break;
1351 case Magic('Z'):
1352#ifdef FEAT_MBYTE
1353 regflags |= RF_ICOMBINE;
1354#endif
1355 skipchr_keepstart();
1356 break;
1357 case Magic('c'):
1358 regflags |= RF_ICASE;
1359 skipchr_keepstart();
1360 break;
1361 case Magic('C'):
1362 regflags |= RF_NOICASE;
1363 skipchr_keepstart();
1364 break;
1365 case Magic('v'):
1366 reg_magic = MAGIC_ALL;
1367 skipchr_keepstart();
1368 curchr = -1;
1369 break;
1370 case Magic('m'):
1371 reg_magic = MAGIC_ON;
1372 skipchr_keepstart();
1373 curchr = -1;
1374 break;
1375 case Magic('M'):
1376 reg_magic = MAGIC_OFF;
1377 skipchr_keepstart();
1378 curchr = -1;
1379 break;
1380 case Magic('V'):
1381 reg_magic = MAGIC_NONE;
1382 skipchr_keepstart();
1383 curchr = -1;
1384 break;
1385 default:
1386 latest = regpiece(&flags);
1387 if (latest == NULL)
1388 return NULL;
1389 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1390 if (chain == NULL) /* First piece. */
1391 *flagp |= flags & SPSTART;
1392 else
1393 regtail(chain, latest);
1394 chain = latest;
1395 if (first == NULL)
1396 first = latest;
1397 break;
1398 }
1399 }
1400 if (first == NULL) /* Loop ran zero times. */
1401 first = regnode(NOTHING);
1402 return first;
1403}
1404
1405/*
1406 * regpiece - something followed by possible [*+=]
1407 *
1408 * Note that the branching code sequences used for = and the general cases
1409 * of * and + are somewhat optimized: they use the same NOTHING node as
1410 * both the endmarker for their branch list and the body of the last branch.
1411 * It might seem that this node could be dispensed with entirely, but the
1412 * endmarker role is not redundant.
1413 */
1414 static char_u *
1415regpiece(flagp)
1416 int *flagp;
1417{
1418 char_u *ret;
1419 int op;
1420 char_u *next;
1421 int flags;
1422 long minval;
1423 long maxval;
1424
1425 ret = regatom(&flags);
1426 if (ret == NULL)
1427 return NULL;
1428
1429 op = peekchr();
1430 if (re_multi_type(op) == NOT_MULTI)
1431 {
1432 *flagp = flags;
1433 return ret;
1434 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001435 /* default flags */
1436 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1437
1438 skipchr();
1439 switch (op)
1440 {
1441 case Magic('*'):
1442 if (flags & SIMPLE)
1443 reginsert(STAR, ret);
1444 else
1445 {
1446 /* Emit x* as (x&|), where & means "self". */
1447 reginsert(BRANCH, ret); /* Either x */
1448 regoptail(ret, regnode(BACK)); /* and loop */
1449 regoptail(ret, ret); /* back */
1450 regtail(ret, regnode(BRANCH)); /* or */
1451 regtail(ret, regnode(NOTHING)); /* null. */
1452 }
1453 break;
1454
1455 case Magic('+'):
1456 if (flags & SIMPLE)
1457 reginsert(PLUS, ret);
1458 else
1459 {
1460 /* Emit x+ as x(&|), where & means "self". */
1461 next = regnode(BRANCH); /* Either */
1462 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001463 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001464 regtail(next, regnode(BRANCH)); /* or */
1465 regtail(ret, regnode(NOTHING)); /* null. */
1466 }
1467 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1468 break;
1469
1470 case Magic('@'):
1471 {
1472 int lop = END;
1473
1474 switch (no_Magic(getchr()))
1475 {
1476 case '=': lop = MATCH; break; /* \@= */
1477 case '!': lop = NOMATCH; break; /* \@! */
1478 case '>': lop = SUBPAT; break; /* \@> */
1479 case '<': switch (no_Magic(getchr()))
1480 {
1481 case '=': lop = BEHIND; break; /* \@<= */
1482 case '!': lop = NOBEHIND; break; /* \@<! */
1483 }
1484 }
1485 if (lop == END)
1486 EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1487 reg_magic == MAGIC_ALL);
1488 /* Look behind must match with behind_pos. */
1489 if (lop == BEHIND || lop == NOBEHIND)
1490 {
1491 regtail(ret, regnode(BHPOS));
1492 *flagp |= HASLOOKBH;
1493 }
1494 regtail(ret, regnode(END)); /* operand ends */
1495 reginsert(lop, ret);
1496 break;
1497 }
1498
1499 case Magic('?'):
1500 case Magic('='):
1501 /* Emit x= as (x|) */
1502 reginsert(BRANCH, ret); /* Either x */
1503 regtail(ret, regnode(BRANCH)); /* or */
1504 next = regnode(NOTHING); /* null. */
1505 regtail(ret, next);
1506 regoptail(ret, next);
1507 break;
1508
1509 case Magic('{'):
1510 if (!read_limits(&minval, &maxval))
1511 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001512 if (flags & SIMPLE)
1513 {
1514 reginsert(BRACE_SIMPLE, ret);
1515 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1516 }
1517 else
1518 {
1519 if (num_complex_braces >= 10)
1520 EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1521 reg_magic == MAGIC_ALL);
1522 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1523 regoptail(ret, regnode(BACK));
1524 regoptail(ret, ret);
1525 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1526 ++num_complex_braces;
1527 }
1528 if (minval > 0 && maxval > 0)
1529 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1530 break;
1531 }
1532 if (re_multi_type(peekchr()) != NOT_MULTI)
1533 {
1534 /* Can't have a multi follow a multi. */
1535 if (peekchr() == Magic('*'))
1536 sprintf((char *)IObuff, _("E61: Nested %s*"),
1537 reg_magic >= MAGIC_ON ? "" : "\\");
1538 else
1539 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1540 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1541 EMSG_RET_NULL(IObuff);
1542 }
1543
1544 return ret;
1545}
1546
1547/*
1548 * regatom - the lowest level
1549 *
1550 * Optimization: gobbles an entire sequence of ordinary characters so that
1551 * it can turn them into a single node, which is smaller to store and
1552 * faster to run. Don't do this when one_exactly is set.
1553 */
1554 static char_u *
1555regatom(flagp)
1556 int *flagp;
1557{
1558 char_u *ret;
1559 int flags;
1560 int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001561 int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001562 int c;
1563 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1564 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1565 FNAME, SFNAME, PRINT, SPRINT,
1566 WHITE, NWHITE, DIGIT, NDIGIT,
1567 HEX, NHEX, OCTAL, NOCTAL,
1568 WORD, NWORD, HEAD, NHEAD,
1569 ALPHA, NALPHA, LOWER, NLOWER,
1570 UPPER, NUPPER
1571 };
1572 char_u *p;
1573 int extra = 0;
1574
1575 *flagp = WORST; /* Tentatively. */
1576 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
Bram Moolenaardf177f62005-02-22 08:39:57 +00001577 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001578
1579 c = getchr();
1580 switch (c)
1581 {
1582 case Magic('^'):
1583 ret = regnode(BOL);
1584 break;
1585
1586 case Magic('$'):
1587 ret = regnode(EOL);
1588#if defined(FEAT_SYN_HL) || defined(PROTO)
1589 had_eol = TRUE;
1590#endif
1591 break;
1592
1593 case Magic('<'):
1594 ret = regnode(BOW);
1595 break;
1596
1597 case Magic('>'):
1598 ret = regnode(EOW);
1599 break;
1600
1601 case Magic('_'):
1602 c = no_Magic(getchr());
1603 if (c == '^') /* "\_^" is start-of-line */
1604 {
1605 ret = regnode(BOL);
1606 break;
1607 }
1608 if (c == '$') /* "\_$" is end-of-line */
1609 {
1610 ret = regnode(EOL);
1611#if defined(FEAT_SYN_HL) || defined(PROTO)
1612 had_eol = TRUE;
1613#endif
1614 break;
1615 }
1616
1617 extra = ADD_NL;
1618 *flagp |= HASNL;
1619
1620 /* "\_[" is character range plus newline */
1621 if (c == '[')
1622 goto collection;
1623
1624 /* "\_x" is character class plus newline */
1625 /*FALLTHROUGH*/
1626
1627 /*
1628 * Character classes.
1629 */
1630 case Magic('.'):
1631 case Magic('i'):
1632 case Magic('I'):
1633 case Magic('k'):
1634 case Magic('K'):
1635 case Magic('f'):
1636 case Magic('F'):
1637 case Magic('p'):
1638 case Magic('P'):
1639 case Magic('s'):
1640 case Magic('S'):
1641 case Magic('d'):
1642 case Magic('D'):
1643 case Magic('x'):
1644 case Magic('X'):
1645 case Magic('o'):
1646 case Magic('O'):
1647 case Magic('w'):
1648 case Magic('W'):
1649 case Magic('h'):
1650 case Magic('H'):
1651 case Magic('a'):
1652 case Magic('A'):
1653 case Magic('l'):
1654 case Magic('L'):
1655 case Magic('u'):
1656 case Magic('U'):
1657 p = vim_strchr(classchars, no_Magic(c));
1658 if (p == NULL)
1659 EMSG_RET_NULL(_("E63: invalid use of \\_"));
1660 ret = regnode(classcodes[p - classchars] + extra);
1661 *flagp |= HASWIDTH | SIMPLE;
1662 break;
1663
1664 case Magic('n'):
1665 if (reg_string)
1666 {
1667 /* In a string "\n" matches a newline character. */
1668 ret = regnode(EXACTLY);
1669 regc(NL);
1670 regc(NUL);
1671 *flagp |= HASWIDTH | SIMPLE;
1672 }
1673 else
1674 {
1675 /* In buffer text "\n" matches the end of a line. */
1676 ret = regnode(NEWL);
1677 *flagp |= HASWIDTH | HASNL;
1678 }
1679 break;
1680
1681 case Magic('('):
1682 if (one_exactly)
1683 EMSG_ONE_RET_NULL;
1684 ret = reg(REG_PAREN, &flags);
1685 if (ret == NULL)
1686 return NULL;
1687 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1688 break;
1689
1690 case NUL:
1691 case Magic('|'):
1692 case Magic('&'):
1693 case Magic(')'):
1694 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
1695 /* NOTREACHED */
1696
1697 case Magic('='):
1698 case Magic('?'):
1699 case Magic('+'):
1700 case Magic('@'):
1701 case Magic('{'):
1702 case Magic('*'):
1703 c = no_Magic(c);
1704 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1705 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1706 ? "" : "\\", c);
1707 EMSG_RET_NULL(IObuff);
1708 /* NOTREACHED */
1709
1710 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00001711 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001712 {
1713 char_u *lp;
1714
1715 ret = regnode(EXACTLY);
1716 lp = reg_prev_sub;
1717 while (*lp != NUL)
1718 regc(*lp++);
1719 regc(NUL);
1720 if (*reg_prev_sub != NUL)
1721 {
1722 *flagp |= HASWIDTH;
1723 if ((lp - reg_prev_sub) == 1)
1724 *flagp |= SIMPLE;
1725 }
1726 }
1727 else
1728 EMSG_RET_NULL(_(e_nopresub));
1729 break;
1730
1731 case Magic('1'):
1732 case Magic('2'):
1733 case Magic('3'):
1734 case Magic('4'):
1735 case Magic('5'):
1736 case Magic('6'):
1737 case Magic('7'):
1738 case Magic('8'):
1739 case Magic('9'):
1740 {
1741 int refnum;
1742
1743 refnum = c - Magic('0');
1744 /*
1745 * Check if the back reference is legal. We must have seen the
1746 * close brace.
1747 * TODO: Should also check that we don't refer to something
1748 * that is repeated (+*=): what instance of the repetition
1749 * should we match?
1750 */
1751 if (!had_endbrace[refnum])
1752 {
1753 /* Trick: check if "@<=" or "@<!" follows, in which case
1754 * the \1 can appear before the referenced match. */
1755 for (p = regparse; *p != NUL; ++p)
1756 if (p[0] == '@' && p[1] == '<'
1757 && (p[2] == '!' || p[2] == '='))
1758 break;
1759 if (*p == NUL)
1760 EMSG_RET_NULL(_("E65: Illegal back reference"));
1761 }
1762 ret = regnode(BACKREF + refnum);
1763 }
1764 break;
1765
1766#ifdef FEAT_SYN_HL
1767 case Magic('z'):
1768 {
1769 c = no_Magic(getchr());
1770 switch (c)
1771 {
1772 case '(': if (reg_do_extmatch != REX_SET)
1773 EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1774 if (one_exactly)
1775 EMSG_ONE_RET_NULL;
1776 ret = reg(REG_ZPAREN, &flags);
1777 if (ret == NULL)
1778 return NULL;
1779 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1780 re_has_z = REX_SET;
1781 break;
1782
1783 case '1':
1784 case '2':
1785 case '3':
1786 case '4':
1787 case '5':
1788 case '6':
1789 case '7':
1790 case '8':
1791 case '9': if (reg_do_extmatch != REX_USE)
1792 EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1793 ret = regnode(ZREF + c - '0');
1794 re_has_z = REX_USE;
1795 break;
1796
1797 case 's': ret = regnode(MOPEN + 0);
1798 break;
1799
1800 case 'e': ret = regnode(MCLOSE + 0);
1801 break;
1802
1803 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1804 }
1805 }
1806 break;
1807#endif
1808
1809 case Magic('%'):
1810 {
1811 c = no_Magic(getchr());
1812 switch (c)
1813 {
1814 /* () without a back reference */
1815 case '(':
1816 if (one_exactly)
1817 EMSG_ONE_RET_NULL;
1818 ret = reg(REG_NPAREN, &flags);
1819 if (ret == NULL)
1820 return NULL;
1821 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1822 break;
1823
1824 /* Catch \%^ and \%$ regardless of where they appear in the
1825 * pattern -- regardless of whether or not it makes sense. */
1826 case '^':
1827 ret = regnode(RE_BOF);
1828 break;
1829
1830 case '$':
1831 ret = regnode(RE_EOF);
1832 break;
1833
1834 case '#':
1835 ret = regnode(CURSOR);
1836 break;
1837
1838 /* \%[abc]: Emit as a list of branches, all ending at the last
1839 * branch which matches nothing. */
1840 case '[':
1841 if (one_exactly) /* doesn't nest */
1842 EMSG_ONE_RET_NULL;
1843 {
1844 char_u *lastbranch;
1845 char_u *lastnode = NULL;
1846 char_u *br;
1847
1848 ret = NULL;
1849 while ((c = getchr()) != ']')
1850 {
1851 if (c == NUL)
1852 EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1853 reg_magic == MAGIC_ALL);
1854 br = regnode(BRANCH);
1855 if (ret == NULL)
1856 ret = br;
1857 else
1858 regtail(lastnode, br);
1859
1860 ungetchr();
1861 one_exactly = TRUE;
1862 lastnode = regatom(flagp);
1863 one_exactly = FALSE;
1864 if (lastnode == NULL)
1865 return NULL;
1866 }
1867 if (ret == NULL)
1868 EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1869 reg_magic == MAGIC_ALL);
1870 lastbranch = regnode(BRANCH);
1871 br = regnode(NOTHING);
1872 if (ret != JUST_CALC_SIZE)
1873 {
1874 regtail(lastnode, br);
1875 regtail(lastbranch, br);
1876 /* connect all branches to the NOTHING
1877 * branch at the end */
1878 for (br = ret; br != lastnode; )
1879 {
1880 if (OP(br) == BRANCH)
1881 {
1882 regtail(br, lastbranch);
1883 br = OPERAND(br);
1884 }
1885 else
1886 br = regnext(br);
1887 }
1888 }
1889 *flagp &= ~HASWIDTH;
1890 break;
1891 }
1892
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001893 case 'd': /* %d123 decimal */
1894 case 'o': /* %o123 octal */
1895 case 'x': /* %xab hex 2 */
1896 case 'u': /* %uabcd hex 4 */
1897 case 'U': /* %U1234abcd hex 8 */
1898 {
1899 int i;
1900
1901 switch (c)
1902 {
1903 case 'd': i = getdecchrs(); break;
1904 case 'o': i = getoctchrs(); break;
1905 case 'x': i = gethexchrs(2); break;
1906 case 'u': i = gethexchrs(4); break;
1907 case 'U': i = gethexchrs(8); break;
1908 default: i = -1; break;
1909 }
1910
1911 if (i < 0)
1912 EMSG_M_RET_NULL(
1913 _("E678: Invalid character after %s%%[dxouU]"),
1914 reg_magic == MAGIC_ALL);
1915 ret = regnode(EXACTLY);
1916 if (i == 0)
1917 regc(0x0a);
1918 else
1919#ifdef FEAT_MBYTE
1920 regmbc(i);
1921#else
1922 regc(i);
1923#endif
1924 regc(NUL);
1925 *flagp |= HASWIDTH;
1926 break;
1927 }
1928
Bram Moolenaar071d4272004-06-13 20:20:40 +00001929 default:
1930 if (VIM_ISDIGIT(c) || c == '<' || c == '>')
1931 {
1932 long_u n = 0;
1933 int cmp;
1934
1935 cmp = c;
1936 if (cmp == '<' || cmp == '>')
1937 c = getchr();
1938 while (VIM_ISDIGIT(c))
1939 {
1940 n = n * 10 + (c - '0');
1941 c = getchr();
1942 }
1943 if (c == 'l' || c == 'c' || c == 'v')
1944 {
1945 if (c == 'l')
1946 ret = regnode(RE_LNUM);
1947 else if (c == 'c')
1948 ret = regnode(RE_COL);
1949 else
1950 ret = regnode(RE_VCOL);
1951 if (ret == JUST_CALC_SIZE)
1952 regsize += 5;
1953 else
1954 {
1955 /* put the number and the optional
1956 * comparator after the opcode */
1957 regcode = re_put_long(regcode, n);
1958 *regcode++ = cmp;
1959 }
1960 break;
1961 }
1962 }
1963
1964 EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1965 reg_magic == MAGIC_ALL);
1966 }
1967 }
1968 break;
1969
1970 case Magic('['):
1971collection:
1972 {
1973 char_u *lp;
1974
1975 /*
1976 * If there is no matching ']', we assume the '[' is a normal
1977 * character. This makes 'incsearch' and ":help [" work.
1978 */
1979 lp = skip_anyof(regparse);
1980 if (*lp == ']') /* there is a matching ']' */
1981 {
1982 int startc = -1; /* > 0 when next '-' is a range */
1983 int endc;
1984
1985 /*
1986 * In a character class, different parsing rules apply.
1987 * Not even \ is special anymore, nothing is.
1988 */
1989 if (*regparse == '^') /* Complement of range. */
1990 {
1991 ret = regnode(ANYBUT + extra);
1992 regparse++;
1993 }
1994 else
1995 ret = regnode(ANYOF + extra);
1996
1997 /* At the start ']' and '-' mean the literal character. */
1998 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00001999 {
2000 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002001 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002002 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003
2004 while (*regparse != NUL && *regparse != ']')
2005 {
2006 if (*regparse == '-')
2007 {
2008 ++regparse;
2009 /* The '-' is not used for a range at the end and
2010 * after or before a '\n'. */
2011 if (*regparse == ']' || *regparse == NUL
2012 || startc == -1
2013 || (regparse[0] == '\\' && regparse[1] == 'n'))
2014 {
2015 regc('-');
2016 startc = '-'; /* [--x] is a range */
2017 }
2018 else
2019 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002020 /* Also accept "a-[.z.]" */
2021 endc = 0;
2022 if (*regparse == '[')
2023 endc = get_coll_element(&regparse);
2024 if (endc == 0)
2025 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002026#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002027 if (has_mbyte)
2028 endc = mb_ptr2char_adv(&regparse);
2029 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002030#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002031 endc = *regparse++;
2032 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002033
2034 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002035 if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002036 endc = coll_get_char();
2037
Bram Moolenaar071d4272004-06-13 20:20:40 +00002038 if (startc > endc)
2039 EMSG_RET_NULL(_(e_invrange));
2040#ifdef FEAT_MBYTE
2041 if (has_mbyte && ((*mb_char2len)(startc) > 1
2042 || (*mb_char2len)(endc) > 1))
2043 {
2044 /* Limit to a range of 256 chars */
2045 if (endc > startc + 256)
2046 EMSG_RET_NULL(_(e_invrange));
2047 while (++startc <= endc)
2048 regmbc(startc);
2049 }
2050 else
2051#endif
2052 {
2053#ifdef EBCDIC
2054 int alpha_only = FALSE;
2055
2056 /* for alphabetical range skip the gaps
2057 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2058 if (isalpha(startc) && isalpha(endc))
2059 alpha_only = TRUE;
2060#endif
2061 while (++startc <= endc)
2062#ifdef EBCDIC
2063 if (!alpha_only || isalpha(startc))
2064#endif
2065 regc(startc);
2066 }
2067 startc = -1;
2068 }
2069 }
2070 /*
2071 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2072 * accepts "\t", "\e", etc., but only when the 'l' flag in
2073 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002074 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002075 */
2076 else if (*regparse == '\\'
Bram Moolenaardf177f62005-02-22 08:39:57 +00002077 && !cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002078 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2079 || (!cpo_lit
2080 && vim_strchr(REGEXP_ABBR,
2081 regparse[1]) != NULL)))
2082 {
2083 regparse++;
2084 if (*regparse == 'n')
2085 {
2086 /* '\n' in range: also match NL */
2087 if (ret != JUST_CALC_SIZE)
2088 {
2089 if (*ret == ANYBUT)
2090 *ret = ANYBUT + ADD_NL;
2091 else if (*ret == ANYOF)
2092 *ret = ANYOF + ADD_NL;
2093 /* else: must have had a \n already */
2094 }
2095 *flagp |= HASNL;
2096 regparse++;
2097 startc = -1;
2098 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002099 else if (*regparse == 'd'
2100 || *regparse == 'o'
2101 || *regparse == 'x'
2102 || *regparse == 'u'
2103 || *regparse == 'U')
2104 {
2105 startc = coll_get_char();
2106 if (startc == 0)
2107 regc(0x0a);
2108 else
2109#ifdef FEAT_MBYTE
2110 regmbc(startc);
2111#else
2112 regc(startc);
2113#endif
2114 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002115 else
2116 {
2117 startc = backslash_trans(*regparse++);
2118 regc(startc);
2119 }
2120 }
2121 else if (*regparse == '[')
2122 {
2123 int c_class;
2124 int cu;
2125
Bram Moolenaardf177f62005-02-22 08:39:57 +00002126 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002127 startc = -1;
2128 /* Characters assumed to be 8 bits! */
2129 switch (c_class)
2130 {
2131 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002132 c_class = get_equi_class(&regparse);
2133 if (c_class != 0)
2134 {
2135 /* produce equivalence class */
2136 reg_equi_class(c_class);
2137 }
2138 else if ((c_class =
2139 get_coll_element(&regparse)) != 0)
2140 {
2141 /* produce a collating element */
2142 regmbc(c_class);
2143 }
2144 else
2145 {
2146 /* literal '[', allow [[-x] as a range */
2147 startc = *regparse++;
2148 regc(startc);
2149 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002150 break;
2151 case CLASS_ALNUM:
2152 for (cu = 1; cu <= 255; cu++)
2153 if (isalnum(cu))
2154 regc(cu);
2155 break;
2156 case CLASS_ALPHA:
2157 for (cu = 1; cu <= 255; cu++)
2158 if (isalpha(cu))
2159 regc(cu);
2160 break;
2161 case CLASS_BLANK:
2162 regc(' ');
2163 regc('\t');
2164 break;
2165 case CLASS_CNTRL:
2166 for (cu = 1; cu <= 255; cu++)
2167 if (iscntrl(cu))
2168 regc(cu);
2169 break;
2170 case CLASS_DIGIT:
2171 for (cu = 1; cu <= 255; cu++)
2172 if (VIM_ISDIGIT(cu))
2173 regc(cu);
2174 break;
2175 case CLASS_GRAPH:
2176 for (cu = 1; cu <= 255; cu++)
2177 if (isgraph(cu))
2178 regc(cu);
2179 break;
2180 case CLASS_LOWER:
2181 for (cu = 1; cu <= 255; cu++)
2182 if (islower(cu))
2183 regc(cu);
2184 break;
2185 case CLASS_PRINT:
2186 for (cu = 1; cu <= 255; cu++)
2187 if (vim_isprintc(cu))
2188 regc(cu);
2189 break;
2190 case CLASS_PUNCT:
2191 for (cu = 1; cu <= 255; cu++)
2192 if (ispunct(cu))
2193 regc(cu);
2194 break;
2195 case CLASS_SPACE:
2196 for (cu = 9; cu <= 13; cu++)
2197 regc(cu);
2198 regc(' ');
2199 break;
2200 case CLASS_UPPER:
2201 for (cu = 1; cu <= 255; cu++)
2202 if (isupper(cu))
2203 regc(cu);
2204 break;
2205 case CLASS_XDIGIT:
2206 for (cu = 1; cu <= 255; cu++)
2207 if (vim_isxdigit(cu))
2208 regc(cu);
2209 break;
2210 case CLASS_TAB:
2211 regc('\t');
2212 break;
2213 case CLASS_RETURN:
2214 regc('\r');
2215 break;
2216 case CLASS_BACKSPACE:
2217 regc('\b');
2218 break;
2219 case CLASS_ESCAPE:
2220 regc('\033');
2221 break;
2222 }
2223 }
2224 else
2225 {
2226#ifdef FEAT_MBYTE
2227 if (has_mbyte)
2228 {
2229 int len;
2230
2231 /* produce a multibyte character, including any
2232 * following composing characters */
2233 startc = mb_ptr2char(regparse);
2234 len = (*mb_ptr2len_check)(regparse);
2235 if (enc_utf8 && utf_char2len(startc) != len)
2236 startc = -1; /* composing chars */
2237 while (--len >= 0)
2238 regc(*regparse++);
2239 }
2240 else
2241#endif
2242 {
2243 startc = *regparse++;
2244 regc(startc);
2245 }
2246 }
2247 }
2248 regc(NUL);
2249 prevchr_len = 1; /* last char was the ']' */
2250 if (*regparse != ']')
2251 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2252 skipchr(); /* let's be friends with the lexer again */
2253 *flagp |= HASWIDTH | SIMPLE;
2254 break;
2255 }
2256 }
2257 /* FALLTHROUGH */
2258
2259 default:
2260 {
2261 int len;
2262
2263#ifdef FEAT_MBYTE
2264 /* A multi-byte character is handled as a separate atom if it's
2265 * before a multi. */
2266 if (has_mbyte && (*mb_char2len)(c) > 1
2267 && re_multi_type(peekchr()) != NOT_MULTI)
2268 {
2269 ret = regnode(MULTIBYTECODE);
2270 regmbc(c);
2271 *flagp |= HASWIDTH | SIMPLE;
2272 break;
2273 }
2274#endif
2275
2276 ret = regnode(EXACTLY);
2277
2278 /*
2279 * Append characters as long as:
2280 * - there is no following multi, we then need the character in
2281 * front of it as a single character operand
2282 * - not running into a Magic character
2283 * - "one_exactly" is not set
2284 * But always emit at least one character. Might be a Multi,
2285 * e.g., a "[" without matching "]".
2286 */
2287 for (len = 0; c != NUL && (len == 0
2288 || (re_multi_type(peekchr()) == NOT_MULTI
2289 && !one_exactly
2290 && !is_Magic(c))); ++len)
2291 {
2292 c = no_Magic(c);
2293#ifdef FEAT_MBYTE
2294 if (has_mbyte)
2295 {
2296 regmbc(c);
2297 if (enc_utf8)
2298 {
2299 int off;
2300 int l;
2301
2302 /* Need to get composing character too, directly
2303 * access regparse for that, because skipchr() skips
2304 * over composing chars. */
2305 ungetchr();
2306 if (*regparse == '\\' && regparse[1] != NUL)
2307 off = 1;
2308 else
2309 off = 0;
2310 for (;;)
2311 {
2312 l = utf_ptr2len_check(regparse + off);
2313 if (!UTF_COMPOSINGLIKE(regparse + off,
2314 regparse + off + l))
2315 break;
2316 off += l;
2317 regmbc(utf_ptr2char(regparse + off));
2318 }
2319 skipchr();
2320 }
2321 }
2322 else
2323#endif
2324 regc(c);
2325 c = getchr();
2326 }
2327 ungetchr();
2328
2329 regc(NUL);
2330 *flagp |= HASWIDTH;
2331 if (len == 1)
2332 *flagp |= SIMPLE;
2333 }
2334 break;
2335 }
2336
2337 return ret;
2338}
2339
2340/*
2341 * emit a node
2342 * Return pointer to generated code.
2343 */
2344 static char_u *
2345regnode(op)
2346 int op;
2347{
2348 char_u *ret;
2349
2350 ret = regcode;
2351 if (ret == JUST_CALC_SIZE)
2352 regsize += 3;
2353 else
2354 {
2355 *regcode++ = op;
2356 *regcode++ = NUL; /* Null "next" pointer. */
2357 *regcode++ = NUL;
2358 }
2359 return ret;
2360}
2361
2362/*
2363 * Emit (if appropriate) a byte of code
2364 */
2365 static void
2366regc(b)
2367 int b;
2368{
2369 if (regcode == JUST_CALC_SIZE)
2370 regsize++;
2371 else
2372 *regcode++ = b;
2373}
2374
#ifdef FEAT_MBYTE
/*
 * Append the multi-byte character "c" to the program being generated.
 * In the size-calculation pass only "regsize" is increased, by the number of
 * bytes mb_char2len() reports for "c".
 */
    static void
regmbc(c)
    int         c;
{
    if (regcode != JUST_CALC_SIZE)
    {
        /* mb_char2bytes() stores the bytes and returns how many it wrote */
        regcode += (*mb_char2bytes)(c, regcode);
        return;
    }
    regsize += (*mb_char2len)(c);
}
#endif
2389
2390/*
2391 * reginsert - insert an operator in front of already-emitted operand
2392 *
2393 * Means relocating the operand.
2394 */
2395 static void
2396reginsert(op, opnd)
2397 int op;
2398 char_u *opnd;
2399{
2400 char_u *src;
2401 char_u *dst;
2402 char_u *place;
2403
2404 if (regcode == JUST_CALC_SIZE)
2405 {
2406 regsize += 3;
2407 return;
2408 }
2409 src = regcode;
2410 regcode += 3;
2411 dst = regcode;
2412 while (src > opnd)
2413 *--dst = *--src;
2414
2415 place = opnd; /* Op node, where operand used to be. */
2416 *place++ = op;
2417 *place++ = NUL;
2418 *place = NUL;
2419}
2420
2421/*
2422 * reginsert_limits - insert an operator in front of already-emitted operand.
2423 * The operator has the given limit values as operands. Also set next pointer.
2424 *
2425 * Means relocating the operand.
2426 */
2427 static void
2428reginsert_limits(op, minval, maxval, opnd)
2429 int op;
2430 long minval;
2431 long maxval;
2432 char_u *opnd;
2433{
2434 char_u *src;
2435 char_u *dst;
2436 char_u *place;
2437
2438 if (regcode == JUST_CALC_SIZE)
2439 {
2440 regsize += 11;
2441 return;
2442 }
2443 src = regcode;
2444 regcode += 11;
2445 dst = regcode;
2446 while (src > opnd)
2447 *--dst = *--src;
2448
2449 place = opnd; /* Op node, where operand used to be. */
2450 *place++ = op;
2451 *place++ = NUL;
2452 *place++ = NUL;
2453 place = re_put_long(place, (long_u)minval);
2454 place = re_put_long(place, (long_u)maxval);
2455 regtail(opnd, place);
2456}
2457
2458/*
2459 * Write a long as four bytes at "p" and return pointer to the next char.
2460 */
2461 static char_u *
2462re_put_long(p, val)
2463 char_u *p;
2464 long_u val;
2465{
2466 *p++ = (char_u) ((val >> 24) & 0377);
2467 *p++ = (char_u) ((val >> 16) & 0377);
2468 *p++ = (char_u) ((val >> 8) & 0377);
2469 *p++ = (char_u) (val & 0377);
2470 return p;
2471}
2472
2473/*
2474 * regtail - set the next-pointer at the end of a node chain
2475 */
2476 static void
2477regtail(p, val)
2478 char_u *p;
2479 char_u *val;
2480{
2481 char_u *scan;
2482 char_u *temp;
2483 int offset;
2484
2485 if (p == JUST_CALC_SIZE)
2486 return;
2487
2488 /* Find last node. */
2489 scan = p;
2490 for (;;)
2491 {
2492 temp = regnext(scan);
2493 if (temp == NULL)
2494 break;
2495 scan = temp;
2496 }
2497
Bram Moolenaar582fd852005-03-28 20:58:01 +00002498 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002499 offset = (int)(scan - val);
2500 else
2501 offset = (int)(val - scan);
2502 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2503 *(scan + 2) = (char_u) (offset & 0377);
2504}
2505
2506/*
2507 * regoptail - regtail on item after a BRANCH; nop if none
2508 */
2509 static void
2510regoptail(p, val)
2511 char_u *p;
2512 char_u *val;
2513{
2514 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2515 if (p == NULL || p == JUST_CALC_SIZE
2516 || (OP(p) != BRANCH
2517 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2518 return;
2519 regtail(OPERAND(p), val);
2520}
2521
2522/*
2523 * getchr() - get the next character from the pattern. We know about
2524 * magic and such, so therefore we need a lexical analyzer.
2525 */
2526
2527/* static int curchr; */
2528static int prevprevchr;
2529static int prevchr;
2530static int nextchr; /* used for ungetchr() */
2531/*
2532 * Note: prevchr is sometimes -1 when we are not at the start,
2533 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
2534 * taken to be magic -- webb
2535 */
2536static int at_start; /* True when on the first character */
2537static int prev_at_start; /* True when on the second character */
2538
2539 static void
2540initchr(str)
2541 char_u *str;
2542{
2543 regparse = str;
2544 prevchr_len = 0;
2545 curchr = prevprevchr = prevchr = nextchr = -1;
2546 at_start = TRUE;
2547 prev_at_start = FALSE;
2548}
2549
2550 static int
2551peekchr()
2552{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002553 static int after_slash = FALSE;
2554
Bram Moolenaar071d4272004-06-13 20:20:40 +00002555 if (curchr == -1)
2556 {
2557 switch (curchr = regparse[0])
2558 {
2559 case '.':
2560 case '[':
2561 case '~':
2562 /* magic when 'magic' is on */
2563 if (reg_magic >= MAGIC_ON)
2564 curchr = Magic(curchr);
2565 break;
2566 case '(':
2567 case ')':
2568 case '{':
2569 case '%':
2570 case '+':
2571 case '=':
2572 case '?':
2573 case '@':
2574 case '!':
2575 case '&':
2576 case '|':
2577 case '<':
2578 case '>':
2579 case '#': /* future ext. */
2580 case '"': /* future ext. */
2581 case '\'': /* future ext. */
2582 case ',': /* future ext. */
2583 case '-': /* future ext. */
2584 case ':': /* future ext. */
2585 case ';': /* future ext. */
2586 case '`': /* future ext. */
2587 case '/': /* Can't be used in / command */
2588 /* magic only after "\v" */
2589 if (reg_magic == MAGIC_ALL)
2590 curchr = Magic(curchr);
2591 break;
2592 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002593 /* * is not magic as the very first character, eg "?*ptr", when
2594 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2595 * "\(\*" is not magic, thus must be magic if "after_slash" */
2596 if (reg_magic >= MAGIC_ON
2597 && !at_start
2598 && !(prev_at_start && prevchr == Magic('^'))
2599 && (after_slash
2600 || (prevchr != Magic('(')
2601 && prevchr != Magic('&')
2602 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002603 curchr = Magic('*');
2604 break;
2605 case '^':
2606 /* '^' is only magic as the very first character and if it's after
2607 * "\(", "\|", "\&' or "\n" */
2608 if (reg_magic >= MAGIC_OFF
2609 && (at_start
2610 || reg_magic == MAGIC_ALL
2611 || prevchr == Magic('(')
2612 || prevchr == Magic('|')
2613 || prevchr == Magic('&')
2614 || prevchr == Magic('n')
2615 || (no_Magic(prevchr) == '('
2616 && prevprevchr == Magic('%'))))
2617 {
2618 curchr = Magic('^');
2619 at_start = TRUE;
2620 prev_at_start = FALSE;
2621 }
2622 break;
2623 case '$':
2624 /* '$' is only magic as the very last char and if it's in front of
2625 * either "\|", "\)", "\&", or "\n" */
2626 if (reg_magic >= MAGIC_OFF)
2627 {
2628 char_u *p = regparse + 1;
2629
2630 /* ignore \c \C \m and \M after '$' */
2631 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
2632 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
2633 p += 2;
2634 if (p[0] == NUL
2635 || (p[0] == '\\'
2636 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
2637 || p[1] == 'n'))
2638 || reg_magic == MAGIC_ALL)
2639 curchr = Magic('$');
2640 }
2641 break;
2642 case '\\':
2643 {
2644 int c = regparse[1];
2645
2646 if (c == NUL)
2647 curchr = '\\'; /* trailing '\' */
2648 else if (
2649#ifdef EBCDIC
2650 vim_strchr(META, c)
2651#else
2652 c <= '~' && META_flags[c]
2653#endif
2654 )
2655 {
2656 /*
2657 * META contains everything that may be magic sometimes,
2658 * except ^ and $ ("\^" and "\$" are only magic after
2659 * "\v"). We now fetch the next character and toggle its
2660 * magicness. Therefore, \ is so meta-magic that it is
2661 * not in META.
2662 */
2663 curchr = -1;
2664 prev_at_start = at_start;
2665 at_start = FALSE; /* be able to say "/\*ptr" */
2666 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002667 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002668 peekchr();
2669 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002670 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002671 curchr = toggle_Magic(curchr);
2672 }
2673 else if (vim_strchr(REGEXP_ABBR, c))
2674 {
2675 /*
2676 * Handle abbreviations, like "\t" for TAB -- webb
2677 */
2678 curchr = backslash_trans(c);
2679 }
2680 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
2681 curchr = toggle_Magic(c);
2682 else
2683 {
2684 /*
2685 * Next character can never be (made) magic?
2686 * Then backslashing it won't do anything.
2687 */
2688#ifdef FEAT_MBYTE
2689 if (has_mbyte)
2690 curchr = (*mb_ptr2char)(regparse + 1);
2691 else
2692#endif
2693 curchr = c;
2694 }
2695 break;
2696 }
2697
2698#ifdef FEAT_MBYTE
2699 default:
2700 if (has_mbyte)
2701 curchr = (*mb_ptr2char)(regparse);
2702#endif
2703 }
2704 }
2705
2706 return curchr;
2707}
2708
2709/*
2710 * Eat one lexed character. Do this in a way that we can undo it.
2711 */
2712 static void
2713skipchr()
2714{
2715 /* peekchr() eats a backslash, do the same here */
2716 if (*regparse == '\\')
2717 prevchr_len = 1;
2718 else
2719 prevchr_len = 0;
2720 if (regparse[prevchr_len] != NUL)
2721 {
2722#ifdef FEAT_MBYTE
2723 if (has_mbyte)
2724 prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
2725 else
2726#endif
2727 ++prevchr_len;
2728 }
2729 regparse += prevchr_len;
2730 prev_at_start = at_start;
2731 at_start = FALSE;
2732 prevprevchr = prevchr;
2733 prevchr = curchr;
2734 curchr = nextchr; /* use previously unget char, or -1 */
2735 nextchr = -1;
2736}
2737
2738/*
2739 * Skip a character while keeping the value of prev_at_start for at_start.
2740 * prevchr and prevprevchr are also kept.
2741 */
2742 static void
2743skipchr_keepstart()
2744{
2745 int as = prev_at_start;
2746 int pr = prevchr;
2747 int prpr = prevprevchr;
2748
2749 skipchr();
2750 at_start = as;
2751 prevchr = pr;
2752 prevprevchr = prpr;
2753}
2754
/*
 * Get the next lexed character from the pattern and advance past it.
 */
    static int
getchr()
{
    int         c;

    c = peekchr();
    skipchr();
    return c;
}
2763
2764/*
2765 * put character back. Works only once!
2766 */
2767 static void
2768ungetchr()
2769{
2770 nextchr = curchr;
2771 curchr = prevchr;
2772 prevchr = prevprevchr;
2773 at_start = prev_at_start;
2774 prev_at_start = FALSE;
2775
2776 /* Backup regparse, so that it's at the same position as before the
2777 * getchr(). */
2778 regparse -= prevchr_len;
2779}
2780
2781/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00002782 * Get and return the value of the hex string at the current position.
2783 * Return -1 if there is no valid hex number.
2784 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002785 * blahblah\%x20asdf
2786 * before-^ ^-after
2787 * The parameter controls the maximum number of input characters. This will be
2788 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2789 */
2790 static int
2791gethexchrs(maxinputlen)
2792 int maxinputlen;
2793{
2794 int nr = 0;
2795 int c;
2796 int i;
2797
2798 for (i = 0; i < maxinputlen; ++i)
2799 {
2800 c = regparse[0];
2801 if (!vim_isxdigit(c))
2802 break;
2803 nr <<= 4;
2804 nr |= hex2nr(c);
2805 ++regparse;
2806 }
2807
2808 if (i == 0)
2809 return -1;
2810 return nr;
2811}
2812
2813/*
2814 * get and return the value of the decimal string immediately after the
2815 * current position. Return -1 for invalid. Consumes all digits.
2816 */
2817 static int
2818getdecchrs()
2819{
2820 int nr = 0;
2821 int c;
2822 int i;
2823
2824 for (i = 0; ; ++i)
2825 {
2826 c = regparse[0];
2827 if (c < '0' || c > '9')
2828 break;
2829 nr *= 10;
2830 nr += c - '0';
2831 ++regparse;
2832 }
2833
2834 if (i == 0)
2835 return -1;
2836 return nr;
2837}
2838
2839/*
2840 * get and return the value of the octal string immediately after the current
2841 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2842 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2843 * treat 8 or 9 as recognised characters. Position is updated:
2844 * blahblah\%o210asdf
2845 * before-^ ^-after
2846 */
2847 static int
2848getoctchrs()
2849{
2850 int nr = 0;
2851 int c;
2852 int i;
2853
2854 for (i = 0; i < 3 && nr < 040; ++i)
2855 {
2856 c = regparse[0];
2857 if (c < '0' || c > '7')
2858 break;
2859 nr <<= 3;
2860 nr |= hex2nr(c);
2861 ++regparse;
2862 }
2863
2864 if (i == 0)
2865 return -1;
2866 return nr;
2867}
2868
2869/*
2870 * Get a number after a backslash that is inside [].
2871 * When nothing is recognized return a backslash.
2872 */
2873 static int
2874coll_get_char()
2875{
2876 int nr = -1;
2877
2878 switch (*regparse++)
2879 {
2880 case 'd': nr = getdecchrs(); break;
2881 case 'o': nr = getoctchrs(); break;
2882 case 'x': nr = gethexchrs(2); break;
2883 case 'u': nr = gethexchrs(4); break;
2884 case 'U': nr = gethexchrs(8); break;
2885 }
2886 if (nr < 0)
2887 {
2888 /* If getting the number fails be backwards compatible: the character
2889 * is a backslash. */
2890 --regparse;
2891 nr = '\\';
2892 }
2893 return nr;
2894}
2895
2896/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00002897 * read_limits - Read two integers to be taken as a minimum and maximum.
2898 * If the first character is '-', then the range is reversed.
2899 * Should end with 'end'. If minval is missing, zero is default, if maxval is
2900 * missing, a very big number is the default.
2901 */
2902 static int
2903read_limits(minval, maxval)
2904 long *minval;
2905 long *maxval;
2906{
2907 int reverse = FALSE;
2908 char_u *first_char;
2909 long tmp;
2910
2911 if (*regparse == '-')
2912 {
2913 /* Starts with '-', so reverse the range later */
2914 regparse++;
2915 reverse = TRUE;
2916 }
2917 first_char = regparse;
2918 *minval = getdigits(&regparse);
2919 if (*regparse == ',') /* There is a comma */
2920 {
2921 if (vim_isdigit(*++regparse))
2922 *maxval = getdigits(&regparse);
2923 else
2924 *maxval = MAX_LIMIT;
2925 }
2926 else if (VIM_ISDIGIT(*first_char))
2927 *maxval = *minval; /* It was \{n} or \{-n} */
2928 else
2929 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
2930 if (*regparse == '\\')
2931 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002932 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002933 {
2934 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2935 reg_magic == MAGIC_ALL ? "" : "\\");
2936 EMSG_RET_FAIL(IObuff);
2937 }
2938
2939 /*
2940 * Reverse the range if there was a '-', or make sure it is in the right
2941 * order otherwise.
2942 */
2943 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2944 {
2945 tmp = *minval;
2946 *minval = *maxval;
2947 *maxval = tmp;
2948 }
2949 skipchr(); /* let's be friends with the lexer again */
2950 return OK;
2951}
2952
2953/*
2954 * vim_regexec and friends
2955 */
2956
2957/*
2958 * Global work variables for vim_regexec().
2959 */
2960
2961/* The current match-position is remembered with these variables: */
2962static linenr_T reglnum; /* line number, relative to first line */
2963static char_u *regline; /* start of current line */
2964static char_u *reginput; /* current input, points into "regline" */
2965
2966static int need_clear_subexpr; /* subexpressions still need to be
2967 * cleared */
2968#ifdef FEAT_SYN_HL
2969static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
2970 * still need to be cleared */
2971#endif
2972
Bram Moolenaar071d4272004-06-13 20:20:40 +00002973/*
2974 * Structure used to save the current input state, when it needs to be
2975 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00002976 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00002977 */
2978typedef struct
2979{
2980 union
2981 {
2982 char_u *ptr; /* reginput pointer, for single-line regexp */
2983 lpos_T pos; /* reginput pos, for multi-line regexp */
2984 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00002985 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002986} regsave_T;
2987
2988/* struct to save start/end pointer/position in for \(\) */
2989typedef struct
2990{
2991 union
2992 {
2993 char_u *ptr;
2994 lpos_T pos;
2995 } se_u;
2996} save_se_T;
2997
2998static char_u *reg_getline __ARGS((linenr_T lnum));
2999static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
3000static long regtry __ARGS((regprog_T *prog, colnr_T col));
3001static void cleanup_subexpr __ARGS((void));
3002#ifdef FEAT_SYN_HL
3003static void cleanup_zsubexpr __ARGS((void));
3004#endif
3005static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003006static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3007static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003008static int reg_save_equal __ARGS((regsave_T *save));
3009static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3010static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3011
3012/* Save the sub-expressions before attempting a match. */
3013#define save_se(savep, posp, pp) \
3014 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3015
3016/* After a failed match restore the sub-expressions. */
3017#define restore_se(savep, posp, pp) { \
3018 if (REG_MULTI) \
3019 *(posp) = (savep)->se_u.pos; \
3020 else \
3021 *(pp) = (savep)->se_u.ptr; }
3022
3023static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003024static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003025static int regrepeat __ARGS((char_u *p, long maxcount));
3026
3027#ifdef DEBUG
3028int regnarrate = 0;
3029#endif
3030
3031/*
3032 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3033 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3034 * contains '\c' or '\C' the value is overruled.
3035 */
3036static int ireg_ic;
3037
3038#ifdef FEAT_MBYTE
3039/*
3040 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3041 * in the regexp. Defaults to false, always.
3042 */
3043static int ireg_icombine;
3044#endif
3045
3046/*
3047 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3048 * slow, we keep one allocated piece of memory and only re-allocate it when
3049 * it's too small. It's freed in vim_regexec_both() when finished.
3050 */
3051static char_u *reg_tofree;
3052static unsigned reg_tofreelen;
3053
3054/*
3055 * These variables are set when executing a regexp to speed up the execution.
3056 * Which ones are set depends on whether a single-line or multi-line match is
3057 * done:
3058 * single-line multi-line
3059 * reg_match &regmatch_T NULL
3060 * reg_mmatch NULL &regmmatch_T
3061 * reg_startp reg_match->startp <invalid>
3062 * reg_endp reg_match->endp <invalid>
3063 * reg_startpos <invalid> reg_mmatch->startpos
3064 * reg_endpos <invalid> reg_mmatch->endpos
3065 * reg_win NULL window in which to search
3066 * reg_buf <invalid> buffer in which to search
3067 * reg_firstlnum <invalid> first line in which to search
3068 * reg_maxline 0 last line nr
3069 * reg_line_lbr FALSE or TRUE FALSE
3070 */
3071static regmatch_T *reg_match;
3072static regmmatch_T *reg_mmatch;
3073static char_u **reg_startp = NULL;
3074static char_u **reg_endp = NULL;
3075static lpos_T *reg_startpos = NULL;
3076static lpos_T *reg_endpos = NULL;
3077static win_T *reg_win;
3078static buf_T *reg_buf;
3079static linenr_T reg_firstlnum;
3080static linenr_T reg_maxline;
3081static int reg_line_lbr; /* "\n" in string is line break */
3082
/*
 * Values for rs_state in regitem_T: what kind of item was pushed on the
 * regstack.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3103
3104/*
3105 * When there are alternatives a regstate_T is put on the regstack to remember
3106 * what we are doing.
3107 * Before it may be another type of item, depending on rs_state, to remember
3108 * more things.
3109 */
3110typedef struct regitem_S
3111{
3112 regstate_T rs_state; /* what we are doing, one of RS_ above */
3113 char_u *rs_scan; /* current node in program */
3114 union
3115 {
3116 save_se_T sesave;
3117 regsave_T regsave;
3118 } rs_un; /* room for saving reginput */
3119 short rs_no; /* submatch nr */
3120} regitem_T;
3121
3122static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3123static void regstack_pop __ARGS((char_u **scan));
3124
3125/* used for BEHIND and NOBEHIND matching */
3126typedef struct regbehind_S
3127{
3128 regsave_T save_after;
3129 regsave_T save_behind;
3130} regbehind_T;
3131
3132/* used for STAR, PLUS and BRACE_SIMPLE matching */
3133typedef struct regstar_S
3134{
3135 int nextb; /* next byte */
3136 int nextb_ic; /* next byte reverse case */
3137 long count;
3138 long minval;
3139 long maxval;
3140} regstar_T;
3141
3142/* used to store input position when a BACK was encountered, so that we know if
3143 * we made any progress since the last time. */
3144typedef struct backpos_S
3145{
3146 char_u *bp_scan; /* "scan" where BACK was encountered */
3147 regsave_T bp_pos; /* last input position */
3148} backpos_T;
3149
3150/*
3151 * regstack and backpos are used by regmatch(). They are kept over calls to
3152 * avoid invoking malloc() and free() often.
3153 */
3154static garray_T regstack; /* stack with regitem_T items, sometimes
3155 preceded by regstar_T or regbehind_T. */
3156static garray_T backpos; /* table with backpos_T for BACK */
3157
Bram Moolenaar071d4272004-06-13 20:20:40 +00003158/*
3159 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3160 */
3161 static char_u *
3162reg_getline(lnum)
3163 linenr_T lnum;
3164{
3165 /* when looking behind for a match/no-match lnum is negative. But we
3166 * can't go before line 1 */
3167 if (reg_firstlnum + lnum < 1)
3168 return NULL;
3169 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3170}
3171
3172static regsave_T behind_pos;
3173
3174#ifdef FEAT_SYN_HL
3175static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3176static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3177static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3178static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3179#endif
3180
3181/* TRUE if using multi-line regexp. */
3182#define REG_MULTI (reg_match == NULL)
3183
3184/*
3185 * Match a regexp against a string.
3186 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3187 * Uses curbuf for line count and 'iskeyword'.
3188 *
3189 * Return TRUE if there is a match, FALSE if not.
3190 */
3191 int
3192vim_regexec(rmp, line, col)
3193 regmatch_T *rmp;
3194 char_u *line; /* string to match against */
3195 colnr_T col; /* column to start looking for match */
3196{
3197 reg_match = rmp;
3198 reg_mmatch = NULL;
3199 reg_maxline = 0;
3200 reg_line_lbr = FALSE;
3201 reg_win = NULL;
3202 ireg_ic = rmp->rm_ic;
3203#ifdef FEAT_MBYTE
3204 ireg_icombine = FALSE;
3205#endif
3206 return (vim_regexec_both(line, col) != 0);
3207}
3208
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        result;

    /* Set up for a single-line match: no multi-line match, no window. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;        /* the only difference with vim_regexec() */

    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    result = vim_regexec_both(line, col);
    return (result != 0);
}
#endif
3232
3233/*
3234 * Match a regexp against multiple lines.
3235 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3236 * Uses curbuf for line count and 'iskeyword'.
3237 *
3238 * Return zero if there is no match. Return number of lines contained in the
3239 * match otherwise.
3240 */
3241 long
3242vim_regexec_multi(rmp, win, buf, lnum, col)
3243 regmmatch_T *rmp;
3244 win_T *win; /* window in which to search or NULL */
3245 buf_T *buf; /* buffer in which to search */
3246 linenr_T lnum; /* nr of line to start looking for match */
3247 colnr_T col; /* column to start looking for match */
3248{
3249 long r;
3250 buf_T *save_curbuf = curbuf;
3251
3252 reg_match = NULL;
3253 reg_mmatch = rmp;
3254 reg_buf = buf;
3255 reg_win = win;
3256 reg_firstlnum = lnum;
3257 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3258 reg_line_lbr = FALSE;
3259 ireg_ic = rmp->rmm_ic;
3260#ifdef FEAT_MBYTE
3261 ireg_icombine = FALSE;
3262#endif
3263
3264 /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3265 curbuf = buf;
3266 r = vim_regexec_both(NULL, col);
3267 curbuf = save_curbuf;
3268
3269 return r;
3270}
3271
3272/*
3273 * Match a regexp against a string ("line" points to the string) or multiple
3274 * lines ("line" is NULL, use reg_getline()).
3275 */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003276 static long
3277vim_regexec_both(line, col)
3278 char_u *line;
3279 colnr_T col; /* column to start looking for match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003280{
3281 regprog_T *prog;
3282 char_u *s;
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003283 long retval = 0L;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003284
3285 reg_tofree = NULL;
3286
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003287 /* Init the regstack empty. Use an item size of 1 byte, since we push
3288 * different things onto it. Use a large grow size to avoid reallocating
3289 * it too often. */
3290 ga_init2(&regstack, 1, 10000);
3291
3292 /* Init the backpos table empty. */
3293 ga_init2(&backpos, sizeof(backpos_T), 10);
3294
Bram Moolenaar071d4272004-06-13 20:20:40 +00003295 if (REG_MULTI)
3296 {
3297 prog = reg_mmatch->regprog;
3298 line = reg_getline((linenr_T)0);
3299 reg_startpos = reg_mmatch->startpos;
3300 reg_endpos = reg_mmatch->endpos;
3301 }
3302 else
3303 {
3304 prog = reg_match->regprog;
3305 reg_startp = reg_match->startp;
3306 reg_endp = reg_match->endp;
3307 }
3308
3309 /* Be paranoid... */
3310 if (prog == NULL || line == NULL)
3311 {
3312 EMSG(_(e_null));
3313 goto theend;
3314 }
3315
3316 /* Check validity of program. */
3317 if (prog_magic_wrong())
3318 goto theend;
3319
3320 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3321 if (prog->regflags & RF_ICASE)
3322 ireg_ic = TRUE;
3323 else if (prog->regflags & RF_NOICASE)
3324 ireg_ic = FALSE;
3325
3326#ifdef FEAT_MBYTE
3327 /* If pattern contains "\Z" overrule value of ireg_icombine */
3328 if (prog->regflags & RF_ICOMBINE)
3329 ireg_icombine = TRUE;
3330#endif
3331
3332 /* If there is a "must appear" string, look for it. */
3333 if (prog->regmust != NULL)
3334 {
3335 int c;
3336
3337#ifdef FEAT_MBYTE
3338 if (has_mbyte)
3339 c = (*mb_ptr2char)(prog->regmust);
3340 else
3341#endif
3342 c = *prog->regmust;
3343 s = line + col;
Bram Moolenaar05159a02005-02-26 23:04:13 +00003344
3345 /*
3346 * This is used very often, esp. for ":global". Use three versions of
3347 * the loop to avoid overhead of conditions.
3348 */
3349 if (!ireg_ic
3350#ifdef FEAT_MBYTE
3351 && !has_mbyte
3352#endif
3353 )
3354 while ((s = vim_strbyte(s, c)) != NULL)
3355 {
3356 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3357 break; /* Found it. */
3358 ++s;
3359 }
3360#ifdef FEAT_MBYTE
3361 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3362 while ((s = vim_strchr(s, c)) != NULL)
3363 {
3364 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3365 break; /* Found it. */
3366 mb_ptr_adv(s);
3367 }
3368#endif
3369 else
3370 while ((s = cstrchr(s, c)) != NULL)
3371 {
3372 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3373 break; /* Found it. */
3374 mb_ptr_adv(s);
3375 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003376 if (s == NULL) /* Not present. */
3377 goto theend;
3378 }
3379
3380 regline = line;
3381 reglnum = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003382
3383 /* Simplest case: Anchored match need be tried only once. */
3384 if (prog->reganch)
3385 {
3386 int c;
3387
3388#ifdef FEAT_MBYTE
3389 if (has_mbyte)
3390 c = (*mb_ptr2char)(regline + col);
3391 else
3392#endif
3393 c = regline[col];
3394 if (prog->regstart == NUL
3395 || prog->regstart == c
3396 || (ireg_ic && ((
3397#ifdef FEAT_MBYTE
3398 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3399 || (c < 255 && prog->regstart < 255 &&
3400#endif
3401 TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
3402 retval = regtry(prog, col);
3403 else
3404 retval = 0;
3405 }
3406 else
3407 {
3408 /* Messy cases: unanchored match. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003409 while (!got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003410 {
3411 if (prog->regstart != NUL)
3412 {
Bram Moolenaar05159a02005-02-26 23:04:13 +00003413 /* Skip until the char we know it must start with.
3414 * Used often, do some work to avoid call overhead. */
3415 if (!ireg_ic
3416#ifdef FEAT_MBYTE
3417 && !has_mbyte
3418#endif
3419 )
3420 s = vim_strbyte(regline + col, prog->regstart);
3421 else
3422 s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003423 if (s == NULL)
3424 {
3425 retval = 0;
3426 break;
3427 }
3428 col = (int)(s - regline);
3429 }
3430
3431 retval = regtry(prog, col);
3432 if (retval > 0)
3433 break;
3434
3435 /* if not currently on the first line, get it again */
3436 if (reglnum != 0)
3437 {
3438 regline = reg_getline((linenr_T)0);
3439 reglnum = 0;
3440 }
3441 if (regline[col] == NUL)
3442 break;
3443#ifdef FEAT_MBYTE
3444 if (has_mbyte)
3445 col += (*mb_ptr2len_check)(regline + col);
3446 else
3447#endif
3448 ++col;
3449 }
3450 }
3451
Bram Moolenaar071d4272004-06-13 20:20:40 +00003452theend:
Bram Moolenaar071d4272004-06-13 20:20:40 +00003453 vim_free(reg_tofree);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003454 ga_clear(&regstack);
3455 ga_clear(&backpos);
3456
Bram Moolenaar071d4272004-06-13 20:20:40 +00003457 return retval;
3458}
3459
3460#ifdef FEAT_SYN_HL
3461static reg_extmatch_T *make_extmatch __ARGS((void));
3462
3463/*
3464 * Create a new extmatch and mark it as referenced once.
3465 */
3466 static reg_extmatch_T *
3467make_extmatch()
3468{
3469 reg_extmatch_T *em;
3470
3471 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3472 if (em != NULL)
3473 em->refcnt = 1;
3474 return em;
3475}
3476
3477/*
3478 * Add a reference to an extmatch.
3479 */
3480 reg_extmatch_T *
3481ref_extmatch(em)
3482 reg_extmatch_T *em;
3483{
3484 if (em != NULL)
3485 em->refcnt++;
3486 return em;
3487}
3488
3489/*
3490 * Remove a reference to an extmatch. If there are no references left, free
3491 * the info.
3492 */
3493 void
3494unref_extmatch(em)
3495 reg_extmatch_T *em;
3496{
3497 int i;
3498
3499 if (em != NULL && --em->refcnt <= 0)
3500 {
3501 for (i = 0; i < NSUBEXP; ++i)
3502 vim_free(em->matches[i]);
3503 vim_free(em);
3504 }
3505}
3506#endif
3507
3508/*
3509 * regtry - try match of "prog" with at regline["col"].
3510 * Returns 0 for failure, number of lines contained in the match otherwise.
3511 */
3512 static long
3513regtry(prog, col)
3514 regprog_T *prog;
3515 colnr_T col;
3516{
3517 reginput = regline + col;
3518 need_clear_subexpr = TRUE;
3519#ifdef FEAT_SYN_HL
3520 /* Clear the external match subpointers if necessary. */
3521 if (prog->reghasz == REX_SET)
3522 need_clear_zsubexpr = TRUE;
3523#endif
3524
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003525 if (regmatch(prog->program + 1) == 0)
3526 return 0;
3527
3528 cleanup_subexpr();
3529 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003530 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003531 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003532 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003533 reg_startpos[0].lnum = 0;
3534 reg_startpos[0].col = col;
3535 }
3536 if (reg_endpos[0].lnum < 0)
3537 {
3538 reg_endpos[0].lnum = reglnum;
3539 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003540 }
3541 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003542 /* Use line number of "\ze". */
3543 reglnum = reg_endpos[0].lnum;
3544 }
3545 else
3546 {
3547 if (reg_startp[0] == NULL)
3548 reg_startp[0] = regline + col;
3549 if (reg_endp[0] == NULL)
3550 reg_endp[0] = reginput;
3551 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003552#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003553 /* Package any found \z(...\) matches for export. Default is none. */
3554 unref_extmatch(re_extmatch_out);
3555 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003556
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003557 if (prog->reghasz == REX_SET)
3558 {
3559 int i;
3560
3561 cleanup_zsubexpr();
3562 re_extmatch_out = make_extmatch();
3563 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003564 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003565 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003566 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003567 /* Only accept single line matches. */
3568 if (reg_startzpos[i].lnum >= 0
3569 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3570 re_extmatch_out->matches[i] =
3571 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003572 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003573 reg_endzpos[i].col - reg_startzpos[i].col);
3574 }
3575 else
3576 {
3577 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3578 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00003579 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003580 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003581 }
3582 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003583 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003584#endif
3585 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003586}
3587
3588#ifdef FEAT_MBYTE
Bram Moolenaar071d4272004-06-13 20:20:40 +00003589static int reg_prev_class __ARGS((void));
3590
Bram Moolenaar071d4272004-06-13 20:20:40 +00003591/*
3592 * Get class of previous character.
3593 */
3594 static int
3595reg_prev_class()
3596{
3597 if (reginput > regline)
3598 return mb_get_class(reginput - 1
3599 - (*mb_head_off)(regline, reginput - 1));
3600 return -1;
3601}
3602
Bram Moolenaar071d4272004-06-13 20:20:40 +00003603#endif
/* Move reginput forward with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Minimum and maximum value of a BRACE_LIMITS item.  Logically these are
 * locals of regmatch(); they are kept at file level to reduce the stack
 * space that function uses (it can be called recursively many times).
 */
static long     bl_minval, bl_maxval;
3613
3614/*
3615 * regmatch - main matching routine
3616 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003617 * Conceptually the strategy is simple: Check to see whether the current node
3618 * matches, push an item onto the regstack and loop to see whether the rest
3619 * matches, and then act accordingly. In practice we make some effort to
3620 * avoid using the regstack, in particular by going through "ordinary" nodes
3621 * (that don't need to know whether the rest of the match failed) by a nested
3622 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003623 *
3624 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
3625 * the last matched character.
3626 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
3627 * undefined state!
3628 */
3629 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003630regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003631 char_u *scan; /* Current node. */
3632{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003633 char_u *next; /* Next node. */
3634 int op;
3635 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003636 regitem_T *rp;
3637 int no;
3638 int status; /* one of the RA_ values: */
3639#define RA_FAIL 1 /* something failed, abort */
3640#define RA_CONT 2 /* continue in inner loop */
3641#define RA_BREAK 3 /* break inner loop */
3642#define RA_MATCH 4 /* successful match */
3643#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003644
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003645 /* Init the regstack and backpos table empty. They are initialized and
3646 * freed in vim_regexec_both() to reduce malloc()/free() calls. */
3647 regstack.ga_len = 0;
3648 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003649
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003650 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003651 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003652 */
3653 for (;;)
3654 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00003655 /* Some patterns may take a long time to match, even though they are not
3656 * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
3657 fast_breakcheck();
3658
3659#ifdef DEBUG
3660 if (scan != NULL && regnarrate)
3661 {
3662 mch_errmsg(regprop(scan));
3663 mch_errmsg("(\n");
3664 }
3665#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003666
3667 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003668 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003669 * regstack.
3670 */
3671 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003672 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003673 if (got_int || scan == NULL)
3674 {
3675 status = RA_FAIL;
3676 break;
3677 }
3678 status = RA_CONT;
3679
Bram Moolenaar071d4272004-06-13 20:20:40 +00003680#ifdef DEBUG
3681 if (regnarrate)
3682 {
3683 mch_errmsg(regprop(scan));
3684 mch_errmsg("...\n");
3685# ifdef FEAT_SYN_HL
3686 if (re_extmatch_in != NULL)
3687 {
3688 int i;
3689
3690 mch_errmsg(_("External submatches:\n"));
3691 for (i = 0; i < NSUBEXP; i++)
3692 {
3693 mch_errmsg(" \"");
3694 if (re_extmatch_in->matches[i] != NULL)
3695 mch_errmsg(re_extmatch_in->matches[i]);
3696 mch_errmsg("\"\n");
3697 }
3698 }
3699# endif
3700 }
3701#endif
3702 next = regnext(scan);
3703
3704 op = OP(scan);
3705 /* Check for character class with NL added. */
3706 if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
3707 {
3708 reg_nextline();
3709 }
3710 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3711 {
3712 ADVANCE_REGINPUT();
3713 }
3714 else
3715 {
3716 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003717 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003718#ifdef FEAT_MBYTE
3719 if (has_mbyte)
3720 c = (*mb_ptr2char)(reginput);
3721 else
3722#endif
3723 c = *reginput;
3724 switch (op)
3725 {
3726 case BOL:
3727 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003728 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003729 break;
3730
3731 case EOL:
3732 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003733 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003734 break;
3735
3736 case RE_BOF:
3737 /* Passing -1 to the getline() function provided for the search
3738 * should always return NULL if the current line is the first
3739 * line of the file. */
3740 if (reglnum != 0 || reginput != regline
3741 || (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003742 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003743 break;
3744
3745 case RE_EOF:
3746 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003747 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003748 break;
3749
3750 case CURSOR:
3751 /* Check if the buffer is in a window and compare the
3752 * reg_win->w_cursor position to the match position. */
3753 if (reg_win == NULL
3754 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3755 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003756 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003757 break;
3758
3759 case RE_LNUM:
3760 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3761 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003762 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003763 break;
3764
3765 case RE_COL:
3766 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003767 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003768 break;
3769
3770 case RE_VCOL:
3771 if (!re_num_cmp((long_u)win_linetabsize(
3772 reg_win == NULL ? curwin : reg_win,
3773 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003774 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003775 break;
3776
3777 case BOW: /* \<word; reginput points to w */
3778 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003779 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003780#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003781 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003782 {
3783 int this_class;
3784
3785 /* Get class of current and previous char (if it exists). */
3786 this_class = mb_get_class(reginput);
3787 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003788 status = RA_NOMATCH; /* not on a word at all */
3789 else if (reg_prev_class() == this_class)
3790 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003791 }
3792#endif
3793 else
3794 {
3795 if (!vim_iswordc(c)
3796 || (reginput > regline && vim_iswordc(reginput[-1])))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003797 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003798 }
3799 break;
3800
3801 case EOW: /* word\>; reginput points after d */
3802 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003803 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003804#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003805 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003806 {
3807 int this_class, prev_class;
3808
3809 /* Get class of current and previous char (if it exists). */
3810 this_class = mb_get_class(reginput);
3811 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003812 if (this_class == prev_class
3813 || prev_class == 0 || prev_class == 1)
3814 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003815 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003816#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003817 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003818 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003819 if (!vim_iswordc(reginput[-1])
3820 || (reginput[0] != NUL && vim_iswordc(c)))
3821 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003822 }
3823 break; /* Matched with EOW */
3824
3825 case ANY:
3826 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003827 status = RA_NOMATCH;
3828 else
3829 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003830 break;
3831
3832 case IDENT:
3833 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003834 status = RA_NOMATCH;
3835 else
3836 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003837 break;
3838
3839 case SIDENT:
3840 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003841 status = RA_NOMATCH;
3842 else
3843 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003844 break;
3845
3846 case KWORD:
3847 if (!vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003848 status = RA_NOMATCH;
3849 else
3850 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003851 break;
3852
3853 case SKWORD:
3854 if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003855 status = RA_NOMATCH;
3856 else
3857 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003858 break;
3859
3860 case FNAME:
3861 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003862 status = RA_NOMATCH;
3863 else
3864 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003865 break;
3866
3867 case SFNAME:
3868 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003869 status = RA_NOMATCH;
3870 else
3871 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003872 break;
3873
3874 case PRINT:
3875 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003876 status = RA_NOMATCH;
3877 else
3878 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003879 break;
3880
3881 case SPRINT:
3882 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003883 status = RA_NOMATCH;
3884 else
3885 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003886 break;
3887
3888 case WHITE:
3889 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003890 status = RA_NOMATCH;
3891 else
3892 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003893 break;
3894
3895 case NWHITE:
3896 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003897 status = RA_NOMATCH;
3898 else
3899 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003900 break;
3901
3902 case DIGIT:
3903 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003904 status = RA_NOMATCH;
3905 else
3906 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003907 break;
3908
3909 case NDIGIT:
3910 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003911 status = RA_NOMATCH;
3912 else
3913 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003914 break;
3915
3916 case HEX:
3917 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003918 status = RA_NOMATCH;
3919 else
3920 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003921 break;
3922
3923 case NHEX:
3924 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003925 status = RA_NOMATCH;
3926 else
3927 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003928 break;
3929
3930 case OCTAL:
3931 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003932 status = RA_NOMATCH;
3933 else
3934 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003935 break;
3936
3937 case NOCTAL:
3938 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003939 status = RA_NOMATCH;
3940 else
3941 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003942 break;
3943
3944 case WORD:
3945 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003946 status = RA_NOMATCH;
3947 else
3948 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003949 break;
3950
3951 case NWORD:
3952 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003953 status = RA_NOMATCH;
3954 else
3955 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003956 break;
3957
3958 case HEAD:
3959 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003960 status = RA_NOMATCH;
3961 else
3962 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003963 break;
3964
3965 case NHEAD:
3966 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003967 status = RA_NOMATCH;
3968 else
3969 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003970 break;
3971
3972 case ALPHA:
3973 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003974 status = RA_NOMATCH;
3975 else
3976 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003977 break;
3978
3979 case NALPHA:
3980 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003981 status = RA_NOMATCH;
3982 else
3983 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003984 break;
3985
3986 case LOWER:
3987 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003988 status = RA_NOMATCH;
3989 else
3990 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003991 break;
3992
3993 case NLOWER:
3994 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003995 status = RA_NOMATCH;
3996 else
3997 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003998 break;
3999
4000 case UPPER:
4001 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004002 status = RA_NOMATCH;
4003 else
4004 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004005 break;
4006
4007 case NUPPER:
4008 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004009 status = RA_NOMATCH;
4010 else
4011 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004012 break;
4013
4014 case EXACTLY:
4015 {
4016 int len;
4017 char_u *opnd;
4018
4019 opnd = OPERAND(scan);
4020 /* Inline the first byte, for speed. */
4021 if (*opnd != *reginput
4022 && (!ireg_ic || (
4023#ifdef FEAT_MBYTE
4024 !enc_utf8 &&
4025#endif
4026 TOLOWER_LOC(*opnd) != TOLOWER_LOC(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004027 status = RA_NOMATCH;
4028 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004029 {
4030 /* match empty string always works; happens when "~" is
4031 * empty. */
4032 }
4033 else if (opnd[1] == NUL
4034#ifdef FEAT_MBYTE
4035 && !(enc_utf8 && ireg_ic)
4036#endif
4037 )
4038 ++reginput; /* matched a single char */
4039 else
4040 {
4041 len = (int)STRLEN(opnd);
4042 /* Need to match first byte again for multi-byte. */
4043 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004044 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004045#ifdef FEAT_MBYTE
4046 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004047 else if (enc_utf8
4048 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004049 {
4050 /* raaron: This code makes a composing character get
4051 * ignored, which is the correct behavior (sometimes)
4052 * for voweled Hebrew texts. */
4053 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004054 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004055 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004056#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004057 else
4058 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004059 }
4060 }
4061 break;
4062
4063 case ANYOF:
4064 case ANYBUT:
4065 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004066 status = RA_NOMATCH;
4067 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4068 status = RA_NOMATCH;
4069 else
4070 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004071 break;
4072
4073#ifdef FEAT_MBYTE
4074 case MULTIBYTECODE:
4075 if (has_mbyte)
4076 {
4077 int i, len;
4078 char_u *opnd;
4079
4080 opnd = OPERAND(scan);
4081 /* Safety check (just in case 'encoding' was changed since
4082 * compiling the program). */
4083 if ((len = (*mb_ptr2len_check)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004084 {
4085 status = RA_NOMATCH;
4086 break;
4087 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004088 for (i = 0; i < len; ++i)
4089 if (opnd[i] != reginput[i])
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004090 {
4091 status = RA_NOMATCH;
4092 break;
4093 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004094 reginput += len;
4095 }
4096 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004097 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004098 break;
4099#endif
4100
4101 case NOTHING:
4102 break;
4103
4104 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004105 {
4106 int i;
4107 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004108
Bram Moolenaar582fd852005-03-28 20:58:01 +00004109 /*
4110 * When we run into BACK we need to check if we don't keep
4111 * looping without matching any input. The second and later
4112 * times a BACK is encountered it fails if the input is still
4113 * at the same position as the previous time.
4114 * The positions are stored in "backpos" and found by the
4115 * current value of "scan", the position in the RE program.
4116 */
4117 bp = (backpos_T *)backpos.ga_data;
4118 for (i = 0; i < backpos.ga_len; ++i)
4119 if (bp[i].bp_scan == scan)
4120 break;
4121 if (i == backpos.ga_len)
4122 {
4123 /* First time at this BACK, make room to store the pos. */
4124 if (ga_grow(&backpos, 1) == FAIL)
4125 status = RA_FAIL;
4126 else
4127 {
4128 /* get "ga_data" again, it may have changed */
4129 bp = (backpos_T *)backpos.ga_data;
4130 bp[i].bp_scan = scan;
4131 ++backpos.ga_len;
4132 }
4133 }
4134 else if (reg_save_equal(&bp[i].bp_pos))
4135 /* Still at same position as last time, fail. */
4136 status = RA_NOMATCH;
4137
4138 if (status != RA_FAIL && status != RA_NOMATCH)
4139 reg_save(&bp[i].bp_pos, &backpos);
4140 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004141 break;
4142
Bram Moolenaar071d4272004-06-13 20:20:40 +00004143 case MOPEN + 0: /* Match start: \zs */
4144 case MOPEN + 1: /* \( */
4145 case MOPEN + 2:
4146 case MOPEN + 3:
4147 case MOPEN + 4:
4148 case MOPEN + 5:
4149 case MOPEN + 6:
4150 case MOPEN + 7:
4151 case MOPEN + 8:
4152 case MOPEN + 9:
4153 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004154 no = op - MOPEN;
4155 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004156 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004157 if (rp == NULL)
4158 status = RA_FAIL;
4159 else
4160 {
4161 rp->rs_no = no;
4162 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4163 &reg_startp[no]);
4164 /* We simply continue and handle the result when done. */
4165 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004166 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004167 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004168
4169 case NOPEN: /* \%( */
4170 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004171 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004172 status = RA_FAIL;
4173 /* We simply continue and handle the result when done. */
4174 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004175
4176#ifdef FEAT_SYN_HL
4177 case ZOPEN + 1:
4178 case ZOPEN + 2:
4179 case ZOPEN + 3:
4180 case ZOPEN + 4:
4181 case ZOPEN + 5:
4182 case ZOPEN + 6:
4183 case ZOPEN + 7:
4184 case ZOPEN + 8:
4185 case ZOPEN + 9:
4186 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004187 no = op - ZOPEN;
4188 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004189 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004190 if (rp == NULL)
4191 status = RA_FAIL;
4192 else
4193 {
4194 rp->rs_no = no;
4195 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4196 &reg_startzp[no]);
4197 /* We simply continue and handle the result when done. */
4198 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004199 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004200 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004201#endif
4202
4203 case MCLOSE + 0: /* Match end: \ze */
4204 case MCLOSE + 1: /* \) */
4205 case MCLOSE + 2:
4206 case MCLOSE + 3:
4207 case MCLOSE + 4:
4208 case MCLOSE + 5:
4209 case MCLOSE + 6:
4210 case MCLOSE + 7:
4211 case MCLOSE + 8:
4212 case MCLOSE + 9:
4213 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004214 no = op - MCLOSE;
4215 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004216 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004217 if (rp == NULL)
4218 status = RA_FAIL;
4219 else
4220 {
4221 rp->rs_no = no;
4222 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4223 /* We simply continue and handle the result when done. */
4224 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004225 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004226 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004227
4228#ifdef FEAT_SYN_HL
4229 case ZCLOSE + 1: /* \) after \z( */
4230 case ZCLOSE + 2:
4231 case ZCLOSE + 3:
4232 case ZCLOSE + 4:
4233 case ZCLOSE + 5:
4234 case ZCLOSE + 6:
4235 case ZCLOSE + 7:
4236 case ZCLOSE + 8:
4237 case ZCLOSE + 9:
4238 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004239 no = op - ZCLOSE;
4240 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004241 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004242 if (rp == NULL)
4243 status = RA_FAIL;
4244 else
4245 {
4246 rp->rs_no = no;
4247 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4248 &reg_endzp[no]);
4249 /* We simply continue and handle the result when done. */
4250 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004251 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004252 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004253#endif
4254
4255 case BACKREF + 1:
4256 case BACKREF + 2:
4257 case BACKREF + 3:
4258 case BACKREF + 4:
4259 case BACKREF + 5:
4260 case BACKREF + 6:
4261 case BACKREF + 7:
4262 case BACKREF + 8:
4263 case BACKREF + 9:
4264 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004265 int len;
4266 linenr_T clnum;
4267 colnr_T ccol;
4268 char_u *p;
4269
4270 no = op - BACKREF;
4271 cleanup_subexpr();
4272 if (!REG_MULTI) /* Single-line regexp */
4273 {
4274 if (reg_endp[no] == NULL)
4275 {
4276 /* Backref was not set: Match an empty string. */
4277 len = 0;
4278 }
4279 else
4280 {
4281 /* Compare current input with back-ref in the same
4282 * line. */
4283 len = (int)(reg_endp[no] - reg_startp[no]);
4284 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004285 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004286 }
4287 }
4288 else /* Multi-line regexp */
4289 {
4290 if (reg_endpos[no].lnum < 0)
4291 {
4292 /* Backref was not set: Match an empty string. */
4293 len = 0;
4294 }
4295 else
4296 {
4297 if (reg_startpos[no].lnum == reglnum
4298 && reg_endpos[no].lnum == reglnum)
4299 {
4300 /* Compare back-ref within the current line. */
4301 len = reg_endpos[no].col - reg_startpos[no].col;
4302 if (cstrncmp(regline + reg_startpos[no].col,
4303 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004304 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004305 }
4306 else
4307 {
4308 /* Messy situation: Need to compare between two
4309 * lines. */
4310 ccol = reg_startpos[no].col;
4311 clnum = reg_startpos[no].lnum;
4312 for (;;)
4313 {
4314 /* Since getting one line may invalidate
4315 * the other, need to make copy. Slow! */
4316 if (regline != reg_tofree)
4317 {
4318 len = (int)STRLEN(regline);
4319 if (reg_tofree == NULL
4320 || len >= (int)reg_tofreelen)
4321 {
4322 len += 50; /* get some extra */
4323 vim_free(reg_tofree);
4324 reg_tofree = alloc(len);
4325 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004326 {
4327 status = RA_FAIL; /* outof memory!*/
4328 break;
4329 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004330 reg_tofreelen = len;
4331 }
4332 STRCPY(reg_tofree, regline);
4333 reginput = reg_tofree
4334 + (reginput - regline);
4335 regline = reg_tofree;
4336 }
4337
4338 /* Get the line to compare with. */
4339 p = reg_getline(clnum);
4340 if (clnum == reg_endpos[no].lnum)
4341 len = reg_endpos[no].col - ccol;
4342 else
4343 len = (int)STRLEN(p + ccol);
4344
4345 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004346 {
4347 status = RA_NOMATCH; /* doesn't match */
4348 break;
4349 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004350 if (clnum == reg_endpos[no].lnum)
4351 break; /* match and at end! */
4352 if (reglnum == reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004353 {
4354 status = RA_NOMATCH; /* text too short */
4355 break;
4356 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004357
4358 /* Advance to next line. */
4359 reg_nextline();
4360 ++clnum;
4361 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004362 if (got_int)
4363 {
4364 status = RA_FAIL;
4365 break;
4366 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004367 }
4368
4369 /* found a match! Note that regline may now point
4370 * to a copy of the line, that should not matter. */
4371 }
4372 }
4373 }
4374
4375 /* Matched the backref, skip over it. */
4376 reginput += len;
4377 }
4378 break;
4379
4380#ifdef FEAT_SYN_HL
4381 case ZREF + 1:
4382 case ZREF + 2:
4383 case ZREF + 3:
4384 case ZREF + 4:
4385 case ZREF + 5:
4386 case ZREF + 6:
4387 case ZREF + 7:
4388 case ZREF + 8:
4389 case ZREF + 9:
4390 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004391 int len;
4392
4393 cleanup_zsubexpr();
4394 no = op - ZREF;
4395 if (re_extmatch_in != NULL
4396 && re_extmatch_in->matches[no] != NULL)
4397 {
4398 len = (int)STRLEN(re_extmatch_in->matches[no]);
4399 if (cstrncmp(re_extmatch_in->matches[no],
4400 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004401 status = RA_NOMATCH;
4402 else
4403 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004404 }
4405 else
4406 {
4407 /* Backref was not set: Match an empty string. */
4408 }
4409 }
4410 break;
4411#endif
4412
4413 case BRANCH:
4414 {
4415 if (OP(next) != BRANCH) /* No choice. */
4416 next = OPERAND(scan); /* Avoid recursion. */
4417 else
4418 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004419 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004420 if (rp == NULL)
4421 status = RA_FAIL;
4422 else
4423 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004424 }
4425 }
4426 break;
4427
4428 case BRACE_LIMITS:
4429 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004430 if (OP(next) == BRACE_SIMPLE)
4431 {
4432 bl_minval = OPERAND_MIN(scan);
4433 bl_maxval = OPERAND_MAX(scan);
4434 }
4435 else if (OP(next) >= BRACE_COMPLEX
4436 && OP(next) < BRACE_COMPLEX + 10)
4437 {
4438 no = OP(next) - BRACE_COMPLEX;
4439 brace_min[no] = OPERAND_MIN(scan);
4440 brace_max[no] = OPERAND_MAX(scan);
4441 brace_count[no] = 0;
4442 }
4443 else
4444 {
4445 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004446 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004447 }
4448 }
4449 break;
4450
4451 case BRACE_COMPLEX + 0:
4452 case BRACE_COMPLEX + 1:
4453 case BRACE_COMPLEX + 2:
4454 case BRACE_COMPLEX + 3:
4455 case BRACE_COMPLEX + 4:
4456 case BRACE_COMPLEX + 5:
4457 case BRACE_COMPLEX + 6:
4458 case BRACE_COMPLEX + 7:
4459 case BRACE_COMPLEX + 8:
4460 case BRACE_COMPLEX + 9:
4461 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004462 no = op - BRACE_COMPLEX;
4463 ++brace_count[no];
4464
4465 /* If not matched enough times yet, try one more */
4466 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004467 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004468 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004469 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004470 if (rp == NULL)
4471 status = RA_FAIL;
4472 else
4473 {
4474 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004475 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004476 next = OPERAND(scan);
4477 /* We continue and handle the result when done. */
4478 }
4479 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004480 }
4481
4482 /* If matched enough times, may try matching some more */
4483 if (brace_min[no] <= brace_max[no])
4484 {
4485 /* Range is the normal way around, use longest match */
4486 if (brace_count[no] <= brace_max[no])
4487 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004488 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004489 if (rp == NULL)
4490 status = RA_FAIL;
4491 else
4492 {
4493 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004494 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004495 next = OPERAND(scan);
4496 /* We continue and handle the result when done. */
4497 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004498 }
4499 }
4500 else
4501 {
4502 /* Range is backwards, use shortest match first */
4503 if (brace_count[no] <= brace_min[no])
4504 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004505 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004506 if (rp == NULL)
4507 status = RA_FAIL;
4508 else
4509 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004510 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004511 /* We continue and handle the result when done. */
4512 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004513 }
4514 }
4515 }
4516 break;
4517
4518 case BRACE_SIMPLE:
4519 case STAR:
4520 case PLUS:
4521 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004522 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004523
4524 /*
4525 * Lookahead to avoid useless match attempts when we know
4526 * what character comes next.
4527 */
4528 if (OP(next) == EXACTLY)
4529 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004530 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004531 if (ireg_ic)
4532 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004533 if (isupper(rst.nextb))
4534 rst.nextb_ic = TOLOWER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004535 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004536 rst.nextb_ic = TOUPPER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004537 }
4538 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004539 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004540 }
4541 else
4542 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004543 rst.nextb = NUL;
4544 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004545 }
4546 if (op != BRACE_SIMPLE)
4547 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004548 rst.minval = (op == STAR) ? 0 : 1;
4549 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004550 }
4551 else
4552 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004553 rst.minval = bl_minval;
4554 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004555 }
4556
4557 /*
4558 * When maxval > minval, try matching as much as possible, up
4559 * to maxval. When maxval < minval, try matching at least the
4560 * minimal number (since the range is backwards, that's also
4561 * maxval!).
4562 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004563 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004564 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004565 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004566 status = RA_FAIL;
4567 break;
4568 }
4569 if (rst.minval <= rst.maxval
4570 ? rst.count >= rst.minval : rst.count >= rst.maxval)
4571 {
4572 /* It could match. Prepare for trying to match what
4573 * follows. The code is below. Parameters are stored in
4574 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004575 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004576 {
4577 EMSG(_(e_maxmempat));
4578 status = RA_FAIL;
4579 }
4580 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004581 status = RA_FAIL;
4582 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004583 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004584 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004585 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00004586 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004587 if (rp == NULL)
4588 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004589 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004590 {
4591 *(((regstar_T *)rp) - 1) = rst;
4592 status = RA_BREAK; /* skip the restore bits */
4593 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004594 }
4595 }
4596 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004597 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004598
Bram Moolenaar071d4272004-06-13 20:20:40 +00004599 }
4600 break;
4601
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004602 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00004603 case MATCH:
4604 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004605 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004606 if (rp == NULL)
4607 status = RA_FAIL;
4608 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004609 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004610 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004611 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004612 next = OPERAND(scan);
4613 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004614 }
4615 break;
4616
4617 case BEHIND:
4618 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004619 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004620 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004621 {
4622 EMSG(_(e_maxmempat));
4623 status = RA_FAIL;
4624 }
4625 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004626 status = RA_FAIL;
4627 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004628 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004629 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004630 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004631 if (rp == NULL)
4632 status = RA_FAIL;
4633 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004634 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004635 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004636 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004637 /* First try if what follows matches. If it does then we
4638 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004639 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004640 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004641 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004642
4643 case BHPOS:
4644 if (REG_MULTI)
4645 {
4646 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4647 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004648 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004649 }
4650 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004651 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004652 break;
4653
4654 case NEWL:
4655 if ((c != NUL || reglnum == reg_maxline)
4656 && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004657 status = RA_NOMATCH;
4658 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004659 ADVANCE_REGINPUT();
4660 else
4661 reg_nextline();
4662 break;
4663
4664 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004665 status = RA_MATCH; /* Success! */
4666 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004667
4668 default:
4669 EMSG(_(e_re_corr));
4670#ifdef DEBUG
4671 printf("Illegal op code %d\n", op);
4672#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004673 status = RA_FAIL;
4674 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004675 }
4676 }
4677
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004678 /* If we can't continue sequentially, break the inner loop. */
4679 if (status != RA_CONT)
4680 break;
4681
4682 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004683 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004684
4685 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004686
4687 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004688 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00004689 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004690 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004691 while (regstack.ga_len > 0 && status != RA_FAIL)
4692 {
4693 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4694 switch (rp->rs_state)
4695 {
4696 case RS_NOPEN:
4697 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004698 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004699 break;
4700
4701 case RS_MOPEN:
4702 /* Pop the state. Restore pointers when there is no match. */
4703 if (status == RA_NOMATCH)
4704 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4705 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004706 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004707 break;
4708
4709#ifdef FEAT_SYN_HL
4710 case RS_ZOPEN:
4711 /* Pop the state. Restore pointers when there is no match. */
4712 if (status == RA_NOMATCH)
4713 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4714 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004715 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004716 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004717#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004718
4719 case RS_MCLOSE:
4720 /* Pop the state. Restore pointers when there is no match. */
4721 if (status == RA_NOMATCH)
4722 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4723 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004724 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004725 break;
4726
4727#ifdef FEAT_SYN_HL
4728 case RS_ZCLOSE:
4729 /* Pop the state. Restore pointers when there is no match. */
4730 if (status == RA_NOMATCH)
4731 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4732 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004733 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004734 break;
4735#endif
4736
4737 case RS_BRANCH:
4738 if (status == RA_MATCH)
4739 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004740 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004741 else
4742 {
4743 if (status != RA_BREAK)
4744 {
4745 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004746 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004747 scan = rp->rs_scan;
4748 }
4749 if (scan == NULL || OP(scan) != BRANCH)
4750 {
4751 /* no more branches, didn't find a match */
4752 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004753 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004754 }
4755 else
4756 {
4757 /* Prepare to try a branch. */
4758 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00004759 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004760 scan = OPERAND(scan);
4761 }
4762 }
4763 break;
4764
4765 case RS_BRCPLX_MORE:
4766 /* Pop the state. Restore pointers when there is no match. */
4767 if (status == RA_NOMATCH)
4768 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004769 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004770 --brace_count[rp->rs_no]; /* decrement match count */
4771 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004772 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004773 break;
4774
4775 case RS_BRCPLX_LONG:
4776 /* Pop the state. Restore pointers when there is no match. */
4777 if (status == RA_NOMATCH)
4778 {
4779 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004780 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004781 --brace_count[rp->rs_no];
4782 /* continue with the items after "\{}" */
4783 status = RA_CONT;
4784 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004785 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004786 if (status == RA_CONT)
4787 scan = regnext(scan);
4788 break;
4789
4790 case RS_BRCPLX_SHORT:
4791 /* Pop the state. Restore pointers when there is no match. */
4792 if (status == RA_NOMATCH)
4793 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004794 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004795 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004796 if (status == RA_NOMATCH)
4797 {
4798 scan = OPERAND(scan);
4799 status = RA_CONT;
4800 }
4801 break;
4802
4803 case RS_NOMATCH:
4804 /* Pop the state. If the operand matches for NOMATCH or
4805 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
4806 * except for SUBPAT, and continue with the next item. */
4807 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4808 status = RA_NOMATCH;
4809 else
4810 {
4811 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004812 if (rp->rs_no != SUBPAT) /* zero-width */
4813 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004814 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004815 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004816 if (status == RA_CONT)
4817 scan = regnext(scan);
4818 break;
4819
4820 case RS_BEHIND1:
4821 if (status == RA_NOMATCH)
4822 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004823 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004824 regstack.ga_len -= sizeof(regbehind_T);
4825 }
4826 else
4827 {
4828 /* The stuff after BEHIND/NOBEHIND matches. Now try if
4829 * the behind part does (not) match before the current
4830 * position in the input. This must be done at every
4831 * position in the input and checking if the match ends at
4832 * the current position. */
4833
4834 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004835 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004836
4837 /* start looking for a match with operand at the current
4838 * postion. Go back one character until we find the
4839 * result, hitting the start of the line or the previous
4840 * line (for multi-line matching).
4841 * Set behind_pos to where the match should end, BHPOS
4842 * will match it. Save the current value. */
4843 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4844 behind_pos = rp->rs_un.regsave;
4845
4846 rp->rs_state = RS_BEHIND2;
4847
Bram Moolenaar582fd852005-03-28 20:58:01 +00004848 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004849 scan = OPERAND(rp->rs_scan);
4850 }
4851 break;
4852
4853 case RS_BEHIND2:
4854 /*
4855 * Looping for BEHIND / NOBEHIND match.
4856 */
4857 if (status == RA_MATCH && reg_save_equal(&behind_pos))
4858 {
4859 /* found a match that ends where "next" started */
4860 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4861 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004862 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4863 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004864 else
4865 /* But we didn't want a match. */
4866 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004867 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004868 regstack.ga_len -= sizeof(regbehind_T);
4869 }
4870 else
4871 {
4872 /* No match: Go back one character. May go to previous
4873 * line once. */
4874 no = OK;
4875 if (REG_MULTI)
4876 {
4877 if (rp->rs_un.regsave.rs_u.pos.col == 0)
4878 {
4879 if (rp->rs_un.regsave.rs_u.pos.lnum
4880 < behind_pos.rs_u.pos.lnum
4881 || reg_getline(
4882 --rp->rs_un.regsave.rs_u.pos.lnum)
4883 == NULL)
4884 no = FAIL;
4885 else
4886 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004887 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004888 rp->rs_un.regsave.rs_u.pos.col =
4889 (colnr_T)STRLEN(regline);
4890 }
4891 }
4892 else
4893 --rp->rs_un.regsave.rs_u.pos.col;
4894 }
4895 else
4896 {
4897 if (rp->rs_un.regsave.rs_u.ptr == regline)
4898 no = FAIL;
4899 else
4900 --rp->rs_un.regsave.rs_u.ptr;
4901 }
4902 if (no == OK)
4903 {
4904 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004905 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004906 scan = OPERAND(rp->rs_scan);
4907 }
4908 else
4909 {
4910 /* Can't advance. For NOBEHIND that's a match. */
4911 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4912 if (rp->rs_no == NOBEHIND)
4913 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004914 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4915 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004916 status = RA_MATCH;
4917 }
4918 else
4919 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004920 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004921 regstack.ga_len -= sizeof(regbehind_T);
4922 }
4923 }
4924 break;
4925
4926 case RS_STAR_LONG:
4927 case RS_STAR_SHORT:
4928 {
4929 regstar_T *rst = ((regstar_T *)rp) - 1;
4930
4931 if (status == RA_MATCH)
4932 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004933 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004934 regstack.ga_len -= sizeof(regstar_T);
4935 break;
4936 }
4937
4938 /* Tried once already, restore input pointers. */
4939 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004940 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004941
4942 /* Repeat until we found a position where it could match. */
4943 for (;;)
4944 {
4945 if (status != RA_BREAK)
4946 {
4947 /* Tried first position already, advance. */
4948 if (rp->rs_state == RS_STAR_LONG)
4949 {
4950 /* Trying for longest matc, but couldn't or didn't
4951 * match -- back up one char. */
4952 if (--rst->count < rst->minval)
4953 break;
4954 if (reginput == regline)
4955 {
4956 /* backup to last char of previous line */
4957 --reglnum;
4958 regline = reg_getline(reglnum);
4959 /* Just in case regrepeat() didn't count
4960 * right. */
4961 if (regline == NULL)
4962 break;
4963 reginput = regline + STRLEN(regline);
4964 fast_breakcheck();
4965 }
4966 else
4967 mb_ptr_back(regline, reginput);
4968 }
4969 else
4970 {
4971 /* Range is backwards, use shortest match first.
4972 * Careful: maxval and minval are exchanged!
4973 * Couldn't or didn't match: try advancing one
4974 * char. */
4975 if (rst->count == rst->minval
4976 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
4977 break;
4978 ++rst->count;
4979 }
4980 if (got_int)
4981 break;
4982 }
4983 else
4984 status = RA_NOMATCH;
4985
4986 /* If it could match, try it. */
4987 if (rst->nextb == NUL || *reginput == rst->nextb
4988 || *reginput == rst->nextb_ic)
4989 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004990 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004991 scan = regnext(rp->rs_scan);
4992 status = RA_CONT;
4993 break;
4994 }
4995 }
4996 if (status != RA_CONT)
4997 {
4998 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004999 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005000 regstack.ga_len -= sizeof(regstar_T);
5001 status = RA_NOMATCH;
5002 }
5003 }
5004 break;
5005 }
5006
5007 /* If we want to continue the inner loop or didn't pop a state contine
5008 * matching loop */
5009 if (status == RA_CONT || rp == (regitem_T *)
5010 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5011 break;
5012 }
5013
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005014 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005015 if (status == RA_CONT)
5016 continue;
5017
5018 /*
5019 * If the regstack is empty or something failed we are done.
5020 */
5021 if (regstack.ga_len == 0 || status == RA_FAIL)
5022 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005023 if (scan == NULL)
5024 {
5025 /*
5026 * We get here only if there's trouble -- normally "case END" is
5027 * the terminating point.
5028 */
5029 EMSG(_(e_re_corr));
5030#ifdef DEBUG
5031 printf("Premature EOL\n");
5032#endif
5033 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005034 if (status == RA_FAIL)
5035 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005036 return (status == RA_MATCH);
5037 }
5038
5039 } /* End of loop until the regstack is empty. */
5040
5041 /* NOTREACHED */
5042}
5043
5044/*
5045 * Push an item onto the regstack.
5046 * Returns pointer to new item. Returns NULL when out of memory.
5047 */
5048 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005049regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005050 regstate_T state;
5051 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005052{
5053 regitem_T *rp;
5054
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005055 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005056 {
5057 EMSG(_(e_maxmempat));
5058 return NULL;
5059 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005060 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005061 return NULL;
5062
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005063 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005064 rp->rs_state = state;
5065 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005066
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005067 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005068 return rp;
5069}
5070
5071/*
5072 * Pop an item from the regstack.
5073 */
5074 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005075regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005076 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005077{
5078 regitem_T *rp;
5079
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005080 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005081 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005082
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005083 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005084}
5085
Bram Moolenaar071d4272004-06-13 20:20:40 +00005086/*
5087 * regrepeat - repeatedly match something simple, return how many.
5088 * Advances reginput (and reglnum) to just after the matched chars.
5089 */
5090 static int
5091regrepeat(p, maxcount)
5092 char_u *p;
5093 long maxcount; /* maximum number of matches allowed */
5094{
5095 long count = 0;
5096 char_u *scan;
5097 char_u *opnd;
5098 int mask;
5099 int testval = 0;
5100
5101 scan = reginput; /* Make local copy of reginput for speed. */
5102 opnd = OPERAND(p);
5103 switch (OP(p))
5104 {
5105 case ANY:
5106 case ANY + ADD_NL:
5107 while (count < maxcount)
5108 {
5109 /* Matching anything means we continue until end-of-line (or
5110 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5111 while (*scan != NUL && count < maxcount)
5112 {
5113 ++count;
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005114 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005115 }
5116 if (!WITH_NL(OP(p)) || reglnum == reg_maxline || count == maxcount)
5117 break;
5118 ++count; /* count the line-break */
5119 reg_nextline();
5120 scan = reginput;
5121 if (got_int)
5122 break;
5123 }
5124 break;
5125
5126 case IDENT:
5127 case IDENT + ADD_NL:
5128 testval = TRUE;
5129 /*FALLTHROUGH*/
5130 case SIDENT:
5131 case SIDENT + ADD_NL:
5132 while (count < maxcount)
5133 {
5134 if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5135 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005136 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005137 }
5138 else if (*scan == NUL)
5139 {
5140 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5141 break;
5142 reg_nextline();
5143 scan = reginput;
5144 if (got_int)
5145 break;
5146 }
5147 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5148 ++scan;
5149 else
5150 break;
5151 ++count;
5152 }
5153 break;
5154
5155 case KWORD:
5156 case KWORD + ADD_NL:
5157 testval = TRUE;
5158 /*FALLTHROUGH*/
5159 case SKWORD:
5160 case SKWORD + ADD_NL:
5161 while (count < maxcount)
5162 {
5163 if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5164 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005165 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005166 }
5167 else if (*scan == NUL)
5168 {
5169 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5170 break;
5171 reg_nextline();
5172 scan = reginput;
5173 if (got_int)
5174 break;
5175 }
5176 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5177 ++scan;
5178 else
5179 break;
5180 ++count;
5181 }
5182 break;
5183
5184 case FNAME:
5185 case FNAME + ADD_NL:
5186 testval = TRUE;
5187 /*FALLTHROUGH*/
5188 case SFNAME:
5189 case SFNAME + ADD_NL:
5190 while (count < maxcount)
5191 {
5192 if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5193 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005194 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005195 }
5196 else if (*scan == NUL)
5197 {
5198 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5199 break;
5200 reg_nextline();
5201 scan = reginput;
5202 if (got_int)
5203 break;
5204 }
5205 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5206 ++scan;
5207 else
5208 break;
5209 ++count;
5210 }
5211 break;
5212
5213 case PRINT:
5214 case PRINT + ADD_NL:
5215 testval = TRUE;
5216 /*FALLTHROUGH*/
5217 case SPRINT:
5218 case SPRINT + ADD_NL:
5219 while (count < maxcount)
5220 {
5221 if (*scan == NUL)
5222 {
5223 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5224 break;
5225 reg_nextline();
5226 scan = reginput;
5227 if (got_int)
5228 break;
5229 }
5230 else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5231 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00005232 mb_ptr_adv(scan);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005233 }
5234 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5235 ++scan;
5236 else
5237 break;
5238 ++count;
5239 }
5240 break;
5241
5242 case WHITE:
5243 case WHITE + ADD_NL:
5244 testval = mask = RI_WHITE;
5245do_class:
5246 while (count < maxcount)
5247 {
5248#ifdef FEAT_MBYTE
5249 int l;
5250#endif
5251 if (*scan == NUL)
5252 {
5253 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5254 break;
5255 reg_nextline();
5256 scan = reginput;
5257 if (got_int)
5258 break;
5259 }
5260#ifdef FEAT_MBYTE
5261 else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
5262 {
5263 if (testval != 0)
5264 break;
5265 scan += l;
5266 }
5267#endif
5268 else if ((class_tab[*scan] & mask) == testval)
5269 ++scan;
5270 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5271 ++scan;
5272 else
5273 break;
5274 ++count;
5275 }
5276 break;
5277
5278 case NWHITE:
5279 case NWHITE + ADD_NL:
5280 mask = RI_WHITE;
5281 goto do_class;
5282 case DIGIT:
5283 case DIGIT + ADD_NL:
5284 testval = mask = RI_DIGIT;
5285 goto do_class;
5286 case NDIGIT:
5287 case NDIGIT + ADD_NL:
5288 mask = RI_DIGIT;
5289 goto do_class;
5290 case HEX:
5291 case HEX + ADD_NL:
5292 testval = mask = RI_HEX;
5293 goto do_class;
5294 case NHEX:
5295 case NHEX + ADD_NL:
5296 mask = RI_HEX;
5297 goto do_class;
5298 case OCTAL:
5299 case OCTAL + ADD_NL:
5300 testval = mask = RI_OCTAL;
5301 goto do_class;
5302 case NOCTAL:
5303 case NOCTAL + ADD_NL:
5304 mask = RI_OCTAL;
5305 goto do_class;
5306 case WORD:
5307 case WORD + ADD_NL:
5308 testval = mask = RI_WORD;
5309 goto do_class;
5310 case NWORD:
5311 case NWORD + ADD_NL:
5312 mask = RI_WORD;
5313 goto do_class;
5314 case HEAD:
5315 case HEAD + ADD_NL:
5316 testval = mask = RI_HEAD;
5317 goto do_class;
5318 case NHEAD:
5319 case NHEAD + ADD_NL:
5320 mask = RI_HEAD;
5321 goto do_class;
5322 case ALPHA:
5323 case ALPHA + ADD_NL:
5324 testval = mask = RI_ALPHA;
5325 goto do_class;
5326 case NALPHA:
5327 case NALPHA + ADD_NL:
5328 mask = RI_ALPHA;
5329 goto do_class;
5330 case LOWER:
5331 case LOWER + ADD_NL:
5332 testval = mask = RI_LOWER;
5333 goto do_class;
5334 case NLOWER:
5335 case NLOWER + ADD_NL:
5336 mask = RI_LOWER;
5337 goto do_class;
5338 case UPPER:
5339 case UPPER + ADD_NL:
5340 testval = mask = RI_UPPER;
5341 goto do_class;
5342 case NUPPER:
5343 case NUPPER + ADD_NL:
5344 mask = RI_UPPER;
5345 goto do_class;
5346
5347 case EXACTLY:
5348 {
5349 int cu, cl;
5350
5351 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5352 * would have been used for it. */
5353 if (ireg_ic)
5354 {
5355 cu = TOUPPER_LOC(*opnd);
5356 cl = TOLOWER_LOC(*opnd);
5357 while (count < maxcount && (*scan == cu || *scan == cl))
5358 {
5359 count++;
5360 scan++;
5361 }
5362 }
5363 else
5364 {
5365 cu = *opnd;
5366 while (count < maxcount && *scan == cu)
5367 {
5368 count++;
5369 scan++;
5370 }
5371 }
5372 break;
5373 }
5374
5375#ifdef FEAT_MBYTE
5376 case MULTIBYTECODE:
5377 {
5378 int i, len, cf = 0;
5379
5380 /* Safety check (just in case 'encoding' was changed since
5381 * compiling the program). */
5382 if ((len = (*mb_ptr2len_check)(opnd)) > 1)
5383 {
5384 if (ireg_ic && enc_utf8)
5385 cf = utf_fold(utf_ptr2char(opnd));
5386 while (count < maxcount)
5387 {
5388 for (i = 0; i < len; ++i)
5389 if (opnd[i] != scan[i])
5390 break;
5391 if (i < len && (!ireg_ic || !enc_utf8
5392 || utf_fold(utf_ptr2char(scan)) != cf))
5393 break;
5394 scan += len;
5395 ++count;
5396 }
5397 }
5398 }
5399 break;
5400#endif
5401
5402 case ANYOF:
5403 case ANYOF + ADD_NL:
5404 testval = TRUE;
5405 /*FALLTHROUGH*/
5406
5407 case ANYBUT:
5408 case ANYBUT + ADD_NL:
5409 while (count < maxcount)
5410 {
5411#ifdef FEAT_MBYTE
5412 int len;
5413#endif
5414 if (*scan == NUL)
5415 {
5416 if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
5417 break;
5418 reg_nextline();
5419 scan = reginput;
5420 if (got_int)
5421 break;
5422 }
5423 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5424 ++scan;
5425#ifdef FEAT_MBYTE
5426 else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
5427 {
5428 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5429 break;
5430 scan += len;
5431 }
5432#endif
5433 else
5434 {
5435 if ((cstrchr(opnd, *scan) == NULL) == testval)
5436 break;
5437 ++scan;
5438 }
5439 ++count;
5440 }
5441 break;
5442
5443 case NEWL:
5444 while (count < maxcount
5445 && ((*scan == NUL && reglnum < reg_maxline)
5446 || (*scan == '\n' && reg_line_lbr)))
5447 {
5448 count++;
5449 if (reg_line_lbr)
5450 ADVANCE_REGINPUT();
5451 else
5452 reg_nextline();
5453 scan = reginput;
5454 if (got_int)
5455 break;
5456 }
5457 break;
5458
5459 default: /* Oh dear. Called inappropriately. */
5460 EMSG(_(e_re_corr));
5461#ifdef DEBUG
5462 printf("Called regrepeat with op code %d\n", OP(p));
5463#endif
5464 break;
5465 }
5466
5467 reginput = scan;
5468
5469 return (int)count;
5470}
5471
5472/*
5473 * regnext - dig the "next" pointer out of a node
5474 */
5475 static char_u *
5476regnext(p)
5477 char_u *p;
5478{
5479 int offset;
5480
5481 if (p == JUST_CALC_SIZE)
5482 return NULL;
5483
5484 offset = NEXT(p);
5485 if (offset == 0)
5486 return NULL;
5487
Bram Moolenaar582fd852005-03-28 20:58:01 +00005488 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005489 return p - offset;
5490 else
5491 return p + offset;
5492}
5493
5494/*
5495 * Check the regexp program for its magic number.
5496 * Return TRUE if it's wrong.
5497 */
5498 static int
5499prog_magic_wrong()
5500{
5501 if (UCHARAT(REG_MULTI
5502 ? reg_mmatch->regprog->program
5503 : reg_match->regprog->program) != REGMAGIC)
5504 {
5505 EMSG(_(e_re_corr));
5506 return TRUE;
5507 }
5508 return FALSE;
5509}
5510
5511/*
5512 * Cleanup the subexpressions, if this wasn't done yet.
5513 * This construction is used to clear the subexpressions only when they are
5514 * used (to increase speed).
5515 */
5516 static void
5517cleanup_subexpr()
5518{
5519 if (need_clear_subexpr)
5520 {
5521 if (REG_MULTI)
5522 {
5523 /* Use 0xff to set lnum to -1 */
5524 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5525 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5526 }
5527 else
5528 {
5529 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5530 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5531 }
5532 need_clear_subexpr = FALSE;
5533 }
5534}
5535
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" sub-expression start/end info, unless this was already
 * done.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5557
5558/*
5559 * Advance reglnum, regline and reginput to the next line.
5560 */
5561 static void
5562reg_nextline()
5563{
5564 regline = reg_getline(++reglnum);
5565 reginput = regline;
5566 fast_breakcheck();
5567}
5568
5569/*
5570 * Save the input line and position in a regsave_T.
5571 */
5572 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005573reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005574 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005575 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005576{
5577 if (REG_MULTI)
5578 {
5579 save->rs_u.pos.col = (colnr_T)(reginput - regline);
5580 save->rs_u.pos.lnum = reglnum;
5581 }
5582 else
5583 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005584 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005585}
5586
5587/*
5588 * Restore the input line and position from a regsave_T.
5589 */
5590 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005591reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005592 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005593 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005594{
5595 if (REG_MULTI)
5596 {
5597 if (reglnum != save->rs_u.pos.lnum)
5598 {
5599 /* only call reg_getline() when the line number changed to save
5600 * a bit of time */
5601 reglnum = save->rs_u.pos.lnum;
5602 regline = reg_getline(reglnum);
5603 }
5604 reginput = regline + save->rs_u.pos.col;
5605 }
5606 else
5607 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005608 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005609}
5610
5611/*
5612 * Return TRUE if current position is equal to saved position.
5613 */
5614 static int
5615reg_save_equal(save)
5616 regsave_T *save;
5617{
5618 if (REG_MULTI)
5619 return reglnum == save->rs_u.pos.lnum
5620 && reginput == regline + save->rs_u.pos.col;
5621 return reginput == save->rs_u.ptr;
5622}
5623
5624/*
5625 * Tentatively set the sub-expression start to the current position (after
5626 * calling regmatch() they will have changed). Need to save the existing
5627 * values for when there is no match.
5628 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5629 * depending on REG_MULTI.
5630 */
5631 static void
5632save_se_multi(savep, posp)
5633 save_se_T *savep;
5634 lpos_T *posp;
5635{
5636 savep->se_u.pos = *posp;
5637 posp->lnum = reglnum;
5638 posp->col = (colnr_T)(reginput - regline);
5639}
5640
5641 static void
5642save_se_one(savep, pp)
5643 save_se_T *savep;
5644 char_u **pp;
5645{
5646 savep->se_u.ptr = *pp;
5647 *pp = reginput;
5648}
5649
5650/*
5651 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5652 */
5653 static int
5654re_num_cmp(val, scan)
5655 long_u val;
5656 char_u *scan;
5657{
5658 long_u n = OPERAND_MIN(scan);
5659
5660 if (OPERAND_CMP(scan) == '>')
5661 return val > n;
5662 if (OPERAND_CMP(scan) == '<')
5663 return val < n;
5664 return val == n;
5665}
5666
5667
5668#ifdef DEBUG
5669
5670/*
5671 * regdump - dump a regexp onto stdout in vaguely comprehensible form
5672 */
5673 static void
5674regdump(pattern, r)
5675 char_u *pattern;
5676 regprog_T *r;
5677{
5678 char_u *s;
5679 int op = EXACTLY; /* Arbitrary non-END op. */
5680 char_u *next;
5681 char_u *end = NULL;
5682
5683 printf("\r\nregcomp(%s):\r\n", pattern);
5684
5685 s = r->program + 1;
5686 /*
5687 * Loop until we find the END that isn't before a referred next (an END
5688 * can also appear in a NOMATCH operand).
5689 */
5690 while (op != END || s <= end)
5691 {
5692 op = OP(s);
5693 printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
5694 next = regnext(s);
5695 if (next == NULL) /* Next ptr. */
5696 printf("(0)");
5697 else
5698 printf("(%d)", (int)((s - r->program) + (next - s)));
5699 if (end < next)
5700 end = next;
5701 if (op == BRACE_LIMITS)
5702 {
5703 /* Two short ints */
5704 printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5705 s += 8;
5706 }
5707 s += 3;
5708 if (op == ANYOF || op == ANYOF + ADD_NL
5709 || op == ANYBUT || op == ANYBUT + ADD_NL
5710 || op == EXACTLY)
5711 {
5712 /* Literal string, where present. */
5713 while (*s != NUL)
5714 printf("%c", *s++);
5715 s++;
5716 }
5717 printf("\r\n");
5718 }
5719
5720 /* Header fields of interest. */
5721 if (r->regstart != NUL)
5722 printf("start `%s' 0x%x; ", r->regstart < 256
5723 ? (char *)transchar(r->regstart)
5724 : "multibyte", r->regstart);
5725 if (r->reganch)
5726 printf("anchored; ");
5727 if (r->regmust != NULL)
5728 printf("must have \"%s\"", r->regmust);
5729 printf("\r\n");
5730}
5731
5732/*
5733 * regprop - printable representation of opcode
5734 */
5735 static char_u *
5736regprop(op)
5737 char_u *op;
5738{
5739 char_u *p;
5740 static char_u buf[50];
5741
5742 (void) strcpy(buf, ":");
5743
5744 switch (OP(op))
5745 {
5746 case BOL:
5747 p = "BOL";
5748 break;
5749 case EOL:
5750 p = "EOL";
5751 break;
5752 case RE_BOF:
5753 p = "BOF";
5754 break;
5755 case RE_EOF:
5756 p = "EOF";
5757 break;
5758 case CURSOR:
5759 p = "CURSOR";
5760 break;
5761 case RE_LNUM:
5762 p = "RE_LNUM";
5763 break;
5764 case RE_COL:
5765 p = "RE_COL";
5766 break;
5767 case RE_VCOL:
5768 p = "RE_VCOL";
5769 break;
5770 case BOW:
5771 p = "BOW";
5772 break;
5773 case EOW:
5774 p = "EOW";
5775 break;
5776 case ANY:
5777 p = "ANY";
5778 break;
5779 case ANY + ADD_NL:
5780 p = "ANY+NL";
5781 break;
5782 case ANYOF:
5783 p = "ANYOF";
5784 break;
5785 case ANYOF + ADD_NL:
5786 p = "ANYOF+NL";
5787 break;
5788 case ANYBUT:
5789 p = "ANYBUT";
5790 break;
5791 case ANYBUT + ADD_NL:
5792 p = "ANYBUT+NL";
5793 break;
5794 case IDENT:
5795 p = "IDENT";
5796 break;
5797 case IDENT + ADD_NL:
5798 p = "IDENT+NL";
5799 break;
5800 case SIDENT:
5801 p = "SIDENT";
5802 break;
5803 case SIDENT + ADD_NL:
5804 p = "SIDENT+NL";
5805 break;
5806 case KWORD:
5807 p = "KWORD";
5808 break;
5809 case KWORD + ADD_NL:
5810 p = "KWORD+NL";
5811 break;
5812 case SKWORD:
5813 p = "SKWORD";
5814 break;
5815 case SKWORD + ADD_NL:
5816 p = "SKWORD+NL";
5817 break;
5818 case FNAME:
5819 p = "FNAME";
5820 break;
5821 case FNAME + ADD_NL:
5822 p = "FNAME+NL";
5823 break;
5824 case SFNAME:
5825 p = "SFNAME";
5826 break;
5827 case SFNAME + ADD_NL:
5828 p = "SFNAME+NL";
5829 break;
5830 case PRINT:
5831 p = "PRINT";
5832 break;
5833 case PRINT + ADD_NL:
5834 p = "PRINT+NL";
5835 break;
5836 case SPRINT:
5837 p = "SPRINT";
5838 break;
5839 case SPRINT + ADD_NL:
5840 p = "SPRINT+NL";
5841 break;
5842 case WHITE:
5843 p = "WHITE";
5844 break;
5845 case WHITE + ADD_NL:
5846 p = "WHITE+NL";
5847 break;
5848 case NWHITE:
5849 p = "NWHITE";
5850 break;
5851 case NWHITE + ADD_NL:
5852 p = "NWHITE+NL";
5853 break;
5854 case DIGIT:
5855 p = "DIGIT";
5856 break;
5857 case DIGIT + ADD_NL:
5858 p = "DIGIT+NL";
5859 break;
5860 case NDIGIT:
5861 p = "NDIGIT";
5862 break;
5863 case NDIGIT + ADD_NL:
5864 p = "NDIGIT+NL";
5865 break;
5866 case HEX:
5867 p = "HEX";
5868 break;
5869 case HEX + ADD_NL:
5870 p = "HEX+NL";
5871 break;
5872 case NHEX:
5873 p = "NHEX";
5874 break;
5875 case NHEX + ADD_NL:
5876 p = "NHEX+NL";
5877 break;
5878 case OCTAL:
5879 p = "OCTAL";
5880 break;
5881 case OCTAL + ADD_NL:
5882 p = "OCTAL+NL";
5883 break;
5884 case NOCTAL:
5885 p = "NOCTAL";
5886 break;
5887 case NOCTAL + ADD_NL:
5888 p = "NOCTAL+NL";
5889 break;
5890 case WORD:
5891 p = "WORD";
5892 break;
5893 case WORD + ADD_NL:
5894 p = "WORD+NL";
5895 break;
5896 case NWORD:
5897 p = "NWORD";
5898 break;
5899 case NWORD + ADD_NL:
5900 p = "NWORD+NL";
5901 break;
5902 case HEAD:
5903 p = "HEAD";
5904 break;
5905 case HEAD + ADD_NL:
5906 p = "HEAD+NL";
5907 break;
5908 case NHEAD:
5909 p = "NHEAD";
5910 break;
5911 case NHEAD + ADD_NL:
5912 p = "NHEAD+NL";
5913 break;
5914 case ALPHA:
5915 p = "ALPHA";
5916 break;
5917 case ALPHA + ADD_NL:
5918 p = "ALPHA+NL";
5919 break;
5920 case NALPHA:
5921 p = "NALPHA";
5922 break;
5923 case NALPHA + ADD_NL:
5924 p = "NALPHA+NL";
5925 break;
5926 case LOWER:
5927 p = "LOWER";
5928 break;
5929 case LOWER + ADD_NL:
5930 p = "LOWER+NL";
5931 break;
5932 case NLOWER:
5933 p = "NLOWER";
5934 break;
5935 case NLOWER + ADD_NL:
5936 p = "NLOWER+NL";
5937 break;
5938 case UPPER:
5939 p = "UPPER";
5940 break;
5941 case UPPER + ADD_NL:
5942 p = "UPPER+NL";
5943 break;
5944 case NUPPER:
5945 p = "NUPPER";
5946 break;
5947 case NUPPER + ADD_NL:
5948 p = "NUPPER+NL";
5949 break;
5950 case BRANCH:
5951 p = "BRANCH";
5952 break;
5953 case EXACTLY:
5954 p = "EXACTLY";
5955 break;
5956 case NOTHING:
5957 p = "NOTHING";
5958 break;
5959 case BACK:
5960 p = "BACK";
5961 break;
5962 case END:
5963 p = "END";
5964 break;
5965 case MOPEN + 0:
5966 p = "MATCH START";
5967 break;
5968 case MOPEN + 1:
5969 case MOPEN + 2:
5970 case MOPEN + 3:
5971 case MOPEN + 4:
5972 case MOPEN + 5:
5973 case MOPEN + 6:
5974 case MOPEN + 7:
5975 case MOPEN + 8:
5976 case MOPEN + 9:
5977 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
5978 p = NULL;
5979 break;
5980 case MCLOSE + 0:
5981 p = "MATCH END";
5982 break;
5983 case MCLOSE + 1:
5984 case MCLOSE + 2:
5985 case MCLOSE + 3:
5986 case MCLOSE + 4:
5987 case MCLOSE + 5:
5988 case MCLOSE + 6:
5989 case MCLOSE + 7:
5990 case MCLOSE + 8:
5991 case MCLOSE + 9:
5992 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
5993 p = NULL;
5994 break;
5995 case BACKREF + 1:
5996 case BACKREF + 2:
5997 case BACKREF + 3:
5998 case BACKREF + 4:
5999 case BACKREF + 5:
6000 case BACKREF + 6:
6001 case BACKREF + 7:
6002 case BACKREF + 8:
6003 case BACKREF + 9:
6004 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6005 p = NULL;
6006 break;
6007 case NOPEN:
6008 p = "NOPEN";
6009 break;
6010 case NCLOSE:
6011 p = "NCLOSE";
6012 break;
6013#ifdef FEAT_SYN_HL
6014 case ZOPEN + 1:
6015 case ZOPEN + 2:
6016 case ZOPEN + 3:
6017 case ZOPEN + 4:
6018 case ZOPEN + 5:
6019 case ZOPEN + 6:
6020 case ZOPEN + 7:
6021 case ZOPEN + 8:
6022 case ZOPEN + 9:
6023 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6024 p = NULL;
6025 break;
6026 case ZCLOSE + 1:
6027 case ZCLOSE + 2:
6028 case ZCLOSE + 3:
6029 case ZCLOSE + 4:
6030 case ZCLOSE + 5:
6031 case ZCLOSE + 6:
6032 case ZCLOSE + 7:
6033 case ZCLOSE + 8:
6034 case ZCLOSE + 9:
6035 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6036 p = NULL;
6037 break;
6038 case ZREF + 1:
6039 case ZREF + 2:
6040 case ZREF + 3:
6041 case ZREF + 4:
6042 case ZREF + 5:
6043 case ZREF + 6:
6044 case ZREF + 7:
6045 case ZREF + 8:
6046 case ZREF + 9:
6047 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6048 p = NULL;
6049 break;
6050#endif
6051 case STAR:
6052 p = "STAR";
6053 break;
6054 case PLUS:
6055 p = "PLUS";
6056 break;
6057 case NOMATCH:
6058 p = "NOMATCH";
6059 break;
6060 case MATCH:
6061 p = "MATCH";
6062 break;
6063 case BEHIND:
6064 p = "BEHIND";
6065 break;
6066 case NOBEHIND:
6067 p = "NOBEHIND";
6068 break;
6069 case SUBPAT:
6070 p = "SUBPAT";
6071 break;
6072 case BRACE_LIMITS:
6073 p = "BRACE_LIMITS";
6074 break;
6075 case BRACE_SIMPLE:
6076 p = "BRACE_SIMPLE";
6077 break;
6078 case BRACE_COMPLEX + 0:
6079 case BRACE_COMPLEX + 1:
6080 case BRACE_COMPLEX + 2:
6081 case BRACE_COMPLEX + 3:
6082 case BRACE_COMPLEX + 4:
6083 case BRACE_COMPLEX + 5:
6084 case BRACE_COMPLEX + 6:
6085 case BRACE_COMPLEX + 7:
6086 case BRACE_COMPLEX + 8:
6087 case BRACE_COMPLEX + 9:
6088 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6089 p = NULL;
6090 break;
6091#ifdef FEAT_MBYTE
6092 case MULTIBYTECODE:
6093 p = "MULTIBYTECODE";
6094 break;
6095#endif
6096 case NEWL:
6097 p = "NEWL";
6098 break;
6099 default:
6100 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6101 p = NULL;
6102 break;
6103 }
6104 if (p != NULL)
6105 (void) strcat(buf, p);
6106 return buf;
6107}
6108#endif
6109
6110#ifdef FEAT_MBYTE
6111static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6112
/* Decomposition of one character: base character "a", followed by up to two
 * more characters "b" and "c" (zero when not used). */
typedef struct
{
    int a, b, c;
} decomp_T;

/* First and last character that have an entry in decomp_table[]. */
#define DECOMP_FIRST    0xfb20
#define DECOMP_LAST     0xfb4f

/* 0xfb20 - 0xfb4f; the UNUSED entries map a character to itself */
static decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into "*c1", "*c2" and "*c3".
 * For a character in the range DECOMP_FIRST - DECOMP_LAST the three values
 * come from decomp_table[].  Any other character is returned unchanged in
 * "*c1", with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int         c, *c1, *c2, *c3;
{
    decomp_T    d;

    /* The lower bound must be the first character of the table, otherwise
     * the index below becomes negative and reads outside of the table. */
    if (c >= DECOMP_FIRST && c <= DECOMP_LAST)
    {
        d = decomp_table[c - DECOMP_FIRST];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6191#endif
6192
6193/*
6194 * Compare two strings, ignore case if ireg_ic set.
6195 * Return 0 if strings match, non-zero otherwise.
6196 * Correct the length "*n" when composing characters are ignored.
6197 */
6198 static int
6199cstrncmp(s1, s2, n)
6200 char_u *s1, *s2;
6201 int *n;
6202{
6203 int result;
6204
6205 if (!ireg_ic)
6206 result = STRNCMP(s1, s2, *n);
6207 else
6208 result = MB_STRNICMP(s1, s2, *n);
6209
6210#ifdef FEAT_MBYTE
6211 /* if it failed and it's utf8 and we want to combineignore: */
6212 if (result != 0 && enc_utf8 && ireg_icombine)
6213 {
6214 char_u *str1, *str2;
6215 int c1, c2, c11, c12;
6216 int ix;
6217 int junk;
6218
6219 /* we have to handle the strcmp ourselves, since it is necessary to
6220 * deal with the composing characters by ignoring them: */
6221 str1 = s1;
6222 str2 = s2;
6223 c1 = c2 = 0;
6224 for (ix = 0; ix < *n; )
6225 {
6226 c1 = mb_ptr2char_adv(&str1);
6227 c2 = mb_ptr2char_adv(&str2);
6228 ix += utf_char2len(c1);
6229
6230 /* decompose the character if necessary, into 'base' characters
6231 * because I don't care about Arabic, I will hard-code the Hebrew
6232 * which I *do* care about! So sue me... */
6233 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6234 {
6235 /* decomposition necessary? */
6236 mb_decompose(c1, &c11, &junk, &junk);
6237 mb_decompose(c2, &c12, &junk, &junk);
6238 c1 = c11;
6239 c2 = c12;
6240 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6241 break;
6242 }
6243 }
6244 result = c2 - c1;
6245 if (result == 0)
6246 *n = (int)(str2 - s2);
6247 }
6248#endif
6249
6250 return result;
6251}
6252
6253/*
6254 * cstrchr: This function is used a lot for simple searches, keep it fast!
6255 */
6256 static char_u *
6257cstrchr(s, c)
6258 char_u *s;
6259 int c;
6260{
6261 char_u *p;
6262 int cc;
6263
6264 if (!ireg_ic
6265#ifdef FEAT_MBYTE
6266 || (!enc_utf8 && mb_char2len(c) > 1)
6267#endif
6268 )
6269 return vim_strchr(s, c);
6270
6271 /* tolower() and toupper() can be slow, comparing twice should be a lot
6272 * faster (esp. when using MS Visual C++!).
6273 * For UTF-8 need to use folded case. */
6274#ifdef FEAT_MBYTE
6275 if (enc_utf8 && c > 0x80)
6276 cc = utf_fold(c);
6277 else
6278#endif
6279 if (isupper(c))
6280 cc = TOLOWER_LOC(c);
6281 else if (islower(c))
6282 cc = TOUPPER_LOC(c);
6283 else
6284 return vim_strchr(s, c);
6285
6286#ifdef FEAT_MBYTE
6287 if (has_mbyte)
6288 {
6289 for (p = s; *p != NUL; p += (*mb_ptr2len_check)(p))
6290 {
6291 if (enc_utf8 && c > 0x80)
6292 {
6293 if (utf_fold(utf_ptr2char(p)) == cc)
6294 return p;
6295 }
6296 else if (*p == c || *p == cc)
6297 return p;
6298 }
6299 }
6300 else
6301#endif
6302 /* Faster version for when there are no multi-byte characters. */
6303 for (p = s; *p != NUL; ++p)
6304 if (*p == c || *p == cc)
6305 return p;
6306
6307 return NULL;
6308}
6309
6310/***************************************************************
6311 * regsub stuff *
6312 ***************************************************************/
6313
6314/* This stuff below really confuses cc on an SGI -- webb */
6315#ifdef __sgi
6316# undef __ARGS
6317# define __ARGS(x) ()
6318#endif
6319
6320/*
6321 * We should define ftpr as a pointer to a function returning a pointer to
6322 * a function returning a pointer to a function ...
6323 * This is impossible, so we declare a pointer to a function returning a
6324 * pointer to a function returning void. This should work for all compilers.
6325 */
6326typedef void (*(*fptr) __ARGS((char_u *, int)))();
6327
6328static fptr do_upper __ARGS((char_u *, int));
6329static fptr do_Upper __ARGS((char_u *, int));
6330static fptr do_lower __ARGS((char_u *, int));
6331static fptr do_Lower __ARGS((char_u *, int));
6332
6333static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6334
6335 static fptr
6336do_upper(d, c)
6337 char_u *d;
6338 int c;
6339{
6340 *d = TOUPPER_LOC(c);
6341
6342 return (fptr)NULL;
6343}
6344
6345 static fptr
6346do_Upper(d, c)
6347 char_u *d;
6348 int c;
6349{
6350 *d = TOUPPER_LOC(c);
6351
6352 return (fptr)do_Upper;
6353}
6354
6355 static fptr
6356do_lower(d, c)
6357 char_u *d;
6358 int c;
6359{
6360 *d = TOLOWER_LOC(c);
6361
6362 return (fptr)NULL;
6363}
6364
6365 static fptr
6366do_Lower(d, c)
6367 char_u *d;
6368 int c;
6369{
6370 *d = TOLOWER_LOC(c);
6371
6372 return (fptr)do_Lower;
6373}
6374
6375/*
6376 * regtilde(): Replace tildes in the pattern by the old pattern.
6377 *
6378 * Short explanation of the tilde: It stands for the previous replacement
6379 * pattern. If that previous pattern also contains a ~ we should go back a
6380 * step further... But we insert the previous pattern into the current one
6381 * and remember that.
6382 * This still does not handle the case where "magic" changes. TODO?
6383 *
6384 * The tildes are parsed once before the first call to vim_regsub().
6385 */
6386 char_u *
6387regtilde(source, magic)
6388 char_u *source;
6389 int magic;
6390{
6391 char_u *newsub = source;
6392 char_u *tmpsub;
6393 char_u *p;
6394 int len;
6395 int prevlen;
6396
6397 for (p = newsub; *p; ++p)
6398 {
6399 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6400 {
6401 if (reg_prev_sub != NULL)
6402 {
6403 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6404 prevlen = (int)STRLEN(reg_prev_sub);
6405 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6406 if (tmpsub != NULL)
6407 {
6408 /* copy prefix */
6409 len = (int)(p - newsub); /* not including ~ */
6410 mch_memmove(tmpsub, newsub, (size_t)len);
6411 /* interpretate tilde */
6412 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6413 /* copy postfix */
6414 if (!magic)
6415 ++p; /* back off \ */
6416 STRCPY(tmpsub + len + prevlen, p + 1);
6417
6418 if (newsub != source) /* already allocated newsub */
6419 vim_free(newsub);
6420 newsub = tmpsub;
6421 p = newsub + len + prevlen;
6422 }
6423 }
6424 else if (magic)
6425 STRCPY(p, p + 1); /* remove '~' */
6426 else
6427 STRCPY(p, p + 2); /* remove '\~' */
6428 --p;
6429 }
6430 else
6431 {
6432 if (*p == '\\' && p[1]) /* skip escaped characters */
6433 ++p;
6434#ifdef FEAT_MBYTE
6435 if (has_mbyte)
6436 p += (*mb_ptr2len_check)(p) - 1;
6437#endif
6438 }
6439 }
6440
6441 vim_free(reg_prev_sub);
6442 if (newsub != source) /* newsub was allocated, just keep it */
6443 reg_prev_sub = newsub;
6444 else /* no ~ found, need to save newsub */
6445 reg_prev_sub = vim_strsave(newsub);
6446 return newsub;
6447}
6448
6449#ifdef FEAT_EVAL
6450static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
6451
6452/* These pointers are used instead of reg_match and reg_mmatch for
6453 * reg_submatch(). Needed for when the substitution string is an expression
6454 * that contains a call to substitute() and submatch(). */
6455static regmatch_T *submatch_match;
6456static regmmatch_T *submatch_mmatch;
6457#endif
6458
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
/*
 * vim_regsub() - perform substitutions after a vim_regexec() or
 * vim_regexec_multi() match.
 *
 * "rmp" holds the match, "source" is the substitute string and "dest" is
 * where the result goes.
 *
 * When "copy" is TRUE the result is really copied into "dest".
 * When "copy" is FALSE nothing is copied, the call is only used to find out
 * the length of the result.
 *
 * When "backslash" is TRUE a backslash will be removed later: backslashes
 * need to be doubled to keep them and a backslash is inserted before a CR,
 * to avoid it being replaced with a line break later.
 *
 * Note: The matched text must not change between the call of
 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the
 * back references invalid!
 *
 * Returns the size of the replacement, including terminating NUL.
 */
    int
vim_regsub(rmp, source, dest, copy, magic, backslash)
    regmatch_T  *rmp;
    char_u      *source;
    char_u      *dest;
    int         copy;
    int         magic;
    int         backslash;
{
    /* Single-line match: there is no multi-line match and no next line. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_maxline = 0;

    return vim_regsub_both(source, dest, copy, magic, backslash);
}
#endif
6493
6494 int
6495vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6496 regmmatch_T *rmp;
6497 linenr_T lnum;
6498 char_u *source;
6499 char_u *dest;
6500 int copy;
6501 int magic;
6502 int backslash;
6503{
6504 reg_match = NULL;
6505 reg_mmatch = rmp;
6506 reg_buf = curbuf; /* always works on the current buffer! */
6507 reg_firstlnum = lnum;
6508 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6509 return vim_regsub_both(source, dest, copy, magic, backslash);
6510}
6511
6512 static int
6513vim_regsub_both(source, dest, copy, magic, backslash)
6514 char_u *source;
6515 char_u *dest;
6516 int copy;
6517 int magic;
6518 int backslash;
6519{
6520 char_u *src;
6521 char_u *dst;
6522 char_u *s;
6523 int c;
6524 int no = -1;
6525 fptr func = (fptr)NULL;
6526 linenr_T clnum = 0; /* init for GCC */
6527 int len = 0; /* init for GCC */
6528#ifdef FEAT_EVAL
6529 static char_u *eval_result = NULL;
6530#endif
6531#ifdef FEAT_MBYTE
6532 int l;
6533#endif
6534
6535
6536 /* Be paranoid... */
6537 if (source == NULL || dest == NULL)
6538 {
6539 EMSG(_(e_null));
6540 return 0;
6541 }
6542 if (prog_magic_wrong())
6543 return 0;
6544 src = source;
6545 dst = dest;
6546
6547 /*
6548 * When the substitute part starts with "\=" evaluate it as an expression.
6549 */
6550 if (source[0] == '\\' && source[1] == '='
6551#ifdef FEAT_EVAL
6552 && !can_f_submatch /* can't do this recursively */
6553#endif
6554 )
6555 {
6556#ifdef FEAT_EVAL
6557 /* To make sure that the length doesn't change between checking the
6558 * length and copying the string, and to speed up things, the
6559 * resulting string is saved from the call with "copy" == FALSE to the
6560 * call with "copy" == TRUE. */
6561 if (copy)
6562 {
6563 if (eval_result != NULL)
6564 {
6565 STRCPY(dest, eval_result);
6566 dst += STRLEN(eval_result);
6567 vim_free(eval_result);
6568 eval_result = NULL;
6569 }
6570 }
6571 else
6572 {
6573 linenr_T save_reg_maxline;
6574 win_T *save_reg_win;
6575 int save_ireg_ic;
6576
6577 vim_free(eval_result);
6578
6579 /* The expression may contain substitute(), which calls us
6580 * recursively. Make sure submatch() gets the text from the first
6581 * level. Don't need to save "reg_buf", because
6582 * vim_regexec_multi() can't be called recursively. */
6583 submatch_match = reg_match;
6584 submatch_mmatch = reg_mmatch;
6585 save_reg_maxline = reg_maxline;
6586 save_reg_win = reg_win;
6587 save_ireg_ic = ireg_ic;
6588 can_f_submatch = TRUE;
6589
6590 eval_result = eval_to_string(source + 2, NULL);
6591 if (eval_result != NULL)
6592 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00006593 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006594 {
6595 /* Change NL to CR, so that it becomes a line break.
6596 * Skip over a backslashed character. */
6597 if (*s == NL)
6598 *s = CAR;
6599 else if (*s == '\\' && s[1] != NUL)
6600 ++s;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006601 }
6602
6603 dst += STRLEN(eval_result);
6604 }
6605
6606 reg_match = submatch_match;
6607 reg_mmatch = submatch_mmatch;
6608 reg_maxline = save_reg_maxline;
6609 reg_win = save_reg_win;
6610 ireg_ic = save_ireg_ic;
6611 can_f_submatch = FALSE;
6612 }
6613#endif
6614 }
6615 else
6616 while ((c = *src++) != NUL)
6617 {
6618 if (c == '&' && magic)
6619 no = 0;
6620 else if (c == '\\' && *src != NUL)
6621 {
6622 if (*src == '&' && !magic)
6623 {
6624 ++src;
6625 no = 0;
6626 }
6627 else if ('0' <= *src && *src <= '9')
6628 {
6629 no = *src++ - '0';
6630 }
6631 else if (vim_strchr((char_u *)"uUlLeE", *src))
6632 {
6633 switch (*src++)
6634 {
6635 case 'u': func = (fptr)do_upper;
6636 continue;
6637 case 'U': func = (fptr)do_Upper;
6638 continue;
6639 case 'l': func = (fptr)do_lower;
6640 continue;
6641 case 'L': func = (fptr)do_Lower;
6642 continue;
6643 case 'e':
6644 case 'E': func = (fptr)NULL;
6645 continue;
6646 }
6647 }
6648 }
6649 if (no < 0) /* Ordinary character. */
6650 {
6651 if (c == '\\' && *src != NUL)
6652 {
6653 /* Check for abbreviations -- webb */
6654 switch (*src)
6655 {
6656 case 'r': c = CAR; ++src; break;
6657 case 'n': c = NL; ++src; break;
6658 case 't': c = TAB; ++src; break;
6659 /* Oh no! \e already has meaning in subst pat :-( */
6660 /* case 'e': c = ESC; ++src; break; */
6661 case 'b': c = Ctrl_H; ++src; break;
6662
6663 /* If "backslash" is TRUE the backslash will be removed
6664 * later. Used to insert a literal CR. */
6665 default: if (backslash)
6666 {
6667 if (copy)
6668 *dst = '\\';
6669 ++dst;
6670 }
6671 c = *src++;
6672 }
6673 }
6674
6675 /* Write to buffer, if copy is set. */
6676#ifdef FEAT_MBYTE
6677 if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
6678 {
6679 /* TODO: should use "func" here. */
6680 if (copy)
6681 mch_memmove(dst, src - 1, l);
6682 dst += l - 1;
6683 src += l - 1;
6684 }
6685 else
6686 {
6687#endif
6688 if (copy)
6689 {
6690 if (func == (fptr)NULL) /* just copy */
6691 *dst = c;
6692 else /* change case */
6693 func = (fptr)(func(dst, c));
6694 /* Turbo C complains without the typecast */
6695 }
6696#ifdef FEAT_MBYTE
6697 }
6698#endif
6699 dst++;
6700 }
6701 else
6702 {
6703 if (REG_MULTI)
6704 {
6705 clnum = reg_mmatch->startpos[no].lnum;
6706 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6707 s = NULL;
6708 else
6709 {
6710 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6711 if (reg_mmatch->endpos[no].lnum == clnum)
6712 len = reg_mmatch->endpos[no].col
6713 - reg_mmatch->startpos[no].col;
6714 else
6715 len = (int)STRLEN(s);
6716 }
6717 }
6718 else
6719 {
6720 s = reg_match->startp[no];
6721 if (reg_match->endp[no] == NULL)
6722 s = NULL;
6723 else
6724 len = (int)(reg_match->endp[no] - s);
6725 }
6726 if (s != NULL)
6727 {
6728 for (;;)
6729 {
6730 if (len == 0)
6731 {
6732 if (REG_MULTI)
6733 {
6734 if (reg_mmatch->endpos[no].lnum == clnum)
6735 break;
6736 if (copy)
6737 *dst = CAR;
6738 ++dst;
6739 s = reg_getline(++clnum);
6740 if (reg_mmatch->endpos[no].lnum == clnum)
6741 len = reg_mmatch->endpos[no].col;
6742 else
6743 len = (int)STRLEN(s);
6744 }
6745 else
6746 break;
6747 }
6748 else if (*s == NUL) /* we hit NUL. */
6749 {
6750 if (copy)
6751 EMSG(_(e_re_damg));
6752 goto exit;
6753 }
6754 else
6755 {
6756 if (backslash && (*s == CAR || *s == '\\'))
6757 {
6758 /*
6759 * Insert a backslash in front of a CR, otherwise
6760 * it will be replaced by a line break.
6761 * Number of backslashes will be halved later,
6762 * double them here.
6763 */
6764 if (copy)
6765 {
6766 dst[0] = '\\';
6767 dst[1] = *s;
6768 }
6769 dst += 2;
6770 }
6771#ifdef FEAT_MBYTE
6772 else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
6773 {
6774 /* TODO: should use "func" here. */
6775 if (copy)
6776 mch_memmove(dst, s, l);
6777 dst += l;
6778 s += l - 1;
6779 len -= l - 1;
6780 }
6781#endif
6782 else
6783 {
6784 if (copy)
6785 {
6786 if (func == (fptr)NULL) /* just copy */
6787 *dst = *s;
6788 else /* change case */
6789 func = (fptr)(func(dst, *s));
6790 /* Turbo C complains without the typecast */
6791 }
6792 ++dst;
6793 }
6794 ++s;
6795 --len;
6796 }
6797 }
6798 }
6799 no = -1;
6800 }
6801 }
6802 if (copy)
6803 *dst = NUL;
6804
6805exit:
6806 return (int)((dst - dest) + 1);
6807}
6808
6809#ifdef FEAT_EVAL
6810/*
6811 * Used for the submatch() function: get the string from tne n'th submatch in
6812 * allocated memory.
6813 * Returns NULL when not in a ":s" command and for a non-existing submatch.
6814 */
6815 char_u *
6816reg_submatch(no)
6817 int no;
6818{
6819 char_u *retval = NULL;
6820 char_u *s;
6821 int len;
6822 int round;
6823 linenr_T lnum;
6824
6825 if (!can_f_submatch)
6826 return NULL;
6827
6828 if (submatch_match == NULL)
6829 {
6830 /*
6831 * First round: compute the length and allocate memory.
6832 * Second round: copy the text.
6833 */
6834 for (round = 1; round <= 2; ++round)
6835 {
6836 lnum = submatch_mmatch->startpos[no].lnum;
6837 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
6838 return NULL;
6839
6840 s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
6841 if (s == NULL) /* anti-crash check, cannot happen? */
6842 break;
6843 if (submatch_mmatch->endpos[no].lnum == lnum)
6844 {
6845 /* Within one line: take form start to end col. */
6846 len = submatch_mmatch->endpos[no].col
6847 - submatch_mmatch->startpos[no].col;
6848 if (round == 2)
6849 {
6850 STRNCPY(retval, s, len);
6851 retval[len] = NUL;
6852 }
6853 ++len;
6854 }
6855 else
6856 {
6857 /* Multiple lines: take start line from start col, middle
6858 * lines completely and end line up to end col. */
6859 len = (int)STRLEN(s);
6860 if (round == 2)
6861 {
6862 STRCPY(retval, s);
6863 retval[len] = '\n';
6864 }
6865 ++len;
6866 ++lnum;
6867 while (lnum < submatch_mmatch->endpos[no].lnum)
6868 {
6869 s = reg_getline(lnum++);
6870 if (round == 2)
6871 STRCPY(retval + len, s);
6872 len += (int)STRLEN(s);
6873 if (round == 2)
6874 retval[len] = '\n';
6875 ++len;
6876 }
6877 if (round == 2)
6878 STRNCPY(retval + len, reg_getline(lnum),
6879 submatch_mmatch->endpos[no].col);
6880 len += submatch_mmatch->endpos[no].col;
6881 if (round == 2)
6882 retval[len] = NUL;
6883 ++len;
6884 }
6885
6886 if (round == 1)
6887 {
6888 retval = lalloc((long_u)len, TRUE);
6889 if (s == NULL)
6890 return NULL;
6891 }
6892 }
6893 }
6894 else
6895 {
6896 if (submatch_match->endp[no] == NULL)
6897 retval = NULL;
6898 else
6899 {
6900 s = submatch_match->startp[no];
6901 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
6902 }
6903 }
6904
6905 return retval;
6906}
6907#endif