blob: e212cc0dc7292565c78d1b211bdc22e09a68fa80 [file] [log] [blame]
Bram Moolenaar071d4272004-06-13 20:20:40 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer. This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim. If you want a good
10 * regular expression library, get the original code. The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 * Copyright (c) 1986 by University of Toronto.
16 * Written by Henry Spencer. Not derived from licensed software.
17 *
18 * Permission is granted to anyone to use this software for any
19 * purpose on any computer system, and to redistribute it freely,
20 * subject to the following restrictions:
21 *
22 * 1. The author is not responsible for the consequences of use of
23 * this software, no matter how awful, even if they arise
24 * from defects in it.
25 *
26 * 2. The origin of this software must not be misrepresented, either
27 * by explicit claim or by omission.
28 *
29 * 3. Altered versions must be plainly marked as such, and must not
30 * be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions. Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
Bram Moolenaarc0197e22004-09-13 20:26:32 +000036 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar071d4272004-06-13 20:20:40 +000038 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
41#include "vim.h"
42
43#undef DEBUG
44
45/*
46 * The "internal use only" fields in regexp.h are present to pass info from
47 * compile to execute that permits the execute phase to run lots faster on
48 * simple cases. They are:
49 *
50 * regstart char that must begin a match; NUL if none obvious; Can be a
51 * multi-byte character.
52 * reganch is the match anchored (at beginning-of-line only)?
53 * regmust string (pointer into program) that match must include, or NULL
54 * regmlen length of regmust string
55 * regflags RF_ values or'ed together
56 *
57 * Regstart and reganch permit very fast decisions on suitable starting points
58 * for a match, cutting down the work a lot. Regmust permits fast rejection
59 * of lines that cannot possibly match. The regmust tests are costly enough
60 * that vim_regcomp() supplies a regmust only if the r.e. contains something
61 * potentially expensive (at present, the only such thing detected is * or +
62 * at the start of the r.e., which can involve a lot of backup). Regmlen is
63 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
64 * computing it anyway.
65 */
66
67/*
68 * Structure for regexp "program". This is essentially a linear encoding
69 * of a nondeterministic finite-state machine (aka syntax charts or
70 * "railroad normal form" in parsing technology). Each node is an opcode
71 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
73 * pointer with a BRANCH on both ends of it is connecting two alternatives.
74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
75 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaardf177f62005-02-22 08:39:57 +000077 * node points to the node after the stuff to be repeated.
78 * The operand of some types of node is a literal string; for others, it is a
79 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
80 * is the first node of the branch.
81 * (NB this is *not* a tree structure: the tail of the branch connects to the
82 * thing following the set of BRANCHes.)
Bram Moolenaar071d4272004-06-13 20:20:40 +000083 *
84 * pattern is coded like:
85 *
86 * +-----------------+
87 * | V
88 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
89 * | ^ | ^
90 * +------+ +----------+
91 *
92 *
93 * +------------------+
94 * V |
95 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
96 * | | ^ ^
97 * | +---------------+ |
98 * +---------------------------------------------+
99 *
100 *
Bram Moolenaardf177f62005-02-22 08:39:57 +0000101 * +----------------------+
102 * V |
Bram Moolenaar582fd852005-03-28 20:58:01 +0000103 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000104 * | | ^ ^
105 * | +-----------+ |
106 * +--------------------------------------------------+
Bram Moolenaardf177f62005-02-22 08:39:57 +0000107 *
108 *
Bram Moolenaar071d4272004-06-13 20:20:40 +0000109 * +-------------------------+
110 * V |
111 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
112 * | | ^
113 * | +----------------+
114 * +-----------------------------------------------+
115 *
116 *
117 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
118 * | | ^ ^
119 * | +----------------+ |
120 * +--------------------------------+
121 *
122 * +---------+
123 * | V
124 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
125 * | | | | ^ ^
126 * | | | +-----+ |
127 * | | +----------------+ |
128 * | +---------------------------+ |
129 * +------------------------------------------------------+
130 *
 * They all start with a BRANCH for "\|" alternatives, even when there is only
132 * one alternative.
133 */
134
135/*
136 * The opcodes are:
137 */
138
139/* definition number opnd? meaning */
140#define END 0 /* End of program or NOMATCH operand. */
141#define BOL 1 /* Match "" at beginning of line. */
142#define EOL 2 /* Match "" at end of line. */
143#define BRANCH 3 /* node Match this alternative, or the
144 * next... */
145#define BACK 4 /* Match "", "next" ptr points backward. */
146#define EXACTLY 5 /* str Match this string. */
147#define NOTHING 6 /* Match empty string. */
148#define STAR 7 /* node Match this (simple) thing 0 or more
149 * times. */
150#define PLUS 8 /* node Match this (simple) thing 1 or more
151 * times. */
152#define MATCH 9 /* node match the operand zero-width */
153#define NOMATCH 10 /* node check for no match with operand */
154#define BEHIND 11 /* node look behind for a match with operand */
155#define NOBEHIND 12 /* node look behind for no match with operand */
156#define SUBPAT 13 /* node match the operand here */
157#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
158 * n times (\{m,n\}). */
159#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
160#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
161#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
162 * and BRACE_COMPLEX. */
163#define NEWL 18 /* Match line-break */
164#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
165
166
/*
 * Character classes: 20-48 are the normal opcodes, 50-78 are the same classes
 * that additionally include a line-break.  The line-break variant of an
 * opcode is obtained by adding ADD_NL to it.
 *
 * FIRST_NL and LAST_NL are fully parenthesized so that they expand to a
 * single value in any expression (e.g. "FIRST_NL * 2" or "x - LAST_NL").
 */
#define ADD_NL      30
#define FIRST_NL    (ANY + ADD_NL)
#define ANY         20  /*      Match any one character. */
#define ANYOF       21  /* str  Match any character in this string. */
#define ANYBUT      22  /* str  Match any character not in this
                         *      string. */
#define IDENT       23  /*      Match identifier char */
#define SIDENT      24  /*      Match identifier char but no digit */
#define KWORD       25  /*      Match keyword char */
#define SKWORD      26  /*      Match word char but no digit */
#define FNAME       27  /*      Match file name char */
#define SFNAME      28  /*      Match file name char but no digit */
#define PRINT       29  /*      Match printable char */
#define SPRINT      30  /*      Match printable char but no digit */
#define WHITE       31  /*      Match whitespace char */
#define NWHITE      32  /*      Match non-whitespace char */
#define DIGIT       33  /*      Match digit char */
#define NDIGIT      34  /*      Match non-digit char */
#define HEX         35  /*      Match hex char */
#define NHEX        36  /*      Match non-hex char */
#define OCTAL       37  /*      Match octal char */
#define NOCTAL      38  /*      Match non-octal char */
#define WORD        39  /*      Match word char */
#define NWORD       40  /*      Match non-word char */
#define HEAD        41  /*      Match head char */
#define NHEAD       42  /*      Match non-head char */
#define ALPHA       43  /*      Match alpha char */
#define NALPHA      44  /*      Match non-alpha char */
#define LOWER       45  /*      Match lowercase char */
#define NLOWER      46  /*      Match non-lowercase char */
#define UPPER       47  /*      Match uppercase char */
#define NUPPER      48  /*      Match non-uppercase char */
#define LAST_NL     (NUPPER + ADD_NL)
/* TRUE when "op" is one of the opcodes that also match a line-break. */
#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
202
203#define MOPEN 80 /* -89 Mark this point in input as start of
204 * \( subexpr. MOPEN + 0 marks start of
205 * match. */
206#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
207 * end of match. */
208#define BACKREF 100 /* -109 node Match same string again \1-\9 */
209
210#ifdef FEAT_SYN_HL
211# define ZOPEN 110 /* -119 Mark this point in input as start of
212 * \z( subexpr. */
213# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
214# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
215#endif
216
217#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
218
219#define NOPEN 150 /* Mark this point in input as start of
220 \%( subexpr. */
221#define NCLOSE 151 /* Analogous to NOPEN. */
222
223#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
224#define RE_BOF 201 /* Match "" at beginning of file. */
225#define RE_EOF 202 /* Match "" at end of file. */
226#define CURSOR 203 /* Match location of cursor. */
227
228#define RE_LNUM 204 /* nr cmp Match line number */
229#define RE_COL 205 /* nr cmp Match column number */
230#define RE_VCOL 206 /* nr cmp Match virtual column number */
231
232/*
233 * Magic characters have a special meaning, they don't match literally.
234 * Magic characters are negative. This separates them from literal characters
235 * (possibly multi-byte). Only ASCII characters can be Magic.
236 */
237#define Magic(x) ((int)(x) - 256)
238#define un_Magic(x) ((x) + 256)
239#define is_Magic(x) ((x) < 0)
240
241static int no_Magic __ARGS((int x));
242static int toggle_Magic __ARGS((int x));
243
/*
 * Remove the magic marker from "x": a Magic (negative) value is converted
 * back with un_Magic(), any other value is returned unchanged.
 */
    static int
no_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
252
/*
 * Flip the magicness of "x": a Magic value is converted with un_Magic(),
 * a non-Magic value is converted with Magic().
 */
    static int
toggle_Magic(x)
    int         x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
261
262/*
263 * The first byte of the regexp internal "program" is actually this magic
264 * number; the start node begins in the second byte. It's used to catch the
265 * most severe mutilation of the program by the caller.
266 */
267
268#define REGMAGIC 0234
269
270/*
271 * Opcode notes:
272 *
273 * BRANCH The set of branches constituting a single choice are hooked
274 * together with their "next" pointers, since precedence prevents
275 * anything being concatenated to any individual branch. The
276 * "next" pointer of the last BRANCH in a choice points to the
277 * thing following the whole choice. This is also where the
278 * final "next" pointer of each individual branch points; each
279 * branch starts with the operand node of a BRANCH node.
280 *
281 * BACK Normal "next" pointers all implicitly point forward; BACK
282 * exists to make loop structures possible.
283 *
284 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
285 * BRANCH structures using BACK. Simple cases (one character
286 * per match) are implemented with STAR and PLUS for speed
287 * and to minimize recursive plunges.
288 *
289 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
290 * node, and defines the min and max limits to be used for that
291 * node.
292 *
293 * MOPEN,MCLOSE ...are numbered at compile time.
294 * ZOPEN,ZCLOSE ...ditto
295 */
296
297/*
298 * A node is one char of opcode followed by two chars of "next" pointer.
299 * "Next" pointers are stored as two 8-bit bytes, high order first. The
300 * value is a positive offset from the opcode of the node containing it.
301 * An operand, if any, simply follows the node. (Note that much of the
302 * code generation knows about this implicit relationship.)
303 *
304 * Using two bytes for the "next" pointer is vast overkill for most things,
305 * but allows patterns to get big without disasters.
306 */
307#define OP(p) ((int)*(p))
308#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
309#define OPERAND(p) ((p) + 3)
310/* Obtain an operand that was stored as four bytes, MSB first. */
311#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
312 + ((long)(p)[5] << 8) + (long)(p)[6])
313/* Obtain a second operand stored as four bytes. */
314#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
315/* Obtain a second single-byte operand stored after a four bytes operand. */
316#define OPERAND_CMP(p) (p)[7]
317
318/*
319 * Utility definitions.
320 */
321#define UCHARAT(p) ((int)*(char_u *)(p))
322
323/* Used for an error (down from) vim_regcomp(): give the error message, set
324 * rc_did_emsg and return NULL */
Bram Moolenaar45eeb132005-06-06 21:59:07 +0000325#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, NULL)
326#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, NULL)
327#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000328#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
329
330#define MAX_LIMIT (32767L << 16L)
331
332static int re_multi_type __ARGS((int));
333static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
334static char_u *cstrchr __ARGS((char_u *, int));
335
336#ifdef DEBUG
337static void regdump __ARGS((char_u *, regprog_T *));
338static char_u *regprop __ARGS((char_u *));
339#endif
340
341#define NOT_MULTI 0
342#define MULTI_ONE 1
343#define MULTI_MULT 2
344/*
345 * Return NOT_MULTI if c is not a "multi" operator.
346 * Return MULTI_ONE if c is a single "multi" operator.
347 * Return MULTI_MULT if c is a multi "multi" operator.
348 */
349 static int
350re_multi_type(c)
351 int c;
352{
353 if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
354 return MULTI_ONE;
355 if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
356 return MULTI_MULT;
357 return NOT_MULTI;
358}
359
360/*
361 * Flags to be passed up and down.
362 */
363#define HASWIDTH 0x1 /* Known never to match null string. */
364#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
365#define SPSTART 0x4 /* Starts with * or +. */
366#define HASNL 0x8 /* Contains some \n. */
367#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
368#define WORST 0 /* Worst case. */
369
370/*
371 * When regcode is set to this value, code is not emitted and size is computed
372 * instead.
373 */
374#define JUST_CALC_SIZE ((char_u *) -1)
375
Bram Moolenaarf461c8e2005-06-25 23:04:51 +0000376static char_u *reg_prev_sub = NULL;
377
378#if defined(EXITFREE) || defined(PROTO)
379 void
380free_regexp_stuff()
381{
382 vim_free(reg_prev_sub);
383}
384#endif
Bram Moolenaar071d4272004-06-13 20:20:40 +0000385
386/*
387 * REGEXP_INRANGE contains all characters which are always special in a []
388 * range after '\'.
389 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
390 * These are:
391 * \n - New line (NL).
392 * \r - Carriage Return (CR).
393 * \t - Tab (TAB).
394 * \e - Escape (ESC).
395 * \b - Backspace (Ctrl_H).
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000396 * \d - Character code in decimal, eg \d123
 * \o - Character code in octal, eg \o120
398 * \x - Character code in hex, eg \x4a
399 * \u - Multibyte character code, eg \u20ac
400 * \U - Long multibyte character code, eg \U12345678
Bram Moolenaar071d4272004-06-13 20:20:40 +0000401 */
402static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000403static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar071d4272004-06-13 20:20:40 +0000404
405static int backslash_trans __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000406static int get_char_class __ARGS((char_u **pp));
407static int get_equi_class __ARGS((char_u **pp));
408static void reg_equi_class __ARGS((int c));
409static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000410static char_u *skip_anyof __ARGS((char_u *p));
411static void init_class_tab __ARGS((void));
412
413/*
414 * Translate '\x' to its control character, except "\n", which is Magic.
415 */
416 static int
417backslash_trans(c)
418 int c;
419{
420 switch (c)
421 {
422 case 'r': return CAR;
423 case 't': return TAB;
424 case 'e': return ESC;
425 case 'b': return BS;
426 }
427 return c;
428}
429
430/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000431 * Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000432 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
433 * recognized. Otherwise "pp" is advanced to after the item.
434 */
435 static int
Bram Moolenaardf177f62005-02-22 08:39:57 +0000436get_char_class(pp)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000437 char_u **pp;
438{
439 static const char *(class_names[]) =
440 {
441 "alnum:]",
442#define CLASS_ALNUM 0
443 "alpha:]",
444#define CLASS_ALPHA 1
445 "blank:]",
446#define CLASS_BLANK 2
447 "cntrl:]",
448#define CLASS_CNTRL 3
449 "digit:]",
450#define CLASS_DIGIT 4
451 "graph:]",
452#define CLASS_GRAPH 5
453 "lower:]",
454#define CLASS_LOWER 6
455 "print:]",
456#define CLASS_PRINT 7
457 "punct:]",
458#define CLASS_PUNCT 8
459 "space:]",
460#define CLASS_SPACE 9
461 "upper:]",
462#define CLASS_UPPER 10
463 "xdigit:]",
464#define CLASS_XDIGIT 11
465 "tab:]",
466#define CLASS_TAB 12
467 "return:]",
468#define CLASS_RETURN 13
469 "backspace:]",
470#define CLASS_BACKSPACE 14
471 "escape:]",
472#define CLASS_ESCAPE 15
473 };
474#define CLASS_NONE 99
475 int i;
476
477 if ((*pp)[1] == ':')
478 {
479 for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
480 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
481 {
482 *pp += STRLEN(class_names[i]) + 2;
483 return i;
484 }
485 }
486 return CLASS_NONE;
487}
488
489/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000490 * Specific version of character class functions.
491 * Using a table to keep this fast.
492 */
493static short class_tab[256];
494
495#define RI_DIGIT 0x01
496#define RI_HEX 0x02
497#define RI_OCTAL 0x04
498#define RI_WORD 0x08
499#define RI_HEAD 0x10
500#define RI_ALPHA 0x20
501#define RI_LOWER 0x40
502#define RI_UPPER 0x80
503#define RI_WHITE 0x100
504
505 static void
506init_class_tab()
507{
508 int i;
509 static int done = FALSE;
510
511 if (done)
512 return;
513
514 for (i = 0; i < 256; ++i)
515 {
516 if (i >= '0' && i <= '7')
517 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
518 else if (i >= '8' && i <= '9')
519 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
520 else if (i >= 'a' && i <= 'f')
521 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
522#ifdef EBCDIC
523 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
524 || (i >= 's' && i <= 'z'))
525#else
526 else if (i >= 'g' && i <= 'z')
527#endif
528 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
529 else if (i >= 'A' && i <= 'F')
530 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
531#ifdef EBCDIC
532 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
533 || (i >= 'S' && i <= 'Z'))
534#else
535 else if (i >= 'G' && i <= 'Z')
536#endif
537 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
538 else if (i == '_')
539 class_tab[i] = RI_WORD + RI_HEAD;
540 else
541 class_tab[i] = 0;
542 }
543 class_tab[' '] |= RI_WHITE;
544 class_tab['\t'] |= RI_WHITE;
545 done = TRUE;
546}
547
/*
 * Character class tests that look up the RI_ flags in class_tab[], which is
 * filled by init_class_tab().  The result is non-zero when "c" is in the
 * class.
 * With FEAT_MBYTE "c" can be a multi-byte character; values of 0x100 and above
 * are outside the 256-entry table and are never in any class.  Note that "c"
 * is evaluated twice there, so it must not have side effects.
 * The argument is parenthesized so that any expression can be passed.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
# define ri_hex(c)      (class_tab[c] & RI_HEX)
# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
# define ri_word(c)     (class_tab[c] & RI_WORD)
# define ri_head(c)     (class_tab[c] & RI_HEAD)
# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
# define ri_lower(c)    (class_tab[c] & RI_LOWER)
# define ri_upper(c)    (class_tab[c] & RI_UPPER)
# define ri_white(c)    (class_tab[c] & RI_WHITE)
#endif
569
570/* flags for regflags */
571#define RF_ICASE 1 /* ignore case */
572#define RF_NOICASE 2 /* don't ignore case */
573#define RF_HASNL 4 /* can match a NL */
574#define RF_ICOMBINE 8 /* ignore combining characters */
575#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
576
577/*
578 * Global work variables for vim_regcomp().
579 */
580
581static char_u *regparse; /* Input-scan pointer. */
582static int prevchr_len; /* byte length of previous char */
583static int num_complex_braces; /* Complex \{...} count */
584static int regnpar; /* () count. */
585#ifdef FEAT_SYN_HL
586static int regnzpar; /* \z() count. */
587static int re_has_z; /* \z item detected */
588#endif
589static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
590static long regsize; /* Code size. */
591static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
592static unsigned regflags; /* RF_ flags for prog */
593static long brace_min[10]; /* Minimums for complex brace repeats */
594static long brace_max[10]; /* Maximums for complex brace repeats */
595static int brace_count[10]; /* Current counts for complex brace repeats */
596#if defined(FEAT_SYN_HL) || defined(PROTO)
597static int had_eol; /* TRUE when EOL found by vim_regcomp() */
598#endif
599static int one_exactly = FALSE; /* only do one char for EXACTLY */
600
601static int reg_magic; /* magicness of the pattern: */
602#define MAGIC_NONE 1 /* "\V" very unmagic */
603#define MAGIC_OFF 2 /* "\M" or 'magic' off */
604#define MAGIC_ON 3 /* "\m" or 'magic' */
605#define MAGIC_ALL 4 /* "\v" very magic */
606
607static int reg_string; /* matching with a string instead of a buffer
608 line */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000609static int reg_strict; /* "[abc" is illegal */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000610
611/*
612 * META contains all characters that may be magic, except '^' and '$'.
613 */
614
615#ifdef EBCDIC
616static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
617#else
618/* META[] is used often enough to justify turning it into a table. */
619static char_u META_flags[] = {
620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
621 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622/* % & ( ) * + . */
623 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
624/* 1 2 3 4 5 6 7 8 9 < = > ? */
625 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
626/* @ A C D F H I K L M O */
627 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
628/* P S U V W X Z [ _ */
629 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
630/* a c d f h i k l m n o */
631 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
632/* p s u v w x z { | ~ */
633 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
634};
635#endif
636
637static int curchr;
638
639/* arguments for reg() */
640#define REG_NOPAREN 0 /* toplevel reg() */
641#define REG_PAREN 1 /* \(\) */
642#define REG_ZPAREN 2 /* \z(\) */
643#define REG_NPAREN 3 /* \%(\) */
644
645/*
646 * Forward declarations for vim_regcomp()'s friends.
647 */
648static void initchr __ARGS((char_u *));
649static int getchr __ARGS((void));
650static void skipchr_keepstart __ARGS((void));
651static int peekchr __ARGS((void));
652static void skipchr __ARGS((void));
653static void ungetchr __ARGS((void));
Bram Moolenaarc0197e22004-09-13 20:26:32 +0000654static int gethexchrs __ARGS((int maxinputlen));
655static int getoctchrs __ARGS((void));
656static int getdecchrs __ARGS((void));
657static int coll_get_char __ARGS((void));
Bram Moolenaar071d4272004-06-13 20:20:40 +0000658static void regcomp_start __ARGS((char_u *expr, int flags));
659static char_u *reg __ARGS((int, int *));
660static char_u *regbranch __ARGS((int *flagp));
661static char_u *regconcat __ARGS((int *flagp));
662static char_u *regpiece __ARGS((int *));
663static char_u *regatom __ARGS((int *));
664static char_u *regnode __ARGS((int));
665static int prog_magic_wrong __ARGS((void));
666static char_u *regnext __ARGS((char_u *));
667static void regc __ARGS((int b));
668#ifdef FEAT_MBYTE
669static void regmbc __ARGS((int c));
Bram Moolenaardf177f62005-02-22 08:39:57 +0000670#else
671# define regmbc(c) regc(c)
Bram Moolenaar071d4272004-06-13 20:20:40 +0000672#endif
673static void reginsert __ARGS((int, char_u *));
674static void reginsert_limits __ARGS((int, long, long, char_u *));
675static char_u *re_put_long __ARGS((char_u *pr, long_u val));
676static int read_limits __ARGS((long *, long *));
677static void regtail __ARGS((char_u *, char_u *));
678static void regoptail __ARGS((char_u *, char_u *));
679
680/*
681 * Return TRUE if compiled regular expression "prog" can match a line break.
682 */
683 int
684re_multiline(prog)
685 regprog_T *prog;
686{
687 return (prog->regflags & RF_HASNL);
688}
689
690/*
691 * Return TRUE if compiled regular expression "prog" looks before the start
692 * position (pattern contains "\@<=" or "\@<!").
693 */
694 int
695re_lookbehind(prog)
696 regprog_T *prog;
697{
698 return (prog->regflags & RF_LOOKBH);
699}
700
701/*
Bram Moolenaardf177f62005-02-22 08:39:57 +0000702 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
703 * Returns a character representing the class. Zero means that no item was
704 * recognized. Otherwise "pp" is advanced to after the item.
705 */
706 static int
707get_equi_class(pp)
708 char_u **pp;
709{
710 int c;
711 int l = 1;
712 char_u *p = *pp;
713
714 if (p[1] == '=')
715 {
716#ifdef FEAT_MBYTE
717 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000718 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000719#endif
720 if (p[l + 2] == '=' && p[l + 3] == ']')
721 {
722#ifdef FEAT_MBYTE
723 if (has_mbyte)
724 c = mb_ptr2char(p + 2);
725 else
726#endif
727 c = p[2];
728 *pp += l + 4;
729 return c;
730 }
731 }
732 return 0;
733}
734
/*
 * Produce the bytes for equivalence class "c": regmbc() is called for every
 * character that is equivalent to "c".  When "c" is not in one of the known
 * classes only "c" itself is produced.
 * Currently only handles latin1, latin9 and utf-8.
 *
 * The accented characters are written as integer constants instead of
 * character constants such as '\300'.  Where plain "char" is signed those
 * character constants are negative, so they would never be equal to "c" (a
 * non-negative character code) and regmbc() would be given a negative
 * character.
 */
    static void
reg_equi_class(c)
    int     c;
{
#ifdef FEAT_MBYTE
    if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
                                         || STRCMP(p_enc, "iso-8859-15") == 0)
#endif
    {
        switch (c)
        {
            case 'A': case 0300: case 0301: case 0302:
            case 0303: case 0304: case 0305:
                      regmbc('A'); regmbc(0300); regmbc(0301);
                      regmbc(0302); regmbc(0303); regmbc(0304);
                      regmbc(0305);
                      return;
            case 'C': case 0307:
                      regmbc('C'); regmbc(0307);
                      return;
            case 'E': case 0310: case 0311: case 0312: case 0313:
                      regmbc('E'); regmbc(0310); regmbc(0311);
                      regmbc(0312); regmbc(0313);
                      return;
            case 'I': case 0314: case 0315: case 0316: case 0317:
                      regmbc('I'); regmbc(0314); regmbc(0315);
                      regmbc(0316); regmbc(0317);
                      return;
            case 'N': case 0321:
                      regmbc('N'); regmbc(0321);
                      return;
            case 'O': case 0322: case 0323: case 0324: case 0325:
            case 0326:
                      regmbc('O'); regmbc(0322); regmbc(0323);
                      regmbc(0324); regmbc(0325); regmbc(0326);
                      return;
            case 'U': case 0331: case 0332: case 0333: case 0334:
                      regmbc('U'); regmbc(0331); regmbc(0332);
                      regmbc(0333); regmbc(0334);
                      return;
            case 'Y': case 0335:
                      regmbc('Y'); regmbc(0335);
                      return;
            case 'a': case 0340: case 0341: case 0342:
            case 0343: case 0344: case 0345:
                      regmbc('a'); regmbc(0340); regmbc(0341);
                      regmbc(0342); regmbc(0343); regmbc(0344);
                      regmbc(0345);
                      return;
            case 'c': case 0347:
                      regmbc('c'); regmbc(0347);
                      return;
            case 'e': case 0350: case 0351: case 0352: case 0353:
                      regmbc('e'); regmbc(0350); regmbc(0351);
                      regmbc(0352); regmbc(0353);
                      return;
            case 'i': case 0354: case 0355: case 0356: case 0357:
                      regmbc('i'); regmbc(0354); regmbc(0355);
                      regmbc(0356); regmbc(0357);
                      return;
            case 'n': case 0361:
                      regmbc('n'); regmbc(0361);
                      return;
            case 'o': case 0362: case 0363: case 0364: case 0365:
            case 0366:
                      regmbc('o'); regmbc(0362); regmbc(0363);
                      regmbc(0364); regmbc(0365); regmbc(0366);
                      return;
            case 'u': case 0371: case 0372: case 0373: case 0374:
                      regmbc('u'); regmbc(0371); regmbc(0372);
                      regmbc(0373); regmbc(0374);
                      return;
            case 'y': case 0375: case 0377:
                      regmbc('y'); regmbc(0375); regmbc(0377);
                      return;
        }
    }

    /* Not in a known class (or an encoding that is not handled): the class
     * consists of the character itself. */
    regmbc(c);
}
818
819/*
820 * Check for a collating element "[.a.]". "pp" points to the '['.
821 * Returns a character. Zero means that no item was recognized. Otherwise
822 * "pp" is advanced to after the item.
823 * Currently only single characters are recognized!
824 */
825 static int
826get_coll_element(pp)
827 char_u **pp;
828{
829 int c;
830 int l = 1;
831 char_u *p = *pp;
832
833 if (p[1] == '.')
834 {
835#ifdef FEAT_MBYTE
836 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000837 l = (*mb_ptr2len)(p + 2);
Bram Moolenaardf177f62005-02-22 08:39:57 +0000838#endif
839 if (p[l + 2] == '.' && p[l + 3] == ']')
840 {
841#ifdef FEAT_MBYTE
842 if (has_mbyte)
843 c = mb_ptr2char(p + 2);
844 else
845#endif
846 c = p[2];
847 *pp += l + 4;
848 return c;
849 }
850 }
851 return 0;
852}
853
854
855/*
856 * Skip over a "[]" range.
857 * "p" must point to the character after the '['.
858 * The returned pointer is on the matching ']', or the terminating NUL.
859 */
860 static char_u *
861skip_anyof(p)
862 char_u *p;
863{
864 int cpo_lit; /* 'cpoptions' contains 'l' flag */
865 int cpo_bsl; /* 'cpoptions' contains '\' flag */
866#ifdef FEAT_MBYTE
867 int l;
868#endif
869
Bram Moolenaar3b56eb32005-07-11 22:40:32 +0000870 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
871 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaardf177f62005-02-22 08:39:57 +0000872
873 if (*p == '^') /* Complement of range. */
874 ++p;
875 if (*p == ']' || *p == '-')
876 ++p;
877 while (*p != NUL && *p != ']')
878 {
879#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000880 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Bram Moolenaardf177f62005-02-22 08:39:57 +0000881 p += l;
882 else
883#endif
884 if (*p == '-')
885 {
886 ++p;
887 if (*p != ']' && *p != NUL)
888 mb_ptr_adv(p);
889 }
890 else if (*p == '\\'
891 && !cpo_bsl
892 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
893 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
894 p += 2;
895 else if (*p == '[')
896 {
897 if (get_char_class(&p) == CLASS_NONE
898 && get_equi_class(&p) == 0
899 && get_coll_element(&p) == 0)
900 ++p; /* It was not a class name */
901 }
902 else
903 ++p;
904 }
905
906 return p;
907}
908
909/*
Bram Moolenaar071d4272004-06-13 20:20:40 +0000910 * Skip past regular expression.
Bram Moolenaar748bf032005-02-02 23:04:36 +0000911 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar071d4272004-06-13 20:20:40 +0000912 * Take care of characters with a backslash in front of it.
913 * Skip strings inside [ and ].
914 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
915 * expression and change "\?" to "?". If "*newp" is not NULL the expression
916 * is changed in-place.
917 */
918 char_u *
919skip_regexp(startp, dirc, magic, newp)
920 char_u *startp;
921 int dirc;
922 int magic;
923 char_u **newp;
924{
925 int mymagic;
926 char_u *p = startp;
927
928 if (magic)
929 mymagic = MAGIC_ON;
930 else
931 mymagic = MAGIC_OFF;
932
Bram Moolenaar1cd871b2004-12-19 22:46:22 +0000933 for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +0000934 {
935 if (p[0] == dirc) /* found end of regexp */
936 break;
937 if ((p[0] == '[' && mymagic >= MAGIC_ON)
938 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
939 {
940 p = skip_anyof(p + 1);
941 if (p[0] == NUL)
942 break;
943 }
944 else if (p[0] == '\\' && p[1] != NUL)
945 {
946 if (dirc == '?' && newp != NULL && p[1] == '?')
947 {
948 /* change "\?" to "?", make a copy first. */
949 if (*newp == NULL)
950 {
951 *newp = vim_strsave(startp);
952 if (*newp != NULL)
953 p = *newp + (p - startp);
954 }
955 if (*newp != NULL)
956 mch_memmove(p, p + 1, STRLEN(p));
957 else
958 ++p;
959 }
960 else
961 ++p; /* skip next character */
962 if (*p == 'v')
963 mymagic = MAGIC_ALL;
964 else if (*p == 'V')
965 mymagic = MAGIC_NONE;
966 }
Bram Moolenaar071d4272004-06-13 20:20:40 +0000967 }
968 return p;
969}
970
971/*
Bram Moolenaar86b68352004-12-27 21:59:20 +0000972 * vim_regcomp() - compile a regular expression into internal code
973 * Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar071d4272004-06-13 20:20:40 +0000974 *
975 * We can't allocate space until we know how big the compiled form will be,
976 * but we can't compile it (and thus know how big it is) until we've got a
977 * place to put the code. So we cheat: we compile it twice, once with code
978 * generation turned off and size counting turned on, and once "for real".
979 * This also means that we don't allocate space until we are sure that the
980 * thing really will compile successfully, and we never have to move the
981 * code and thus invalidate pointers into it. (Note that it has to be in
982 * one piece because vim_free() must be able to free it all.)
983 *
984 * Whether upper/lower case is to be ignored is decided when executing the
985 * program, it does not matter here.
986 *
987 * Beware that the optimization-preparation code in here knows about some
988 * of the structure of the compiled regexp.
989 * "re_flags": RE_MAGIC and/or RE_STRING.
990 */
991 regprog_T *
992vim_regcomp(expr, re_flags)
993 char_u *expr;
994 int re_flags;
995{
996 regprog_T *r;
997 char_u *scan;
998 char_u *longest;
999 int len;
1000 int flags;
1001
1002 if (expr == NULL)
1003 EMSG_RET_NULL(_(e_null));
1004
1005 init_class_tab();
1006
1007 /*
1008 * First pass: determine size, legality.
1009 */
1010 regcomp_start(expr, re_flags);
1011 regcode = JUST_CALC_SIZE;
1012 regc(REGMAGIC);
1013 if (reg(REG_NOPAREN, &flags) == NULL)
1014 return NULL;
1015
1016 /* Small enough for pointer-storage convention? */
1017#ifdef SMALL_MALLOC /* 16 bit storage allocation */
1018 if (regsize >= 65536L - 256L)
1019 EMSG_RET_NULL(_("E339: Pattern too long"));
1020#endif
1021
1022 /* Allocate space. */
1023 r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1024 if (r == NULL)
1025 return NULL;
1026
1027 /*
1028 * Second pass: emit code.
1029 */
1030 regcomp_start(expr, re_flags);
1031 regcode = r->program;
1032 regc(REGMAGIC);
1033 if (reg(REG_NOPAREN, &flags) == NULL)
1034 {
1035 vim_free(r);
1036 return NULL;
1037 }
1038
1039 /* Dig out information for optimizations. */
1040 r->regstart = NUL; /* Worst-case defaults. */
1041 r->reganch = 0;
1042 r->regmust = NULL;
1043 r->regmlen = 0;
1044 r->regflags = regflags;
1045 if (flags & HASNL)
1046 r->regflags |= RF_HASNL;
1047 if (flags & HASLOOKBH)
1048 r->regflags |= RF_LOOKBH;
1049#ifdef FEAT_SYN_HL
1050 /* Remember whether this pattern has any \z specials in it. */
1051 r->reghasz = re_has_z;
1052#endif
1053 scan = r->program + 1; /* First BRANCH. */
1054 if (OP(regnext(scan)) == END) /* Only one top-level choice. */
1055 {
1056 scan = OPERAND(scan);
1057
1058 /* Starting-point info. */
1059 if (OP(scan) == BOL || OP(scan) == RE_BOF)
1060 {
1061 r->reganch++;
1062 scan = regnext(scan);
1063 }
1064
1065 if (OP(scan) == EXACTLY)
1066 {
1067#ifdef FEAT_MBYTE
1068 if (has_mbyte)
1069 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1070 else
1071#endif
1072 r->regstart = *OPERAND(scan);
1073 }
1074 else if ((OP(scan) == BOW
1075 || OP(scan) == EOW
1076 || OP(scan) == NOTHING
1077 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1078 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1079 && OP(regnext(scan)) == EXACTLY)
1080 {
1081#ifdef FEAT_MBYTE
1082 if (has_mbyte)
1083 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1084 else
1085#endif
1086 r->regstart = *OPERAND(regnext(scan));
1087 }
1088
1089 /*
1090 * If there's something expensive in the r.e., find the longest
1091 * literal string that must appear and make it the regmust. Resolve
1092 * ties in favor of later strings, since the regstart check works
1093 * with the beginning of the r.e. and avoiding duplication
1094 * strengthens checking. Not a strong reason, but sufficient in the
1095 * absence of others.
1096 */
1097 /*
1098 * When the r.e. starts with BOW, it is faster to look for a regmust
1099 * first. Used a lot for "#" and "*" commands. (Added by mool).
1100 */
1101 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1102 && !(flags & HASNL))
1103 {
1104 longest = NULL;
1105 len = 0;
1106 for (; scan != NULL; scan = regnext(scan))
1107 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1108 {
1109 longest = OPERAND(scan);
1110 len = (int)STRLEN(OPERAND(scan));
1111 }
1112 r->regmust = longest;
1113 r->regmlen = len;
1114 }
1115 }
1116#ifdef DEBUG
1117 regdump(expr, r);
1118#endif
1119 return r;
1120}
1121
1122/*
1123 * Setup to parse the regexp. Used once to get the length and once to do it.
1124 */
1125 static void
1126regcomp_start(expr, re_flags)
1127 char_u *expr;
1128 int re_flags; /* see vim_regcomp() */
1129{
1130 initchr(expr);
1131 if (re_flags & RE_MAGIC)
1132 reg_magic = MAGIC_ON;
1133 else
1134 reg_magic = MAGIC_OFF;
1135 reg_string = (re_flags & RE_STRING);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001136 reg_strict = (re_flags & RE_STRICT);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001137
1138 num_complex_braces = 0;
1139 regnpar = 1;
1140 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1141#ifdef FEAT_SYN_HL
1142 regnzpar = 1;
1143 re_has_z = 0;
1144#endif
1145 regsize = 0L;
1146 regflags = 0;
1147#if defined(FEAT_SYN_HL) || defined(PROTO)
1148 had_eol = FALSE;
1149#endif
1150}
1151
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: whether the EOL item "$" was found during the
 * previous call to vim_regcomp().  Passing this through a global is messy,
 * but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    int		seen_eol = had_eol;

    return seen_eol;
}
#endif
1163
1164/*
1165 * reg - regular expression, i.e. main body or parenthesized thing
1166 *
1167 * Caller must absorb opening parenthesis.
1168 *
1169 * Combining parenthesis handling with the base level of regular expression
1170 * is a trifle forced, but the need to tie the tails of the branches to what
1171 * follows makes it hard to avoid.
1172 */
1173 static char_u *
1174reg(paren, flagp)
1175 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1176 int *flagp;
1177{
1178 char_u *ret;
1179 char_u *br;
1180 char_u *ender;
1181 int parno = 0;
1182 int flags;
1183
1184 *flagp = HASWIDTH; /* Tentatively. */
1185
1186#ifdef FEAT_SYN_HL
1187 if (paren == REG_ZPAREN)
1188 {
1189 /* Make a ZOPEN node. */
1190 if (regnzpar >= NSUBEXP)
1191 EMSG_RET_NULL(_("E50: Too many \\z("));
1192 parno = regnzpar;
1193 regnzpar++;
1194 ret = regnode(ZOPEN + parno);
1195 }
1196 else
1197#endif
1198 if (paren == REG_PAREN)
1199 {
1200 /* Make a MOPEN node. */
1201 if (regnpar >= NSUBEXP)
1202 EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1203 parno = regnpar;
1204 ++regnpar;
1205 ret = regnode(MOPEN + parno);
1206 }
1207 else if (paren == REG_NPAREN)
1208 {
1209 /* Make a NOPEN node. */
1210 ret = regnode(NOPEN);
1211 }
1212 else
1213 ret = NULL;
1214
1215 /* Pick up the branches, linking them together. */
1216 br = regbranch(&flags);
1217 if (br == NULL)
1218 return NULL;
1219 if (ret != NULL)
1220 regtail(ret, br); /* [MZ]OPEN -> first. */
1221 else
1222 ret = br;
1223 /* If one of the branches can be zero-width, the whole thing can.
1224 * If one of the branches has * at start or matches a line-break, the
1225 * whole thing can. */
1226 if (!(flags & HASWIDTH))
1227 *flagp &= ~HASWIDTH;
1228 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1229 while (peekchr() == Magic('|'))
1230 {
1231 skipchr();
1232 br = regbranch(&flags);
1233 if (br == NULL)
1234 return NULL;
1235 regtail(ret, br); /* BRANCH -> BRANCH. */
1236 if (!(flags & HASWIDTH))
1237 *flagp &= ~HASWIDTH;
1238 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1239 }
1240
1241 /* Make a closing node, and hook it on the end. */
1242 ender = regnode(
1243#ifdef FEAT_SYN_HL
1244 paren == REG_ZPAREN ? ZCLOSE + parno :
1245#endif
1246 paren == REG_PAREN ? MCLOSE + parno :
1247 paren == REG_NPAREN ? NCLOSE : END);
1248 regtail(ret, ender);
1249
1250 /* Hook the tails of the branches to the closing node. */
1251 for (br = ret; br != NULL; br = regnext(br))
1252 regoptail(br, ender);
1253
1254 /* Check for proper termination. */
1255 if (paren != REG_NOPAREN && getchr() != Magic(')'))
1256 {
1257#ifdef FEAT_SYN_HL
1258 if (paren == REG_ZPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001259 EMSG_RET_NULL(_("E52: Unmatched \\z("));
Bram Moolenaar071d4272004-06-13 20:20:40 +00001260 else
1261#endif
1262 if (paren == REG_NPAREN)
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001263 EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001264 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001265 EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001266 }
1267 else if (paren == REG_NOPAREN && peekchr() != NUL)
1268 {
1269 if (curchr == Magic(')'))
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001270 EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
Bram Moolenaar071d4272004-06-13 20:20:40 +00001271 else
Bram Moolenaar45eeb132005-06-06 21:59:07 +00001272 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001273 /* NOTREACHED */
1274 }
1275 /*
1276 * Here we set the flag allowing back references to this set of
1277 * parentheses.
1278 */
1279 if (paren == REG_PAREN)
1280 had_endbrace[parno] = TRUE; /* have seen the close paren */
1281 return ret;
1282}
1283
1284/*
1285 * regbranch - one alternative of an | operator
1286 *
1287 * Implements the & operator.
1288 */
1289 static char_u *
1290regbranch(flagp)
1291 int *flagp;
1292{
1293 char_u *ret;
1294 char_u *chain = NULL;
1295 char_u *latest;
1296 int flags;
1297
1298 *flagp = WORST | HASNL; /* Tentatively. */
1299
1300 ret = regnode(BRANCH);
1301 for (;;)
1302 {
1303 latest = regconcat(&flags);
1304 if (latest == NULL)
1305 return NULL;
1306 /* If one of the branches has width, the whole thing has. If one of
1307 * the branches anchors at start-of-line, the whole thing does.
1308 * If one of the branches uses look-behind, the whole thing does. */
1309 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1310 /* If one of the branches doesn't match a line-break, the whole thing
1311 * doesn't. */
1312 *flagp &= ~HASNL | (flags & HASNL);
1313 if (chain != NULL)
1314 regtail(chain, latest);
1315 if (peekchr() != Magic('&'))
1316 break;
1317 skipchr();
1318 regtail(latest, regnode(END)); /* operand ends */
1319 reginsert(MATCH, latest);
1320 chain = latest;
1321 }
1322
1323 return ret;
1324}
1325
1326/*
1327 * regbranch - one alternative of an | or & operator
1328 *
1329 * Implements the concatenation operator.
1330 */
1331 static char_u *
1332regconcat(flagp)
1333 int *flagp;
1334{
1335 char_u *first = NULL;
1336 char_u *chain = NULL;
1337 char_u *latest;
1338 int flags;
1339 int cont = TRUE;
1340
1341 *flagp = WORST; /* Tentatively. */
1342
1343 while (cont)
1344 {
1345 switch (peekchr())
1346 {
1347 case NUL:
1348 case Magic('|'):
1349 case Magic('&'):
1350 case Magic(')'):
1351 cont = FALSE;
1352 break;
1353 case Magic('Z'):
1354#ifdef FEAT_MBYTE
1355 regflags |= RF_ICOMBINE;
1356#endif
1357 skipchr_keepstart();
1358 break;
1359 case Magic('c'):
1360 regflags |= RF_ICASE;
1361 skipchr_keepstart();
1362 break;
1363 case Magic('C'):
1364 regflags |= RF_NOICASE;
1365 skipchr_keepstart();
1366 break;
1367 case Magic('v'):
1368 reg_magic = MAGIC_ALL;
1369 skipchr_keepstart();
1370 curchr = -1;
1371 break;
1372 case Magic('m'):
1373 reg_magic = MAGIC_ON;
1374 skipchr_keepstart();
1375 curchr = -1;
1376 break;
1377 case Magic('M'):
1378 reg_magic = MAGIC_OFF;
1379 skipchr_keepstart();
1380 curchr = -1;
1381 break;
1382 case Magic('V'):
1383 reg_magic = MAGIC_NONE;
1384 skipchr_keepstart();
1385 curchr = -1;
1386 break;
1387 default:
1388 latest = regpiece(&flags);
1389 if (latest == NULL)
1390 return NULL;
1391 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1392 if (chain == NULL) /* First piece. */
1393 *flagp |= flags & SPSTART;
1394 else
1395 regtail(chain, latest);
1396 chain = latest;
1397 if (first == NULL)
1398 first = latest;
1399 break;
1400 }
1401 }
1402 if (first == NULL) /* Loop ran zero times. */
1403 first = regnode(NOTHING);
1404 return first;
1405}
1406
1407/*
1408 * regpiece - something followed by possible [*+=]
1409 *
1410 * Note that the branching code sequences used for = and the general cases
1411 * of * and + are somewhat optimized: they use the same NOTHING node as
1412 * both the endmarker for their branch list and the body of the last branch.
1413 * It might seem that this node could be dispensed with entirely, but the
1414 * endmarker role is not redundant.
1415 */
1416 static char_u *
1417regpiece(flagp)
1418 int *flagp;
1419{
1420 char_u *ret;
1421 int op;
1422 char_u *next;
1423 int flags;
1424 long minval;
1425 long maxval;
1426
1427 ret = regatom(&flags);
1428 if (ret == NULL)
1429 return NULL;
1430
1431 op = peekchr();
1432 if (re_multi_type(op) == NOT_MULTI)
1433 {
1434 *flagp = flags;
1435 return ret;
1436 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00001437 /* default flags */
1438 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1439
1440 skipchr();
1441 switch (op)
1442 {
1443 case Magic('*'):
1444 if (flags & SIMPLE)
1445 reginsert(STAR, ret);
1446 else
1447 {
1448 /* Emit x* as (x&|), where & means "self". */
1449 reginsert(BRANCH, ret); /* Either x */
1450 regoptail(ret, regnode(BACK)); /* and loop */
1451 regoptail(ret, ret); /* back */
1452 regtail(ret, regnode(BRANCH)); /* or */
1453 regtail(ret, regnode(NOTHING)); /* null. */
1454 }
1455 break;
1456
1457 case Magic('+'):
1458 if (flags & SIMPLE)
1459 reginsert(PLUS, ret);
1460 else
1461 {
1462 /* Emit x+ as x(&|), where & means "self". */
1463 next = regnode(BRANCH); /* Either */
1464 regtail(ret, next);
Bram Moolenaar582fd852005-03-28 20:58:01 +00001465 regtail(regnode(BACK), ret); /* loop back */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001466 regtail(next, regnode(BRANCH)); /* or */
1467 regtail(ret, regnode(NOTHING)); /* null. */
1468 }
1469 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1470 break;
1471
1472 case Magic('@'):
1473 {
1474 int lop = END;
1475
1476 switch (no_Magic(getchr()))
1477 {
1478 case '=': lop = MATCH; break; /* \@= */
1479 case '!': lop = NOMATCH; break; /* \@! */
1480 case '>': lop = SUBPAT; break; /* \@> */
1481 case '<': switch (no_Magic(getchr()))
1482 {
1483 case '=': lop = BEHIND; break; /* \@<= */
1484 case '!': lop = NOBEHIND; break; /* \@<! */
1485 }
1486 }
1487 if (lop == END)
1488 EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1489 reg_magic == MAGIC_ALL);
1490 /* Look behind must match with behind_pos. */
1491 if (lop == BEHIND || lop == NOBEHIND)
1492 {
1493 regtail(ret, regnode(BHPOS));
1494 *flagp |= HASLOOKBH;
1495 }
1496 regtail(ret, regnode(END)); /* operand ends */
1497 reginsert(lop, ret);
1498 break;
1499 }
1500
1501 case Magic('?'):
1502 case Magic('='):
1503 /* Emit x= as (x|) */
1504 reginsert(BRANCH, ret); /* Either x */
1505 regtail(ret, regnode(BRANCH)); /* or */
1506 next = regnode(NOTHING); /* null. */
1507 regtail(ret, next);
1508 regoptail(ret, next);
1509 break;
1510
1511 case Magic('{'):
1512 if (!read_limits(&minval, &maxval))
1513 return NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001514 if (flags & SIMPLE)
1515 {
1516 reginsert(BRACE_SIMPLE, ret);
1517 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1518 }
1519 else
1520 {
1521 if (num_complex_braces >= 10)
1522 EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1523 reg_magic == MAGIC_ALL);
1524 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1525 regoptail(ret, regnode(BACK));
1526 regoptail(ret, ret);
1527 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1528 ++num_complex_braces;
1529 }
1530 if (minval > 0 && maxval > 0)
1531 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1532 break;
1533 }
1534 if (re_multi_type(peekchr()) != NOT_MULTI)
1535 {
1536 /* Can't have a multi follow a multi. */
1537 if (peekchr() == Magic('*'))
1538 sprintf((char *)IObuff, _("E61: Nested %s*"),
1539 reg_magic >= MAGIC_ON ? "" : "\\");
1540 else
1541 sprintf((char *)IObuff, _("E62: Nested %s%c"),
1542 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1543 EMSG_RET_NULL(IObuff);
1544 }
1545
1546 return ret;
1547}
1548
1549/*
1550 * regatom - the lowest level
1551 *
1552 * Optimization: gobbles an entire sequence of ordinary characters so that
1553 * it can turn them into a single node, which is smaller to store and
1554 * faster to run. Don't do this when one_exactly is set.
1555 */
1556 static char_u *
1557regatom(flagp)
1558 int *flagp;
1559{
1560 char_u *ret;
1561 int flags;
1562 int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaardf177f62005-02-22 08:39:57 +00001563 int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar071d4272004-06-13 20:20:40 +00001564 int c;
1565 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1566 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1567 FNAME, SFNAME, PRINT, SPRINT,
1568 WHITE, NWHITE, DIGIT, NDIGIT,
1569 HEX, NHEX, OCTAL, NOCTAL,
1570 WORD, NWORD, HEAD, NHEAD,
1571 ALPHA, NALPHA, LOWER, NLOWER,
1572 UPPER, NUPPER
1573 };
1574 char_u *p;
1575 int extra = 0;
1576
1577 *flagp = WORST; /* Tentatively. */
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00001578 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1579 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00001580
1581 c = getchr();
1582 switch (c)
1583 {
1584 case Magic('^'):
1585 ret = regnode(BOL);
1586 break;
1587
1588 case Magic('$'):
1589 ret = regnode(EOL);
1590#if defined(FEAT_SYN_HL) || defined(PROTO)
1591 had_eol = TRUE;
1592#endif
1593 break;
1594
1595 case Magic('<'):
1596 ret = regnode(BOW);
1597 break;
1598
1599 case Magic('>'):
1600 ret = regnode(EOW);
1601 break;
1602
1603 case Magic('_'):
1604 c = no_Magic(getchr());
1605 if (c == '^') /* "\_^" is start-of-line */
1606 {
1607 ret = regnode(BOL);
1608 break;
1609 }
1610 if (c == '$') /* "\_$" is end-of-line */
1611 {
1612 ret = regnode(EOL);
1613#if defined(FEAT_SYN_HL) || defined(PROTO)
1614 had_eol = TRUE;
1615#endif
1616 break;
1617 }
1618
1619 extra = ADD_NL;
1620 *flagp |= HASNL;
1621
1622 /* "\_[" is character range plus newline */
1623 if (c == '[')
1624 goto collection;
1625
1626 /* "\_x" is character class plus newline */
1627 /*FALLTHROUGH*/
1628
1629 /*
1630 * Character classes.
1631 */
1632 case Magic('.'):
1633 case Magic('i'):
1634 case Magic('I'):
1635 case Magic('k'):
1636 case Magic('K'):
1637 case Magic('f'):
1638 case Magic('F'):
1639 case Magic('p'):
1640 case Magic('P'):
1641 case Magic('s'):
1642 case Magic('S'):
1643 case Magic('d'):
1644 case Magic('D'):
1645 case Magic('x'):
1646 case Magic('X'):
1647 case Magic('o'):
1648 case Magic('O'):
1649 case Magic('w'):
1650 case Magic('W'):
1651 case Magic('h'):
1652 case Magic('H'):
1653 case Magic('a'):
1654 case Magic('A'):
1655 case Magic('l'):
1656 case Magic('L'):
1657 case Magic('u'):
1658 case Magic('U'):
1659 p = vim_strchr(classchars, no_Magic(c));
1660 if (p == NULL)
1661 EMSG_RET_NULL(_("E63: invalid use of \\_"));
1662 ret = regnode(classcodes[p - classchars] + extra);
1663 *flagp |= HASWIDTH | SIMPLE;
1664 break;
1665
1666 case Magic('n'):
1667 if (reg_string)
1668 {
1669 /* In a string "\n" matches a newline character. */
1670 ret = regnode(EXACTLY);
1671 regc(NL);
1672 regc(NUL);
1673 *flagp |= HASWIDTH | SIMPLE;
1674 }
1675 else
1676 {
1677 /* In buffer text "\n" matches the end of a line. */
1678 ret = regnode(NEWL);
1679 *flagp |= HASWIDTH | HASNL;
1680 }
1681 break;
1682
1683 case Magic('('):
1684 if (one_exactly)
1685 EMSG_ONE_RET_NULL;
1686 ret = reg(REG_PAREN, &flags);
1687 if (ret == NULL)
1688 return NULL;
1689 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1690 break;
1691
1692 case NUL:
1693 case Magic('|'):
1694 case Magic('&'):
1695 case Magic(')'):
1696 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
1697 /* NOTREACHED */
1698
1699 case Magic('='):
1700 case Magic('?'):
1701 case Magic('+'):
1702 case Magic('@'):
1703 case Magic('{'):
1704 case Magic('*'):
1705 c = no_Magic(c);
1706 sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1707 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1708 ? "" : "\\", c);
1709 EMSG_RET_NULL(IObuff);
1710 /* NOTREACHED */
1711
1712 case Magic('~'): /* previous substitute pattern */
Bram Moolenaarf461c8e2005-06-25 23:04:51 +00001713 if (reg_prev_sub != NULL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00001714 {
1715 char_u *lp;
1716
1717 ret = regnode(EXACTLY);
1718 lp = reg_prev_sub;
1719 while (*lp != NUL)
1720 regc(*lp++);
1721 regc(NUL);
1722 if (*reg_prev_sub != NUL)
1723 {
1724 *flagp |= HASWIDTH;
1725 if ((lp - reg_prev_sub) == 1)
1726 *flagp |= SIMPLE;
1727 }
1728 }
1729 else
1730 EMSG_RET_NULL(_(e_nopresub));
1731 break;
1732
1733 case Magic('1'):
1734 case Magic('2'):
1735 case Magic('3'):
1736 case Magic('4'):
1737 case Magic('5'):
1738 case Magic('6'):
1739 case Magic('7'):
1740 case Magic('8'):
1741 case Magic('9'):
1742 {
1743 int refnum;
1744
1745 refnum = c - Magic('0');
1746 /*
1747 * Check if the back reference is legal. We must have seen the
1748 * close brace.
1749 * TODO: Should also check that we don't refer to something
1750 * that is repeated (+*=): what instance of the repetition
1751 * should we match?
1752 */
1753 if (!had_endbrace[refnum])
1754 {
1755 /* Trick: check if "@<=" or "@<!" follows, in which case
1756 * the \1 can appear before the referenced match. */
1757 for (p = regparse; *p != NUL; ++p)
1758 if (p[0] == '@' && p[1] == '<'
1759 && (p[2] == '!' || p[2] == '='))
1760 break;
1761 if (*p == NUL)
1762 EMSG_RET_NULL(_("E65: Illegal back reference"));
1763 }
1764 ret = regnode(BACKREF + refnum);
1765 }
1766 break;
1767
1768#ifdef FEAT_SYN_HL
1769 case Magic('z'):
1770 {
1771 c = no_Magic(getchr());
1772 switch (c)
1773 {
1774 case '(': if (reg_do_extmatch != REX_SET)
1775 EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1776 if (one_exactly)
1777 EMSG_ONE_RET_NULL;
1778 ret = reg(REG_ZPAREN, &flags);
1779 if (ret == NULL)
1780 return NULL;
1781 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1782 re_has_z = REX_SET;
1783 break;
1784
1785 case '1':
1786 case '2':
1787 case '3':
1788 case '4':
1789 case '5':
1790 case '6':
1791 case '7':
1792 case '8':
1793 case '9': if (reg_do_extmatch != REX_USE)
1794 EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1795 ret = regnode(ZREF + c - '0');
1796 re_has_z = REX_USE;
1797 break;
1798
1799 case 's': ret = regnode(MOPEN + 0);
1800 break;
1801
1802 case 'e': ret = regnode(MCLOSE + 0);
1803 break;
1804
1805 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1806 }
1807 }
1808 break;
1809#endif
1810
1811 case Magic('%'):
1812 {
1813 c = no_Magic(getchr());
1814 switch (c)
1815 {
1816 /* () without a back reference */
1817 case '(':
1818 if (one_exactly)
1819 EMSG_ONE_RET_NULL;
1820 ret = reg(REG_NPAREN, &flags);
1821 if (ret == NULL)
1822 return NULL;
1823 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1824 break;
1825
1826 /* Catch \%^ and \%$ regardless of where they appear in the
1827 * pattern -- regardless of whether or not it makes sense. */
1828 case '^':
1829 ret = regnode(RE_BOF);
1830 break;
1831
1832 case '$':
1833 ret = regnode(RE_EOF);
1834 break;
1835
1836 case '#':
1837 ret = regnode(CURSOR);
1838 break;
1839
1840 /* \%[abc]: Emit as a list of branches, all ending at the last
1841 * branch which matches nothing. */
1842 case '[':
1843 if (one_exactly) /* doesn't nest */
1844 EMSG_ONE_RET_NULL;
1845 {
1846 char_u *lastbranch;
1847 char_u *lastnode = NULL;
1848 char_u *br;
1849
1850 ret = NULL;
1851 while ((c = getchr()) != ']')
1852 {
1853 if (c == NUL)
1854 EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1855 reg_magic == MAGIC_ALL);
1856 br = regnode(BRANCH);
1857 if (ret == NULL)
1858 ret = br;
1859 else
1860 regtail(lastnode, br);
1861
1862 ungetchr();
1863 one_exactly = TRUE;
1864 lastnode = regatom(flagp);
1865 one_exactly = FALSE;
1866 if (lastnode == NULL)
1867 return NULL;
1868 }
1869 if (ret == NULL)
1870 EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1871 reg_magic == MAGIC_ALL);
1872 lastbranch = regnode(BRANCH);
1873 br = regnode(NOTHING);
1874 if (ret != JUST_CALC_SIZE)
1875 {
1876 regtail(lastnode, br);
1877 regtail(lastbranch, br);
1878 /* connect all branches to the NOTHING
1879 * branch at the end */
1880 for (br = ret; br != lastnode; )
1881 {
1882 if (OP(br) == BRANCH)
1883 {
1884 regtail(br, lastbranch);
1885 br = OPERAND(br);
1886 }
1887 else
1888 br = regnext(br);
1889 }
1890 }
1891 *flagp &= ~HASWIDTH;
1892 break;
1893 }
1894
Bram Moolenaarc0197e22004-09-13 20:26:32 +00001895 case 'd': /* %d123 decimal */
1896 case 'o': /* %o123 octal */
1897 case 'x': /* %xab hex 2 */
1898 case 'u': /* %uabcd hex 4 */
1899 case 'U': /* %U1234abcd hex 8 */
1900 {
1901 int i;
1902
1903 switch (c)
1904 {
1905 case 'd': i = getdecchrs(); break;
1906 case 'o': i = getoctchrs(); break;
1907 case 'x': i = gethexchrs(2); break;
1908 case 'u': i = gethexchrs(4); break;
1909 case 'U': i = gethexchrs(8); break;
1910 default: i = -1; break;
1911 }
1912
1913 if (i < 0)
1914 EMSG_M_RET_NULL(
1915 _("E678: Invalid character after %s%%[dxouU]"),
1916 reg_magic == MAGIC_ALL);
1917 ret = regnode(EXACTLY);
1918 if (i == 0)
1919 regc(0x0a);
1920 else
1921#ifdef FEAT_MBYTE
1922 regmbc(i);
1923#else
1924 regc(i);
1925#endif
1926 regc(NUL);
1927 *flagp |= HASWIDTH;
1928 break;
1929 }
1930
Bram Moolenaar071d4272004-06-13 20:20:40 +00001931 default:
1932 if (VIM_ISDIGIT(c) || c == '<' || c == '>')
1933 {
1934 long_u n = 0;
1935 int cmp;
1936
1937 cmp = c;
1938 if (cmp == '<' || cmp == '>')
1939 c = getchr();
1940 while (VIM_ISDIGIT(c))
1941 {
1942 n = n * 10 + (c - '0');
1943 c = getchr();
1944 }
1945 if (c == 'l' || c == 'c' || c == 'v')
1946 {
1947 if (c == 'l')
1948 ret = regnode(RE_LNUM);
1949 else if (c == 'c')
1950 ret = regnode(RE_COL);
1951 else
1952 ret = regnode(RE_VCOL);
1953 if (ret == JUST_CALC_SIZE)
1954 regsize += 5;
1955 else
1956 {
1957 /* put the number and the optional
1958 * comparator after the opcode */
1959 regcode = re_put_long(regcode, n);
1960 *regcode++ = cmp;
1961 }
1962 break;
1963 }
1964 }
1965
1966 EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1967 reg_magic == MAGIC_ALL);
1968 }
1969 }
1970 break;
1971
1972 case Magic('['):
1973collection:
1974 {
1975 char_u *lp;
1976
1977 /*
1978 * If there is no matching ']', we assume the '[' is a normal
1979 * character. This makes 'incsearch' and ":help [" work.
1980 */
1981 lp = skip_anyof(regparse);
1982 if (*lp == ']') /* there is a matching ']' */
1983 {
1984 int startc = -1; /* > 0 when next '-' is a range */
1985 int endc;
1986
1987 /*
1988 * In a character class, different parsing rules apply.
1989 * Not even \ is special anymore, nothing is.
1990 */
1991 if (*regparse == '^') /* Complement of range. */
1992 {
1993 ret = regnode(ANYBUT + extra);
1994 regparse++;
1995 }
1996 else
1997 ret = regnode(ANYOF + extra);
1998
1999 /* At the start ']' and '-' mean the literal character. */
2000 if (*regparse == ']' || *regparse == '-')
Bram Moolenaardf177f62005-02-22 08:39:57 +00002001 {
2002 startc = *regparse;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002003 regc(*regparse++);
Bram Moolenaardf177f62005-02-22 08:39:57 +00002004 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002005
2006 while (*regparse != NUL && *regparse != ']')
2007 {
2008 if (*regparse == '-')
2009 {
2010 ++regparse;
2011 /* The '-' is not used for a range at the end and
2012 * after or before a '\n'. */
2013 if (*regparse == ']' || *regparse == NUL
2014 || startc == -1
2015 || (regparse[0] == '\\' && regparse[1] == 'n'))
2016 {
2017 regc('-');
2018 startc = '-'; /* [--x] is a range */
2019 }
2020 else
2021 {
Bram Moolenaardf177f62005-02-22 08:39:57 +00002022 /* Also accept "a-[.z.]" */
2023 endc = 0;
2024 if (*regparse == '[')
2025 endc = get_coll_element(&regparse);
2026 if (endc == 0)
2027 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00002028#ifdef FEAT_MBYTE
Bram Moolenaardf177f62005-02-22 08:39:57 +00002029 if (has_mbyte)
2030 endc = mb_ptr2char_adv(&regparse);
2031 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00002032#endif
Bram Moolenaardf177f62005-02-22 08:39:57 +00002033 endc = *regparse++;
2034 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002035
2036 /* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002037 if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002038 endc = coll_get_char();
2039
Bram Moolenaar071d4272004-06-13 20:20:40 +00002040 if (startc > endc)
2041 EMSG_RET_NULL(_(e_invrange));
2042#ifdef FEAT_MBYTE
2043 if (has_mbyte && ((*mb_char2len)(startc) > 1
2044 || (*mb_char2len)(endc) > 1))
2045 {
2046 /* Limit to a range of 256 chars */
2047 if (endc > startc + 256)
2048 EMSG_RET_NULL(_(e_invrange));
2049 while (++startc <= endc)
2050 regmbc(startc);
2051 }
2052 else
2053#endif
2054 {
2055#ifdef EBCDIC
2056 int alpha_only = FALSE;
2057
2058 /* for alphabetical range skip the gaps
2059 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
2060 if (isalpha(startc) && isalpha(endc))
2061 alpha_only = TRUE;
2062#endif
2063 while (++startc <= endc)
2064#ifdef EBCDIC
2065 if (!alpha_only || isalpha(startc))
2066#endif
2067 regc(startc);
2068 }
2069 startc = -1;
2070 }
2071 }
2072 /*
2073 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
2074 * accepts "\t", "\e", etc., but only when the 'l' flag in
2075 * 'cpoptions' is not included.
Bram Moolenaardf177f62005-02-22 08:39:57 +00002076 * Posix doesn't recognize backslash at all.
Bram Moolenaar071d4272004-06-13 20:20:40 +00002077 */
2078 else if (*regparse == '\\'
Bram Moolenaardf177f62005-02-22 08:39:57 +00002079 && !cpo_bsl
Bram Moolenaar071d4272004-06-13 20:20:40 +00002080 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2081 || (!cpo_lit
2082 && vim_strchr(REGEXP_ABBR,
2083 regparse[1]) != NULL)))
2084 {
2085 regparse++;
2086 if (*regparse == 'n')
2087 {
2088 /* '\n' in range: also match NL */
2089 if (ret != JUST_CALC_SIZE)
2090 {
2091 if (*ret == ANYBUT)
2092 *ret = ANYBUT + ADD_NL;
2093 else if (*ret == ANYOF)
2094 *ret = ANYOF + ADD_NL;
2095 /* else: must have had a \n already */
2096 }
2097 *flagp |= HASNL;
2098 regparse++;
2099 startc = -1;
2100 }
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002101 else if (*regparse == 'd'
2102 || *regparse == 'o'
2103 || *regparse == 'x'
2104 || *regparse == 'u'
2105 || *regparse == 'U')
2106 {
2107 startc = coll_get_char();
2108 if (startc == 0)
2109 regc(0x0a);
2110 else
2111#ifdef FEAT_MBYTE
2112 regmbc(startc);
2113#else
2114 regc(startc);
2115#endif
2116 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002117 else
2118 {
2119 startc = backslash_trans(*regparse++);
2120 regc(startc);
2121 }
2122 }
2123 else if (*regparse == '[')
2124 {
2125 int c_class;
2126 int cu;
2127
Bram Moolenaardf177f62005-02-22 08:39:57 +00002128 c_class = get_char_class(&regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002129 startc = -1;
2130 /* Characters assumed to be 8 bits! */
2131 switch (c_class)
2132 {
2133 case CLASS_NONE:
Bram Moolenaardf177f62005-02-22 08:39:57 +00002134 c_class = get_equi_class(&regparse);
2135 if (c_class != 0)
2136 {
2137 /* produce equivalence class */
2138 reg_equi_class(c_class);
2139 }
2140 else if ((c_class =
2141 get_coll_element(&regparse)) != 0)
2142 {
2143 /* produce a collating element */
2144 regmbc(c_class);
2145 }
2146 else
2147 {
2148 /* literal '[', allow [[-x] as a range */
2149 startc = *regparse++;
2150 regc(startc);
2151 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00002152 break;
2153 case CLASS_ALNUM:
2154 for (cu = 1; cu <= 255; cu++)
2155 if (isalnum(cu))
2156 regc(cu);
2157 break;
2158 case CLASS_ALPHA:
2159 for (cu = 1; cu <= 255; cu++)
2160 if (isalpha(cu))
2161 regc(cu);
2162 break;
2163 case CLASS_BLANK:
2164 regc(' ');
2165 regc('\t');
2166 break;
2167 case CLASS_CNTRL:
2168 for (cu = 1; cu <= 255; cu++)
2169 if (iscntrl(cu))
2170 regc(cu);
2171 break;
2172 case CLASS_DIGIT:
2173 for (cu = 1; cu <= 255; cu++)
2174 if (VIM_ISDIGIT(cu))
2175 regc(cu);
2176 break;
2177 case CLASS_GRAPH:
2178 for (cu = 1; cu <= 255; cu++)
2179 if (isgraph(cu))
2180 regc(cu);
2181 break;
2182 case CLASS_LOWER:
2183 for (cu = 1; cu <= 255; cu++)
2184 if (islower(cu))
2185 regc(cu);
2186 break;
2187 case CLASS_PRINT:
2188 for (cu = 1; cu <= 255; cu++)
2189 if (vim_isprintc(cu))
2190 regc(cu);
2191 break;
2192 case CLASS_PUNCT:
2193 for (cu = 1; cu <= 255; cu++)
2194 if (ispunct(cu))
2195 regc(cu);
2196 break;
2197 case CLASS_SPACE:
2198 for (cu = 9; cu <= 13; cu++)
2199 regc(cu);
2200 regc(' ');
2201 break;
2202 case CLASS_UPPER:
2203 for (cu = 1; cu <= 255; cu++)
2204 if (isupper(cu))
2205 regc(cu);
2206 break;
2207 case CLASS_XDIGIT:
2208 for (cu = 1; cu <= 255; cu++)
2209 if (vim_isxdigit(cu))
2210 regc(cu);
2211 break;
2212 case CLASS_TAB:
2213 regc('\t');
2214 break;
2215 case CLASS_RETURN:
2216 regc('\r');
2217 break;
2218 case CLASS_BACKSPACE:
2219 regc('\b');
2220 break;
2221 case CLASS_ESCAPE:
2222 regc('\033');
2223 break;
2224 }
2225 }
2226 else
2227 {
2228#ifdef FEAT_MBYTE
2229 if (has_mbyte)
2230 {
2231 int len;
2232
2233 /* produce a multibyte character, including any
2234 * following composing characters */
2235 startc = mb_ptr2char(regparse);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002236 len = (*mb_ptr2len)(regparse);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002237 if (enc_utf8 && utf_char2len(startc) != len)
2238 startc = -1; /* composing chars */
2239 while (--len >= 0)
2240 regc(*regparse++);
2241 }
2242 else
2243#endif
2244 {
2245 startc = *regparse++;
2246 regc(startc);
2247 }
2248 }
2249 }
2250 regc(NUL);
2251 prevchr_len = 1; /* last char was the ']' */
2252 if (*regparse != ']')
2253 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
2254 skipchr(); /* let's be friends with the lexer again */
2255 *flagp |= HASWIDTH | SIMPLE;
2256 break;
2257 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002258 else if (reg_strict)
2259 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2260 reg_magic > MAGIC_OFF);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002261 }
2262 /* FALLTHROUGH */
2263
2264 default:
2265 {
2266 int len;
2267
2268#ifdef FEAT_MBYTE
2269 /* A multi-byte character is handled as a separate atom if it's
2270 * before a multi. */
2271 if (has_mbyte && (*mb_char2len)(c) > 1
2272 && re_multi_type(peekchr()) != NOT_MULTI)
2273 {
2274 ret = regnode(MULTIBYTECODE);
2275 regmbc(c);
2276 *flagp |= HASWIDTH | SIMPLE;
2277 break;
2278 }
2279#endif
2280
2281 ret = regnode(EXACTLY);
2282
2283 /*
2284 * Append characters as long as:
2285 * - there is no following multi, we then need the character in
2286 * front of it as a single character operand
2287 * - not running into a Magic character
2288 * - "one_exactly" is not set
2289 * But always emit at least one character. Might be a Multi,
2290 * e.g., a "[" without matching "]".
2291 */
2292 for (len = 0; c != NUL && (len == 0
2293 || (re_multi_type(peekchr()) == NOT_MULTI
2294 && !one_exactly
2295 && !is_Magic(c))); ++len)
2296 {
2297 c = no_Magic(c);
2298#ifdef FEAT_MBYTE
2299 if (has_mbyte)
2300 {
2301 regmbc(c);
2302 if (enc_utf8)
2303 {
2304 int off;
2305 int l;
2306
2307 /* Need to get composing character too, directly
2308 * access regparse for that, because skipchr() skips
2309 * over composing chars. */
2310 ungetchr();
2311 if (*regparse == '\\' && regparse[1] != NUL)
2312 off = 1;
2313 else
2314 off = 0;
2315 for (;;)
2316 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002317 l = utf_ptr2len(regparse + off);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002318 if (!UTF_COMPOSINGLIKE(regparse + off,
2319 regparse + off + l))
2320 break;
2321 off += l;
2322 regmbc(utf_ptr2char(regparse + off));
2323 }
2324 skipchr();
2325 }
2326 }
2327 else
2328#endif
2329 regc(c);
2330 c = getchr();
2331 }
2332 ungetchr();
2333
2334 regc(NUL);
2335 *flagp |= HASWIDTH;
2336 if (len == 1)
2337 *flagp |= SIMPLE;
2338 }
2339 break;
2340 }
2341
2342 return ret;
2343}
2344
2345/*
2346 * emit a node
2347 * Return pointer to generated code.
2348 */
2349 static char_u *
2350regnode(op)
2351 int op;
2352{
2353 char_u *ret;
2354
2355 ret = regcode;
2356 if (ret == JUST_CALC_SIZE)
2357 regsize += 3;
2358 else
2359 {
2360 *regcode++ = op;
2361 *regcode++ = NUL; /* Null "next" pointer. */
2362 *regcode++ = NUL;
2363 }
2364 return ret;
2365}
2366
2367/*
2368 * Emit (if appropriate) a byte of code
2369 */
2370 static void
2371regc(b)
2372 int b;
2373{
2374 if (regcode == JUST_CALC_SIZE)
2375 regsize++;
2376 else
2377 *regcode++ = b;
2378}
2379
#ifdef FEAT_MBYTE
/*
 * Emit the character "c" as a multi-byte sequence, or only add its byte
 * length to "regsize" when just calculating the size.
 */
    static void
regmbc(c)
    int         c;
{
    if (regcode != JUST_CALC_SIZE)
        regcode += (*mb_char2bytes)(c, regcode);
    else
        regsize += (*mb_char2len)(c);
}
#endif
2394
2395/*
2396 * reginsert - insert an operator in front of already-emitted operand
2397 *
2398 * Means relocating the operand.
2399 */
2400 static void
2401reginsert(op, opnd)
2402 int op;
2403 char_u *opnd;
2404{
2405 char_u *src;
2406 char_u *dst;
2407 char_u *place;
2408
2409 if (regcode == JUST_CALC_SIZE)
2410 {
2411 regsize += 3;
2412 return;
2413 }
2414 src = regcode;
2415 regcode += 3;
2416 dst = regcode;
2417 while (src > opnd)
2418 *--dst = *--src;
2419
2420 place = opnd; /* Op node, where operand used to be. */
2421 *place++ = op;
2422 *place++ = NUL;
2423 *place = NUL;
2424}
2425
2426/*
2427 * reginsert_limits - insert an operator in front of already-emitted operand.
2428 * The operator has the given limit values as operands. Also set next pointer.
2429 *
2430 * Means relocating the operand.
2431 */
2432 static void
2433reginsert_limits(op, minval, maxval, opnd)
2434 int op;
2435 long minval;
2436 long maxval;
2437 char_u *opnd;
2438{
2439 char_u *src;
2440 char_u *dst;
2441 char_u *place;
2442
2443 if (regcode == JUST_CALC_SIZE)
2444 {
2445 regsize += 11;
2446 return;
2447 }
2448 src = regcode;
2449 regcode += 11;
2450 dst = regcode;
2451 while (src > opnd)
2452 *--dst = *--src;
2453
2454 place = opnd; /* Op node, where operand used to be. */
2455 *place++ = op;
2456 *place++ = NUL;
2457 *place++ = NUL;
2458 place = re_put_long(place, (long_u)minval);
2459 place = re_put_long(place, (long_u)maxval);
2460 regtail(opnd, place);
2461}
2462
2463/*
2464 * Write a long as four bytes at "p" and return pointer to the next char.
2465 */
2466 static char_u *
2467re_put_long(p, val)
2468 char_u *p;
2469 long_u val;
2470{
2471 *p++ = (char_u) ((val >> 24) & 0377);
2472 *p++ = (char_u) ((val >> 16) & 0377);
2473 *p++ = (char_u) ((val >> 8) & 0377);
2474 *p++ = (char_u) (val & 0377);
2475 return p;
2476}
2477
2478/*
2479 * regtail - set the next-pointer at the end of a node chain
2480 */
2481 static void
2482regtail(p, val)
2483 char_u *p;
2484 char_u *val;
2485{
2486 char_u *scan;
2487 char_u *temp;
2488 int offset;
2489
2490 if (p == JUST_CALC_SIZE)
2491 return;
2492
2493 /* Find last node. */
2494 scan = p;
2495 for (;;)
2496 {
2497 temp = regnext(scan);
2498 if (temp == NULL)
2499 break;
2500 scan = temp;
2501 }
2502
Bram Moolenaar582fd852005-03-28 20:58:01 +00002503 if (OP(scan) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00002504 offset = (int)(scan - val);
2505 else
2506 offset = (int)(val - scan);
2507 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2508 *(scan + 2) = (char_u) (offset & 0377);
2509}
2510
2511/*
2512 * regoptail - regtail on item after a BRANCH; nop if none
2513 */
2514 static void
2515regoptail(p, val)
2516 char_u *p;
2517 char_u *val;
2518{
2519 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2520 if (p == NULL || p == JUST_CALC_SIZE
2521 || (OP(p) != BRANCH
2522 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2523 return;
2524 regtail(OPERAND(p), val);
2525}
2526
2527/*
2528 * getchr() - get the next character from the pattern. We know about
2529 * magic and such, so therefore we need a lexical analyzer.
2530 */
2531
2532/* static int curchr; */
2533static int prevprevchr;
2534static int prevchr;
2535static int nextchr; /* used for ungetchr() */
2536/*
2537 * Note: prevchr is sometimes -1 when we are not at the start,
2538 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
2539 * taken to be magic -- webb
2540 */
2541static int at_start; /* True when on the first character */
2542static int prev_at_start; /* True when on the second character */
2543
2544 static void
2545initchr(str)
2546 char_u *str;
2547{
2548 regparse = str;
2549 prevchr_len = 0;
2550 curchr = prevprevchr = prevchr = nextchr = -1;
2551 at_start = TRUE;
2552 prev_at_start = FALSE;
2553}
2554
2555 static int
2556peekchr()
2557{
Bram Moolenaardf177f62005-02-22 08:39:57 +00002558 static int after_slash = FALSE;
2559
Bram Moolenaar071d4272004-06-13 20:20:40 +00002560 if (curchr == -1)
2561 {
2562 switch (curchr = regparse[0])
2563 {
2564 case '.':
2565 case '[':
2566 case '~':
2567 /* magic when 'magic' is on */
2568 if (reg_magic >= MAGIC_ON)
2569 curchr = Magic(curchr);
2570 break;
2571 case '(':
2572 case ')':
2573 case '{':
2574 case '%':
2575 case '+':
2576 case '=':
2577 case '?':
2578 case '@':
2579 case '!':
2580 case '&':
2581 case '|':
2582 case '<':
2583 case '>':
2584 case '#': /* future ext. */
2585 case '"': /* future ext. */
2586 case '\'': /* future ext. */
2587 case ',': /* future ext. */
2588 case '-': /* future ext. */
2589 case ':': /* future ext. */
2590 case ';': /* future ext. */
2591 case '`': /* future ext. */
2592 case '/': /* Can't be used in / command */
2593 /* magic only after "\v" */
2594 if (reg_magic == MAGIC_ALL)
2595 curchr = Magic(curchr);
2596 break;
2597 case '*':
Bram Moolenaardf177f62005-02-22 08:39:57 +00002598 /* * is not magic as the very first character, eg "?*ptr", when
2599 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2600 * "\(\*" is not magic, thus must be magic if "after_slash" */
2601 if (reg_magic >= MAGIC_ON
2602 && !at_start
2603 && !(prev_at_start && prevchr == Magic('^'))
2604 && (after_slash
2605 || (prevchr != Magic('(')
2606 && prevchr != Magic('&')
2607 && prevchr != Magic('|'))))
Bram Moolenaar071d4272004-06-13 20:20:40 +00002608 curchr = Magic('*');
2609 break;
2610 case '^':
2611 /* '^' is only magic as the very first character and if it's after
2612 * "\(", "\|", "\&' or "\n" */
2613 if (reg_magic >= MAGIC_OFF
2614 && (at_start
2615 || reg_magic == MAGIC_ALL
2616 || prevchr == Magic('(')
2617 || prevchr == Magic('|')
2618 || prevchr == Magic('&')
2619 || prevchr == Magic('n')
2620 || (no_Magic(prevchr) == '('
2621 && prevprevchr == Magic('%'))))
2622 {
2623 curchr = Magic('^');
2624 at_start = TRUE;
2625 prev_at_start = FALSE;
2626 }
2627 break;
2628 case '$':
2629 /* '$' is only magic as the very last char and if it's in front of
2630 * either "\|", "\)", "\&", or "\n" */
2631 if (reg_magic >= MAGIC_OFF)
2632 {
2633 char_u *p = regparse + 1;
2634
2635 /* ignore \c \C \m and \M after '$' */
2636 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
2637 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
2638 p += 2;
2639 if (p[0] == NUL
2640 || (p[0] == '\\'
2641 && (p[1] == '|' || p[1] == '&' || p[1] == ')'
2642 || p[1] == 'n'))
2643 || reg_magic == MAGIC_ALL)
2644 curchr = Magic('$');
2645 }
2646 break;
2647 case '\\':
2648 {
2649 int c = regparse[1];
2650
2651 if (c == NUL)
2652 curchr = '\\'; /* trailing '\' */
2653 else if (
2654#ifdef EBCDIC
2655 vim_strchr(META, c)
2656#else
2657 c <= '~' && META_flags[c]
2658#endif
2659 )
2660 {
2661 /*
2662 * META contains everything that may be magic sometimes,
2663 * except ^ and $ ("\^" and "\$" are only magic after
2664 * "\v"). We now fetch the next character and toggle its
2665 * magicness. Therefore, \ is so meta-magic that it is
2666 * not in META.
2667 */
2668 curchr = -1;
2669 prev_at_start = at_start;
2670 at_start = FALSE; /* be able to say "/\*ptr" */
2671 ++regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002672 ++after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002673 peekchr();
2674 --regparse;
Bram Moolenaardf177f62005-02-22 08:39:57 +00002675 --after_slash;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002676 curchr = toggle_Magic(curchr);
2677 }
2678 else if (vim_strchr(REGEXP_ABBR, c))
2679 {
2680 /*
2681 * Handle abbreviations, like "\t" for TAB -- webb
2682 */
2683 curchr = backslash_trans(c);
2684 }
2685 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
2686 curchr = toggle_Magic(c);
2687 else
2688 {
2689 /*
2690 * Next character can never be (made) magic?
2691 * Then backslashing it won't do anything.
2692 */
2693#ifdef FEAT_MBYTE
2694 if (has_mbyte)
2695 curchr = (*mb_ptr2char)(regparse + 1);
2696 else
2697#endif
2698 curchr = c;
2699 }
2700 break;
2701 }
2702
2703#ifdef FEAT_MBYTE
2704 default:
2705 if (has_mbyte)
2706 curchr = (*mb_ptr2char)(regparse);
2707#endif
2708 }
2709 }
2710
2711 return curchr;
2712}
2713
2714/*
2715 * Eat one lexed character. Do this in a way that we can undo it.
2716 */
2717 static void
2718skipchr()
2719{
2720 /* peekchr() eats a backslash, do the same here */
2721 if (*regparse == '\\')
2722 prevchr_len = 1;
2723 else
2724 prevchr_len = 0;
2725 if (regparse[prevchr_len] != NUL)
2726 {
2727#ifdef FEAT_MBYTE
2728 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002729 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00002730 else
2731#endif
2732 ++prevchr_len;
2733 }
2734 regparse += prevchr_len;
2735 prev_at_start = at_start;
2736 at_start = FALSE;
2737 prevprevchr = prevchr;
2738 prevchr = curchr;
2739 curchr = nextchr; /* use previously unget char, or -1 */
2740 nextchr = -1;
2741}
2742
2743/*
2744 * Skip a character while keeping the value of prev_at_start for at_start.
2745 * prevchr and prevprevchr are also kept.
2746 */
2747 static void
2748skipchr_keepstart()
2749{
2750 int as = prev_at_start;
2751 int pr = prevchr;
2752 int prpr = prevprevchr;
2753
2754 skipchr();
2755 at_start = as;
2756 prevchr = pr;
2757 prevprevchr = prpr;
2758}
2759
/*
 * Return the next lexed character of the pattern and consume it:
 * peekchr() followed by skipchr().
 */
    static int
getchr()
{
    int         lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
2768
2769/*
2770 * put character back. Works only once!
2771 */
2772 static void
2773ungetchr()
2774{
2775 nextchr = curchr;
2776 curchr = prevchr;
2777 prevchr = prevprevchr;
2778 at_start = prev_at_start;
2779 prev_at_start = FALSE;
2780
2781 /* Backup regparse, so that it's at the same position as before the
2782 * getchr(). */
2783 regparse -= prevchr_len;
2784}
2785
2786/*
Bram Moolenaar7b0294c2004-10-11 10:16:09 +00002787 * Get and return the value of the hex string at the current position.
2788 * Return -1 if there is no valid hex number.
2789 * The position is updated:
Bram Moolenaarc0197e22004-09-13 20:26:32 +00002790 * blahblah\%x20asdf
2791 * before-^ ^-after
2792 * The parameter controls the maximum number of input characters. This will be
2793 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2794 */
2795 static int
2796gethexchrs(maxinputlen)
2797 int maxinputlen;
2798{
2799 int nr = 0;
2800 int c;
2801 int i;
2802
2803 for (i = 0; i < maxinputlen; ++i)
2804 {
2805 c = regparse[0];
2806 if (!vim_isxdigit(c))
2807 break;
2808 nr <<= 4;
2809 nr |= hex2nr(c);
2810 ++regparse;
2811 }
2812
2813 if (i == 0)
2814 return -1;
2815 return nr;
2816}
2817
2818/*
2819 * get and return the value of the decimal string immediately after the
2820 * current position. Return -1 for invalid. Consumes all digits.
2821 */
2822 static int
2823getdecchrs()
2824{
2825 int nr = 0;
2826 int c;
2827 int i;
2828
2829 for (i = 0; ; ++i)
2830 {
2831 c = regparse[0];
2832 if (c < '0' || c > '9')
2833 break;
2834 nr *= 10;
2835 nr += c - '0';
2836 ++regparse;
2837 }
2838
2839 if (i == 0)
2840 return -1;
2841 return nr;
2842}
2843
2844/*
2845 * get and return the value of the octal string immediately after the current
2846 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2847 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2848 * treat 8 or 9 as recognised characters. Position is updated:
2849 * blahblah\%o210asdf
2850 * before-^ ^-after
2851 */
2852 static int
2853getoctchrs()
2854{
2855 int nr = 0;
2856 int c;
2857 int i;
2858
2859 for (i = 0; i < 3 && nr < 040; ++i)
2860 {
2861 c = regparse[0];
2862 if (c < '0' || c > '7')
2863 break;
2864 nr <<= 3;
2865 nr |= hex2nr(c);
2866 ++regparse;
2867 }
2868
2869 if (i == 0)
2870 return -1;
2871 return nr;
2872}
2873
2874/*
2875 * Get a number after a backslash that is inside [].
2876 * When nothing is recognized return a backslash.
2877 */
2878 static int
2879coll_get_char()
2880{
2881 int nr = -1;
2882
2883 switch (*regparse++)
2884 {
2885 case 'd': nr = getdecchrs(); break;
2886 case 'o': nr = getoctchrs(); break;
2887 case 'x': nr = gethexchrs(2); break;
2888 case 'u': nr = gethexchrs(4); break;
2889 case 'U': nr = gethexchrs(8); break;
2890 }
2891 if (nr < 0)
2892 {
2893 /* If getting the number fails be backwards compatible: the character
2894 * is a backslash. */
2895 --regparse;
2896 nr = '\\';
2897 }
2898 return nr;
2899}
2900
2901/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00002902 * read_limits - Read two integers to be taken as a minimum and maximum.
2903 * If the first character is '-', then the range is reversed.
2904 * Should end with 'end'. If minval is missing, zero is default, if maxval is
2905 * missing, a very big number is the default.
2906 */
2907 static int
2908read_limits(minval, maxval)
2909 long *minval;
2910 long *maxval;
2911{
2912 int reverse = FALSE;
2913 char_u *first_char;
2914 long tmp;
2915
2916 if (*regparse == '-')
2917 {
2918 /* Starts with '-', so reverse the range later */
2919 regparse++;
2920 reverse = TRUE;
2921 }
2922 first_char = regparse;
2923 *minval = getdigits(&regparse);
2924 if (*regparse == ',') /* There is a comma */
2925 {
2926 if (vim_isdigit(*++regparse))
2927 *maxval = getdigits(&regparse);
2928 else
2929 *maxval = MAX_LIMIT;
2930 }
2931 else if (VIM_ISDIGIT(*first_char))
2932 *maxval = *minval; /* It was \{n} or \{-n} */
2933 else
2934 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
2935 if (*regparse == '\\')
2936 regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaardf177f62005-02-22 08:39:57 +00002937 if (*regparse != '}')
Bram Moolenaar071d4272004-06-13 20:20:40 +00002938 {
2939 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2940 reg_magic == MAGIC_ALL ? "" : "\\");
2941 EMSG_RET_FAIL(IObuff);
2942 }
2943
2944 /*
2945 * Reverse the range if there was a '-', or make sure it is in the right
2946 * order otherwise.
2947 */
2948 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2949 {
2950 tmp = *minval;
2951 *minval = *maxval;
2952 *maxval = tmp;
2953 }
2954 skipchr(); /* let's be friends with the lexer again */
2955 return OK;
2956}
2957
2958/*
2959 * vim_regexec and friends
2960 */
2961
2962/*
2963 * Global work variables for vim_regexec().
2964 */
2965
2966/* The current match-position is remembered with these variables: */
2967static linenr_T reglnum; /* line number, relative to first line */
2968static char_u *regline; /* start of current line */
2969static char_u *reginput; /* current input, points into "regline" */
2970
2971static int need_clear_subexpr; /* subexpressions still need to be
2972 * cleared */
2973#ifdef FEAT_SYN_HL
2974static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
2975 * still need to be cleared */
2976#endif
2977
Bram Moolenaar071d4272004-06-13 20:20:40 +00002978/*
2979 * Structure used to save the current input state, when it needs to be
2980 * restored after trying a match. Used by reg_save() and reg_restore().
Bram Moolenaar582fd852005-03-28 20:58:01 +00002981 * Also stores the length of "backpos".
Bram Moolenaar071d4272004-06-13 20:20:40 +00002982 */
2983typedef struct
2984{
2985 union
2986 {
2987 char_u *ptr; /* reginput pointer, for single-line regexp */
2988 lpos_T pos; /* reginput pos, for multi-line regexp */
2989 } rs_u;
Bram Moolenaar582fd852005-03-28 20:58:01 +00002990 int rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00002991} regsave_T;
2992
2993/* struct to save start/end pointer/position in for \(\) */
2994typedef struct
2995{
2996 union
2997 {
2998 char_u *ptr;
2999 lpos_T pos;
3000 } se_u;
3001} save_se_T;
3002
3003static char_u *reg_getline __ARGS((linenr_T lnum));
3004static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
3005static long regtry __ARGS((regprog_T *prog, colnr_T col));
3006static void cleanup_subexpr __ARGS((void));
3007#ifdef FEAT_SYN_HL
3008static void cleanup_zsubexpr __ARGS((void));
3009#endif
3010static void reg_nextline __ARGS((void));
Bram Moolenaar582fd852005-03-28 20:58:01 +00003011static void reg_save __ARGS((regsave_T *save, garray_T *gap));
3012static void reg_restore __ARGS((regsave_T *save, garray_T *gap));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003013static int reg_save_equal __ARGS((regsave_T *save));
3014static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3015static void save_se_one __ARGS((save_se_T *savep, char_u **pp));
3016
3017/* Save the sub-expressions before attempting a match. */
3018#define save_se(savep, posp, pp) \
3019 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3020
3021/* After a failed match restore the sub-expressions. */
3022#define restore_se(savep, posp, pp) { \
3023 if (REG_MULTI) \
3024 *(posp) = (savep)->se_u.pos; \
3025 else \
3026 *(pp) = (savep)->se_u.ptr; }
3027
3028static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003029static int regmatch __ARGS((char_u *prog));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003030static int regrepeat __ARGS((char_u *p, long maxcount));
3031
3032#ifdef DEBUG
3033int regnarrate = 0;
3034#endif
3035
3036/*
3037 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3038 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3039 * contains '\c' or '\C' the value is overruled.
3040 */
3041static int ireg_ic;
3042
3043#ifdef FEAT_MBYTE
3044/*
3045 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
3046 * in the regexp. Defaults to false, always.
3047 */
3048static int ireg_icombine;
3049#endif
3050
3051/*
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003052 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3053 * there is no maximum.
3054 */
Bram Moolenaarbbebc852005-07-18 21:47:53 +00003055static colnr_T ireg_maxcol;
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003056
3057/*
Bram Moolenaar071d4272004-06-13 20:20:40 +00003058 * Sometimes need to save a copy of a line. Since alloc()/free() is very
3059 * slow, we keep one allocated piece of memory and only re-allocate it when
3060 * it's too small. It's freed in vim_regexec_both() when finished.
3061 */
3062static char_u *reg_tofree;
3063static unsigned reg_tofreelen;
3064
3065/*
3066 * These variables are set when executing a regexp to speed up the execution.
3067 * Which ones are set depends on whether a single-line or multi-line match is
3068 * done:
3069 * single-line multi-line
3070 * reg_match &regmatch_T NULL
3071 * reg_mmatch NULL &regmmatch_T
3072 * reg_startp reg_match->startp <invalid>
3073 * reg_endp reg_match->endp <invalid>
3074 * reg_startpos <invalid> reg_mmatch->startpos
3075 * reg_endpos <invalid> reg_mmatch->endpos
3076 * reg_win NULL window in which to search
3077 * reg_buf <invalid> buffer in which to search
3078 * reg_firstlnum <invalid> first line in which to search
3079 * reg_maxline 0 last line nr
3080 * reg_line_lbr FALSE or TRUE FALSE
3081 */
3082static regmatch_T *reg_match;
3083static regmmatch_T *reg_mmatch;
3084static char_u **reg_startp = NULL;
3085static char_u **reg_endp = NULL;
3086static lpos_T *reg_startpos = NULL;
3087static lpos_T *reg_endpos = NULL;
3088static win_T *reg_win;
3089static buf_T *reg_buf;
3090static linenr_T reg_firstlnum;
3091static linenr_T reg_maxline;
3092static int reg_line_lbr; /* "\n" in string is line break */
3093
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003094/* Values for rs_state in regitem_T. */
3095typedef enum regstate_E
3096{
3097 RS_NOPEN = 0 /* NOPEN and NCLOSE */
3098 , RS_MOPEN /* MOPEN + [0-9] */
3099 , RS_MCLOSE /* MCLOSE + [0-9] */
3100#ifdef FEAT_SYN_HL
3101 , RS_ZOPEN /* ZOPEN + [0-9] */
3102 , RS_ZCLOSE /* ZCLOSE + [0-9] */
3103#endif
3104 , RS_BRANCH /* BRANCH */
3105 , RS_BRCPLX_MORE /* BRACE_COMPLEX and trying one more match */
3106 , RS_BRCPLX_LONG /* BRACE_COMPLEX and trying longest match */
3107 , RS_BRCPLX_SHORT /* BRACE_COMPLEX and trying shortest match */
3108 , RS_NOMATCH /* NOMATCH */
3109 , RS_BEHIND1 /* BEHIND / NOBEHIND matching rest */
3110 , RS_BEHIND2 /* BEHIND / NOBEHIND matching behind part */
3111 , RS_STAR_LONG /* STAR/PLUS/BRACE_SIMPLE longest match */
3112 , RS_STAR_SHORT /* STAR/PLUS/BRACE_SIMPLE shortest match */
3113} regstate_T;
3114
3115/*
3116 * When there are alternatives a regstate_T is put on the regstack to remember
3117 * what we are doing.
3118 * Before it may be another type of item, depending on rs_state, to remember
3119 * more things.
3120 */
3121typedef struct regitem_S
3122{
3123 regstate_T rs_state; /* what we are doing, one of RS_ above */
3124 char_u *rs_scan; /* current node in program */
3125 union
3126 {
3127 save_se_T sesave;
3128 regsave_T regsave;
3129 } rs_un; /* room for saving reginput */
3130 short rs_no; /* submatch nr */
3131} regitem_T;
3132
3133static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3134static void regstack_pop __ARGS((char_u **scan));
3135
3136/* used for BEHIND and NOBEHIND matching */
3137typedef struct regbehind_S
3138{
3139 regsave_T save_after;
3140 regsave_T save_behind;
3141} regbehind_T;
3142
3143/* used for STAR, PLUS and BRACE_SIMPLE matching */
3144typedef struct regstar_S
3145{
3146 int nextb; /* next byte */
3147 int nextb_ic; /* next byte reverse case */
3148 long count;
3149 long minval;
3150 long maxval;
3151} regstar_T;
3152
3153/* used to store input position when a BACK was encountered, so that we know if
3154 * we made any progress since the last time. */
3155typedef struct backpos_S
3156{
3157 char_u *bp_scan; /* "scan" where BACK was encountered */
3158 regsave_T bp_pos; /* last input position */
3159} backpos_T;
3160
3161/*
3162 * regstack and backpos are used by regmatch(). They are kept over calls to
3163 * avoid invoking malloc() and free() often.
3164 */
3165static garray_T regstack; /* stack with regitem_T items, sometimes
3166 preceded by regstar_T or regbehind_T. */
3167static garray_T backpos; /* table with backpos_T for BACK */
3168
Bram Moolenaar071d4272004-06-13 20:20:40 +00003169/*
3170 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3171 */
3172 static char_u *
3173reg_getline(lnum)
3174 linenr_T lnum;
3175{
3176 /* when looking behind for a match/no-match lnum is negative. But we
3177 * can't go before line 1 */
3178 if (reg_firstlnum + lnum < 1)
3179 return NULL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00003180 if (lnum > reg_maxline)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003181 /* Must have matched the "\n" in the last line. */
3182 return (char_u *)"";
Bram Moolenaar071d4272004-06-13 20:20:40 +00003183 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3184}
3185
3186static regsave_T behind_pos;
3187
3188#ifdef FEAT_SYN_HL
3189static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */
3190static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */
3191static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3192static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
3193#endif
3194
3195/* TRUE if using multi-line regexp. */
3196#define REG_MULTI (reg_match == NULL)
3197
3198/*
3199 * Match a regexp against a string.
3200 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3201 * Uses curbuf for line count and 'iskeyword'.
3202 *
3203 * Return TRUE if there is a match, FALSE if not.
3204 */
3205 int
3206vim_regexec(rmp, line, col)
3207 regmatch_T *rmp;
3208 char_u *line; /* string to match against */
3209 colnr_T col; /* column to start looking for match */
3210{
3211 reg_match = rmp;
3212 reg_mmatch = NULL;
3213 reg_maxline = 0;
3214 reg_line_lbr = FALSE;
3215 reg_win = NULL;
3216 ireg_ic = rmp->rm_ic;
3217#ifdef FEAT_MBYTE
3218 ireg_icombine = FALSE;
3219#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003220 ireg_maxcol = 0;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003221 return (vim_regexec_both(line, col) != 0);
3222}
3223
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Match a regexp against the string "line", starting at column "col".
 * Works like vim_regexec(), except that a "\n" in "line" is considered to be
 * a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        found;

    /* Set up for a single-line match: no multi-line match info, no window
     * and no further lines. */
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;        /* "\n" in the string is a line break */

    ireg_maxcol = 0;            /* no maximum column */
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    found = vim_regexec_both(line, col);
    return (found != 0);
}
#endif
3248
3249/*
3250 * Match a regexp against multiple lines.
3251 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3252 * Uses curbuf for line count and 'iskeyword'.
3253 *
3254 * Return zero if there is no match. Return number of lines contained in the
3255 * match otherwise.
3256 */
3257 long
3258vim_regexec_multi(rmp, win, buf, lnum, col)
3259 regmmatch_T *rmp;
3260 win_T *win; /* window in which to search or NULL */
3261 buf_T *buf; /* buffer in which to search */
3262 linenr_T lnum; /* nr of line to start looking for match */
3263 colnr_T col; /* column to start looking for match */
3264{
3265 long r;
3266 buf_T *save_curbuf = curbuf;
3267
3268 reg_match = NULL;
3269 reg_mmatch = rmp;
3270 reg_buf = buf;
3271 reg_win = win;
3272 reg_firstlnum = lnum;
3273 reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3274 reg_line_lbr = FALSE;
3275 ireg_ic = rmp->rmm_ic;
3276#ifdef FEAT_MBYTE
3277 ireg_icombine = FALSE;
3278#endif
Bram Moolenaar3b56eb32005-07-11 22:40:32 +00003279 ireg_maxcol = rmp->rmm_maxcol;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003280
3281 /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3282 curbuf = buf;
3283 r = vim_regexec_both(NULL, col);
3284 curbuf = save_curbuf;
3285
3286 return r;
3287}
3288
/*
 * Match a regexp against a string ("line" points to the string) or multiple
 * lines ("line" is NULL, use reg_getline()).
 *
 * The caller must have set up "reg_match" or "reg_mmatch" and the other
 * "reg_" and "ireg_" variables, as vim_regexec(), vim_regexec_nl() and
 * vim_regexec_multi() do.
 *
 * Returns zero when there is no match or when the program or line is invalid.
 * Otherwise returns the value of the successful regtry() call.
 * "regstack" and "backpos" are initialized here and cleared again before
 * returning, also on the early exits through "theend".
 */
    static long
vim_regexec_both(line, col)
    char_u      *line;
    colnr_T     col;            /* column to start looking for match */
{
    regprog_T   *prog;
    char_u      *s;
    long        retval = 0L;

    reg_tofree = NULL;

    /* Init the regstack empty.  Use an item size of 1 byte, since we push
     * different things onto it.  Use a large grow size to avoid reallocating
     * it too often. */
    ga_init2(&regstack, 1, 10000);

    /* Init the backpos table empty. */
    ga_init2(&backpos, sizeof(backpos_T), 10);

    /* Pick up the program and the place to store the match positions from
     * whichever match struct is in use. */
    if (REG_MULTI)
    {
        prog = reg_mmatch->regprog;
        line = reg_getline((linenr_T)0);
        reg_startpos = reg_mmatch->startpos;
        reg_endpos = reg_mmatch->endpos;
    }
    else
    {
        prog = reg_match->regprog;
        reg_startp = reg_match->startp;
        reg_endp = reg_match->endp;
    }

    /* Be paranoid... */
    if (prog == NULL || line == NULL)
    {
        EMSG(_(e_null));
        goto theend;
    }

    /* Check validity of program. */
    if (prog_magic_wrong())
        goto theend;

    /* If the start column is past the maximum column: no need to try. */
    if (ireg_maxcol > 0 && col >= ireg_maxcol)
        goto theend;

    /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
    if (prog->regflags & RF_ICASE)
        ireg_ic = TRUE;
    else if (prog->regflags & RF_NOICASE)
        ireg_ic = FALSE;

#ifdef FEAT_MBYTE
    /* If pattern contains "\Z" overrule value of ireg_icombine */
    if (prog->regflags & RF_ICOMBINE)
        ireg_icombine = TRUE;
#endif

    /* If there is a "must appear" string, look for it. */
    if (prog->regmust != NULL)
    {
        int     c;

        /* "c" is the first character of the "must appear" string, it is
         * used to quickly find candidate positions. */
#ifdef FEAT_MBYTE
        if (has_mbyte)
            c = (*mb_ptr2char)(prog->regmust);
        else
#endif
            c = *prog->regmust;
        s = line + col;

        /*
         * This is used very often, esp. for ":global".  Use three versions of
         * the loop to avoid overhead of conditions.
         * Each loop ends with "s" pointing to the string when found, or with
         * "s" NULL when it is not present.
         */
        if (!ireg_ic
#ifdef FEAT_MBYTE
                && !has_mbyte
#endif
                )
            while ((s = vim_strbyte(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                ++s;
            }
#ifdef FEAT_MBYTE
        else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
            while ((s = vim_strchr(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                mb_ptr_adv(s);
            }
#endif
        else
            while ((s = cstrchr(s, c)) != NULL)
            {
                if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
                    break;              /* Found it. */
                mb_ptr_adv(s);
            }
        if (s == NULL)          /* Not present. */
            goto theend;
    }

    regline = line;
    reglnum = 0;

    /* Simplest case: Anchored match need be tried only once. */
    if (prog->reganch)
    {
        int     c;

#ifdef FEAT_MBYTE
        if (has_mbyte)
            c = (*mb_ptr2char)(regline + col);
        else
#endif
            c = regline[col];
        /* Only try when there is no known start character, or when the
         * character at "col" equals it, ignoring case if "ireg_ic" is set. */
        if (prog->regstart == NUL
                || prog->regstart == c
                || (ireg_ic && ((
#ifdef FEAT_MBYTE
                        (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
                        || (c < 255 && prog->regstart < 255 &&
#endif
                            TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
            retval = regtry(prog, col);
        else
            retval = 0;
    }
    else
    {
        /* Messy cases:  unanchored match. */
        /* Try every start column in turn, until there is a match, the end of
         * the line is reached or "got_int" is set. */
        while (!got_int)
        {
            if (prog->regstart != NUL)
            {
                /* Skip until the char we know it must start with.
                 * Used often, do some work to avoid call overhead. */
                if (!ireg_ic
#ifdef FEAT_MBYTE
                            && !has_mbyte
#endif
                            )
                    s = vim_strbyte(regline + col, prog->regstart);
                else
                    s = cstrchr(regline + col, prog->regstart);
                if (s == NULL)
                {
                    retval = 0;
                    break;
                }
                col = (int)(s - regline);
            }

            /* Check for maximum column to try. */
            if (ireg_maxcol > 0 && col >= ireg_maxcol)
            {
                retval = 0;
                break;
            }

            retval = regtry(prog, col);
            if (retval > 0)
                break;

            /* if not currently on the first line, get it again */
            if (reglnum != 0)
            {
                reglnum = 0;
                regline = reg_getline((linenr_T)0);
            }
            /* At the end of the line there is no further column to try. */
            if (regline[col] == NUL)
                break;
            /* Move "col" to the next character. */
#ifdef FEAT_MBYTE
            if (has_mbyte)
                col += (*mb_ptr2len)(regline + col);
            else
#endif
                ++col;
        }
    }

theend:
    /* Common exit: release what was allocated for this match attempt. */
    vim_free(reg_tofree);
    ga_clear(&regstack);
    ga_clear(&backpos);

    return retval;
}
3487
3488#ifdef FEAT_SYN_HL
3489static reg_extmatch_T *make_extmatch __ARGS((void));
3490
3491/*
3492 * Create a new extmatch and mark it as referenced once.
3493 */
3494 static reg_extmatch_T *
3495make_extmatch()
3496{
3497 reg_extmatch_T *em;
3498
3499 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3500 if (em != NULL)
3501 em->refcnt = 1;
3502 return em;
3503}
3504
3505/*
3506 * Add a reference to an extmatch.
3507 */
3508 reg_extmatch_T *
3509ref_extmatch(em)
3510 reg_extmatch_T *em;
3511{
3512 if (em != NULL)
3513 em->refcnt++;
3514 return em;
3515}
3516
3517/*
3518 * Remove a reference to an extmatch. If there are no references left, free
3519 * the info.
3520 */
3521 void
3522unref_extmatch(em)
3523 reg_extmatch_T *em;
3524{
3525 int i;
3526
3527 if (em != NULL && --em->refcnt <= 0)
3528 {
3529 for (i = 0; i < NSUBEXP; ++i)
3530 vim_free(em->matches[i]);
3531 vim_free(em);
3532 }
3533}
3534#endif
3535
3536/*
3537 * regtry - try match of "prog" with at regline["col"].
3538 * Returns 0 for failure, number of lines contained in the match otherwise.
3539 */
3540 static long
3541regtry(prog, col)
3542 regprog_T *prog;
3543 colnr_T col;
3544{
3545 reginput = regline + col;
3546 need_clear_subexpr = TRUE;
3547#ifdef FEAT_SYN_HL
3548 /* Clear the external match subpointers if necessary. */
3549 if (prog->reghasz == REX_SET)
3550 need_clear_zsubexpr = TRUE;
3551#endif
3552
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003553 if (regmatch(prog->program + 1) == 0)
3554 return 0;
3555
3556 cleanup_subexpr();
3557 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003558 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003559 if (reg_startpos[0].lnum < 0)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003560 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003561 reg_startpos[0].lnum = 0;
3562 reg_startpos[0].col = col;
3563 }
3564 if (reg_endpos[0].lnum < 0)
3565 {
3566 reg_endpos[0].lnum = reglnum;
3567 reg_endpos[0].col = (int)(reginput - regline);
Bram Moolenaar071d4272004-06-13 20:20:40 +00003568 }
3569 else
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003570 /* Use line number of "\ze". */
3571 reglnum = reg_endpos[0].lnum;
3572 }
3573 else
3574 {
3575 if (reg_startp[0] == NULL)
3576 reg_startp[0] = regline + col;
3577 if (reg_endp[0] == NULL)
3578 reg_endp[0] = reginput;
3579 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003580#ifdef FEAT_SYN_HL
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003581 /* Package any found \z(...\) matches for export. Default is none. */
3582 unref_extmatch(re_extmatch_out);
3583 re_extmatch_out = NULL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003584
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003585 if (prog->reghasz == REX_SET)
3586 {
3587 int i;
3588
3589 cleanup_zsubexpr();
3590 re_extmatch_out = make_extmatch();
3591 for (i = 0; i < NSUBEXP; i++)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003592 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003593 if (REG_MULTI)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003594 {
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003595 /* Only accept single line matches. */
3596 if (reg_startzpos[i].lnum >= 0
3597 && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3598 re_extmatch_out->matches[i] =
3599 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003600 + reg_startzpos[i].col,
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003601 reg_endzpos[i].col - reg_startzpos[i].col);
3602 }
3603 else
3604 {
3605 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3606 re_extmatch_out->matches[i] =
Bram Moolenaar071d4272004-06-13 20:20:40 +00003607 vim_strnsave(reg_startzp[i],
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003608 (int)(reg_endzp[i] - reg_startzp[i]));
Bram Moolenaar071d4272004-06-13 20:20:40 +00003609 }
3610 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003611 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00003612#endif
3613 return 1 + reglnum;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003614}
3615
#ifdef FEAT_MBYTE
static int reg_prev_class __ARGS((void));

/*
 * Return the class, as given by mb_get_class(), of the character just before
 * "reginput".
 * Returns -1 when "reginput" is at the start of the line.
 */
    static int
reg_prev_class()
{
    char_u      *prev;

    if (reginput <= regline)
        return -1;

    /* Step back one byte, then subtract the offset from mb_head_off() to get
     * to the first byte of that character. */
    prev = reginput - 1;
    return mb_get_class(prev - (*mb_head_off)(regline, prev));
}

#endif
/* Move "reginput" forward with mb_ptr_adv(); used by regmatch() after an
 * item matched the character at "reginput". */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;
static long     bl_maxval;
3641
3642/*
3643 * regmatch - main matching routine
3644 *
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003645 * Conceptually the strategy is simple: Check to see whether the current node
3646 * matches, push an item onto the regstack and loop to see whether the rest
3647 * matches, and then act accordingly. In practice we make some effort to
3648 * avoid using the regstack, in particular by going through "ordinary" nodes
3649 * (that don't need to know whether the rest of the match failed) by a nested
3650 * loop.
Bram Moolenaar071d4272004-06-13 20:20:40 +00003651 *
3652 * Returns TRUE when there is a match. Leaves reginput and reglnum just after
3653 * the last matched character.
3654 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
3655 * undefined state!
3656 */
3657 static int
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003658regmatch(scan)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003659 char_u *scan; /* Current node. */
3660{
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003661 char_u *next; /* Next node. */
3662 int op;
3663 int c;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003664 regitem_T *rp;
3665 int no;
3666 int status; /* one of the RA_ values: */
3667#define RA_FAIL 1 /* something failed, abort */
3668#define RA_CONT 2 /* continue in inner loop */
3669#define RA_BREAK 3 /* break inner loop */
3670#define RA_MATCH 4 /* successful match */
3671#define RA_NOMATCH 5 /* didn't match */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003672
Bram Moolenaara7fc0102005-05-18 22:17:12 +00003673 /* Init the regstack and backpos table empty. They are initialized and
3674 * freed in vim_regexec_both() to reduce malloc()/free() calls. */
3675 regstack.ga_len = 0;
3676 backpos.ga_len = 0;
Bram Moolenaar582fd852005-03-28 20:58:01 +00003677
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003678 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003679 * Repeat until "regstack" is empty.
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003680 */
3681 for (;;)
3682 {
    /* Some patterns may take a long time to match, even though they are not
     * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3685 fast_breakcheck();
3686
3687#ifdef DEBUG
3688 if (scan != NULL && regnarrate)
3689 {
3690 mch_errmsg(regprop(scan));
3691 mch_errmsg("(\n");
3692 }
3693#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003694
3695 /*
Bram Moolenaar582fd852005-03-28 20:58:01 +00003696 * Repeat for items that can be matched sequentially, without using the
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003697 * regstack.
3698 */
3699 for (;;)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003700 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003701 if (got_int || scan == NULL)
3702 {
3703 status = RA_FAIL;
3704 break;
3705 }
3706 status = RA_CONT;
3707
Bram Moolenaar071d4272004-06-13 20:20:40 +00003708#ifdef DEBUG
3709 if (regnarrate)
3710 {
3711 mch_errmsg(regprop(scan));
3712 mch_errmsg("...\n");
3713# ifdef FEAT_SYN_HL
3714 if (re_extmatch_in != NULL)
3715 {
3716 int i;
3717
3718 mch_errmsg(_("External submatches:\n"));
3719 for (i = 0; i < NSUBEXP; i++)
3720 {
3721 mch_errmsg(" \"");
3722 if (re_extmatch_in->matches[i] != NULL)
3723 mch_errmsg(re_extmatch_in->matches[i]);
3724 mch_errmsg("\"\n");
3725 }
3726 }
3727# endif
3728 }
3729#endif
3730 next = regnext(scan);
3731
3732 op = OP(scan);
3733 /* Check for character class with NL added. */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003734 if (!reg_line_lbr && WITH_NL(op) && *reginput == NUL
3735 && reglnum <= reg_maxline)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003736 {
3737 reg_nextline();
3738 }
3739 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3740 {
3741 ADVANCE_REGINPUT();
3742 }
3743 else
3744 {
3745 if (WITH_NL(op))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003746 op -= ADD_NL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003747#ifdef FEAT_MBYTE
3748 if (has_mbyte)
3749 c = (*mb_ptr2char)(reginput);
3750 else
3751#endif
3752 c = *reginput;
3753 switch (op)
3754 {
3755 case BOL:
3756 if (reginput != regline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003757 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003758 break;
3759
3760 case EOL:
3761 if (c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003762 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003763 break;
3764
3765 case RE_BOF:
3766 /* Passing -1 to the getline() function provided for the search
3767 * should always return NULL if the current line is the first
3768 * line of the file. */
3769 if (reglnum != 0 || reginput != regline
3770 || (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003771 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003772 break;
3773
3774 case RE_EOF:
3775 if (reglnum != reg_maxline || c != NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003776 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003777 break;
3778
3779 case CURSOR:
3780 /* Check if the buffer is in a window and compare the
3781 * reg_win->w_cursor position to the match position. */
3782 if (reg_win == NULL
3783 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3784 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003785 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003786 break;
3787
3788 case RE_LNUM:
3789 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3790 scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003791 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003792 break;
3793
3794 case RE_COL:
3795 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003796 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003797 break;
3798
3799 case RE_VCOL:
3800 if (!re_num_cmp((long_u)win_linetabsize(
3801 reg_win == NULL ? curwin : reg_win,
3802 regline, (colnr_T)(reginput - regline)) + 1, scan))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003803 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003804 break;
3805
3806 case BOW: /* \<word; reginput points to w */
3807 if (c == NUL) /* Can't match at end of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003808 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003809#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003810 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003811 {
3812 int this_class;
3813
3814 /* Get class of current and previous char (if it exists). */
3815 this_class = mb_get_class(reginput);
3816 if (this_class <= 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003817 status = RA_NOMATCH; /* not on a word at all */
3818 else if (reg_prev_class() == this_class)
3819 status = RA_NOMATCH; /* previous char is in same word */
Bram Moolenaar071d4272004-06-13 20:20:40 +00003820 }
3821#endif
3822 else
3823 {
3824 if (!vim_iswordc(c)
3825 || (reginput > regline && vim_iswordc(reginput[-1])))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003826 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003827 }
3828 break;
3829
3830 case EOW: /* word\>; reginput points after d */
3831 if (reginput == regline) /* Can't match at start of line */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003832 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003833#ifdef FEAT_MBYTE
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003834 else if (has_mbyte)
Bram Moolenaar071d4272004-06-13 20:20:40 +00003835 {
3836 int this_class, prev_class;
3837
3838 /* Get class of current and previous char (if it exists). */
3839 this_class = mb_get_class(reginput);
3840 prev_class = reg_prev_class();
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003841 if (this_class == prev_class
3842 || prev_class == 0 || prev_class == 1)
3843 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003844 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00003845#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003846 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00003847 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003848 if (!vim_iswordc(reginput[-1])
3849 || (reginput[0] != NUL && vim_iswordc(c)))
3850 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00003851 }
3852 break; /* Matched with EOW */
3853
3854 case ANY:
3855 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003856 status = RA_NOMATCH;
3857 else
3858 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003859 break;
3860
3861 case IDENT:
3862 if (!vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003863 status = RA_NOMATCH;
3864 else
3865 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003866 break;
3867
3868 case SIDENT:
3869 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003870 status = RA_NOMATCH;
3871 else
3872 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003873 break;
3874
3875 case KWORD:
3876 if (!vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003877 status = RA_NOMATCH;
3878 else
3879 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003880 break;
3881
3882 case SKWORD:
3883 if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003884 status = RA_NOMATCH;
3885 else
3886 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003887 break;
3888
3889 case FNAME:
3890 if (!vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003891 status = RA_NOMATCH;
3892 else
3893 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003894 break;
3895
3896 case SFNAME:
3897 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003898 status = RA_NOMATCH;
3899 else
3900 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003901 break;
3902
3903 case PRINT:
3904 if (ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003905 status = RA_NOMATCH;
3906 else
3907 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003908 break;
3909
3910 case SPRINT:
3911 if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003912 status = RA_NOMATCH;
3913 else
3914 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003915 break;
3916
3917 case WHITE:
3918 if (!vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003919 status = RA_NOMATCH;
3920 else
3921 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003922 break;
3923
3924 case NWHITE:
3925 if (c == NUL || vim_iswhite(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003926 status = RA_NOMATCH;
3927 else
3928 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003929 break;
3930
3931 case DIGIT:
3932 if (!ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003933 status = RA_NOMATCH;
3934 else
3935 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003936 break;
3937
3938 case NDIGIT:
3939 if (c == NUL || ri_digit(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003940 status = RA_NOMATCH;
3941 else
3942 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003943 break;
3944
3945 case HEX:
3946 if (!ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003947 status = RA_NOMATCH;
3948 else
3949 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003950 break;
3951
3952 case NHEX:
3953 if (c == NUL || ri_hex(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003954 status = RA_NOMATCH;
3955 else
3956 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003957 break;
3958
3959 case OCTAL:
3960 if (!ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003961 status = RA_NOMATCH;
3962 else
3963 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003964 break;
3965
3966 case NOCTAL:
3967 if (c == NUL || ri_octal(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003968 status = RA_NOMATCH;
3969 else
3970 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003971 break;
3972
3973 case WORD:
3974 if (!ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003975 status = RA_NOMATCH;
3976 else
3977 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003978 break;
3979
3980 case NWORD:
3981 if (c == NUL || ri_word(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003982 status = RA_NOMATCH;
3983 else
3984 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003985 break;
3986
3987 case HEAD:
3988 if (!ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003989 status = RA_NOMATCH;
3990 else
3991 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003992 break;
3993
3994 case NHEAD:
3995 if (c == NUL || ri_head(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00003996 status = RA_NOMATCH;
3997 else
3998 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00003999 break;
4000
4001 case ALPHA:
4002 if (!ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004003 status = RA_NOMATCH;
4004 else
4005 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004006 break;
4007
4008 case NALPHA:
4009 if (c == NUL || ri_alpha(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004010 status = RA_NOMATCH;
4011 else
4012 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004013 break;
4014
4015 case LOWER:
4016 if (!ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004017 status = RA_NOMATCH;
4018 else
4019 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004020 break;
4021
4022 case NLOWER:
4023 if (c == NUL || ri_lower(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004024 status = RA_NOMATCH;
4025 else
4026 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004027 break;
4028
4029 case UPPER:
4030 if (!ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004031 status = RA_NOMATCH;
4032 else
4033 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004034 break;
4035
4036 case NUPPER:
4037 if (c == NUL || ri_upper(c))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004038 status = RA_NOMATCH;
4039 else
4040 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004041 break;
4042
4043 case EXACTLY:
4044 {
4045 int len;
4046 char_u *opnd;
4047
4048 opnd = OPERAND(scan);
4049 /* Inline the first byte, for speed. */
4050 if (*opnd != *reginput
4051 && (!ireg_ic || (
4052#ifdef FEAT_MBYTE
4053 !enc_utf8 &&
4054#endif
4055 TOLOWER_LOC(*opnd) != TOLOWER_LOC(*reginput))))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004056 status = RA_NOMATCH;
4057 else if (*opnd == NUL)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004058 {
4059 /* match empty string always works; happens when "~" is
4060 * empty. */
4061 }
4062 else if (opnd[1] == NUL
4063#ifdef FEAT_MBYTE
4064 && !(enc_utf8 && ireg_ic)
4065#endif
4066 )
4067 ++reginput; /* matched a single char */
4068 else
4069 {
4070 len = (int)STRLEN(opnd);
4071 /* Need to match first byte again for multi-byte. */
4072 if (cstrncmp(opnd, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004073 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004074#ifdef FEAT_MBYTE
4075 /* Check for following composing character. */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004076 else if (enc_utf8
4077 && UTF_COMPOSINGLIKE(reginput, reginput + len))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004078 {
4079 /* raaron: This code makes a composing character get
4080 * ignored, which is the correct behavior (sometimes)
4081 * for voweled Hebrew texts. */
4082 if (!ireg_icombine)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004083 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004084 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004085#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004086 else
4087 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004088 }
4089 }
4090 break;
4091
4092 case ANYOF:
4093 case ANYBUT:
4094 if (c == NUL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004095 status = RA_NOMATCH;
4096 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4097 status = RA_NOMATCH;
4098 else
4099 ADVANCE_REGINPUT();
Bram Moolenaar071d4272004-06-13 20:20:40 +00004100 break;
4101
4102#ifdef FEAT_MBYTE
4103 case MULTIBYTECODE:
4104 if (has_mbyte)
4105 {
4106 int i, len;
4107 char_u *opnd;
4108
4109 opnd = OPERAND(scan);
4110 /* Safety check (just in case 'encoding' was changed since
4111 * compiling the program). */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004112 if ((len = (*mb_ptr2len)(opnd)) < 2)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004113 {
4114 status = RA_NOMATCH;
4115 break;
4116 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004117 for (i = 0; i < len; ++i)
4118 if (opnd[i] != reginput[i])
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004119 {
4120 status = RA_NOMATCH;
4121 break;
4122 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004123 reginput += len;
4124 }
4125 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004126 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004127 break;
4128#endif
4129
4130 case NOTHING:
4131 break;
4132
4133 case BACK:
Bram Moolenaar582fd852005-03-28 20:58:01 +00004134 {
4135 int i;
4136 backpos_T *bp;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004137
Bram Moolenaar582fd852005-03-28 20:58:01 +00004138 /*
4139 * When we run into BACK we need to check if we don't keep
4140 * looping without matching any input. The second and later
4141 * times a BACK is encountered it fails if the input is still
4142 * at the same position as the previous time.
4143 * The positions are stored in "backpos" and found by the
4144 * current value of "scan", the position in the RE program.
4145 */
4146 bp = (backpos_T *)backpos.ga_data;
4147 for (i = 0; i < backpos.ga_len; ++i)
4148 if (bp[i].bp_scan == scan)
4149 break;
4150 if (i == backpos.ga_len)
4151 {
4152 /* First time at this BACK, make room to store the pos. */
4153 if (ga_grow(&backpos, 1) == FAIL)
4154 status = RA_FAIL;
4155 else
4156 {
4157 /* get "ga_data" again, it may have changed */
4158 bp = (backpos_T *)backpos.ga_data;
4159 bp[i].bp_scan = scan;
4160 ++backpos.ga_len;
4161 }
4162 }
4163 else if (reg_save_equal(&bp[i].bp_pos))
4164 /* Still at same position as last time, fail. */
4165 status = RA_NOMATCH;
4166
4167 if (status != RA_FAIL && status != RA_NOMATCH)
4168 reg_save(&bp[i].bp_pos, &backpos);
4169 }
Bram Moolenaar19a09a12005-03-04 23:39:37 +00004170 break;
4171
Bram Moolenaar071d4272004-06-13 20:20:40 +00004172 case MOPEN + 0: /* Match start: \zs */
4173 case MOPEN + 1: /* \( */
4174 case MOPEN + 2:
4175 case MOPEN + 3:
4176 case MOPEN + 4:
4177 case MOPEN + 5:
4178 case MOPEN + 6:
4179 case MOPEN + 7:
4180 case MOPEN + 8:
4181 case MOPEN + 9:
4182 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004183 no = op - MOPEN;
4184 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004185 rp = regstack_push(RS_MOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004186 if (rp == NULL)
4187 status = RA_FAIL;
4188 else
4189 {
4190 rp->rs_no = no;
4191 save_se(&rp->rs_un.sesave, &reg_startpos[no],
4192 &reg_startp[no]);
4193 /* We simply continue and handle the result when done. */
4194 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004195 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004196 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004197
4198 case NOPEN: /* \%( */
4199 case NCLOSE: /* \) after \%( */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004200 if (regstack_push(RS_NOPEN, scan) == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004201 status = RA_FAIL;
4202 /* We simply continue and handle the result when done. */
4203 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004204
4205#ifdef FEAT_SYN_HL
4206 case ZOPEN + 1:
4207 case ZOPEN + 2:
4208 case ZOPEN + 3:
4209 case ZOPEN + 4:
4210 case ZOPEN + 5:
4211 case ZOPEN + 6:
4212 case ZOPEN + 7:
4213 case ZOPEN + 8:
4214 case ZOPEN + 9:
4215 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004216 no = op - ZOPEN;
4217 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004218 rp = regstack_push(RS_ZOPEN, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004219 if (rp == NULL)
4220 status = RA_FAIL;
4221 else
4222 {
4223 rp->rs_no = no;
4224 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4225 &reg_startzp[no]);
4226 /* We simply continue and handle the result when done. */
4227 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004228 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004229 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004230#endif
4231
4232 case MCLOSE + 0: /* Match end: \ze */
4233 case MCLOSE + 1: /* \) */
4234 case MCLOSE + 2:
4235 case MCLOSE + 3:
4236 case MCLOSE + 4:
4237 case MCLOSE + 5:
4238 case MCLOSE + 6:
4239 case MCLOSE + 7:
4240 case MCLOSE + 8:
4241 case MCLOSE + 9:
4242 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004243 no = op - MCLOSE;
4244 cleanup_subexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004245 rp = regstack_push(RS_MCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004246 if (rp == NULL)
4247 status = RA_FAIL;
4248 else
4249 {
4250 rp->rs_no = no;
4251 save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4252 /* We simply continue and handle the result when done. */
4253 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004254 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004255 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004256
4257#ifdef FEAT_SYN_HL
4258 case ZCLOSE + 1: /* \) after \z( */
4259 case ZCLOSE + 2:
4260 case ZCLOSE + 3:
4261 case ZCLOSE + 4:
4262 case ZCLOSE + 5:
4263 case ZCLOSE + 6:
4264 case ZCLOSE + 7:
4265 case ZCLOSE + 8:
4266 case ZCLOSE + 9:
4267 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004268 no = op - ZCLOSE;
4269 cleanup_zsubexpr();
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004270 rp = regstack_push(RS_ZCLOSE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004271 if (rp == NULL)
4272 status = RA_FAIL;
4273 else
4274 {
4275 rp->rs_no = no;
4276 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4277 &reg_endzp[no]);
4278 /* We simply continue and handle the result when done. */
4279 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004280 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004281 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004282#endif
4283
4284 case BACKREF + 1:
4285 case BACKREF + 2:
4286 case BACKREF + 3:
4287 case BACKREF + 4:
4288 case BACKREF + 5:
4289 case BACKREF + 6:
4290 case BACKREF + 7:
4291 case BACKREF + 8:
4292 case BACKREF + 9:
4293 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004294 int len;
4295 linenr_T clnum;
4296 colnr_T ccol;
4297 char_u *p;
4298
4299 no = op - BACKREF;
4300 cleanup_subexpr();
4301 if (!REG_MULTI) /* Single-line regexp */
4302 {
4303 if (reg_endp[no] == NULL)
4304 {
4305 /* Backref was not set: Match an empty string. */
4306 len = 0;
4307 }
4308 else
4309 {
4310 /* Compare current input with back-ref in the same
4311 * line. */
4312 len = (int)(reg_endp[no] - reg_startp[no]);
4313 if (cstrncmp(reg_startp[no], reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004314 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004315 }
4316 }
4317 else /* Multi-line regexp */
4318 {
4319 if (reg_endpos[no].lnum < 0)
4320 {
4321 /* Backref was not set: Match an empty string. */
4322 len = 0;
4323 }
4324 else
4325 {
4326 if (reg_startpos[no].lnum == reglnum
4327 && reg_endpos[no].lnum == reglnum)
4328 {
4329 /* Compare back-ref within the current line. */
4330 len = reg_endpos[no].col - reg_startpos[no].col;
4331 if (cstrncmp(regline + reg_startpos[no].col,
4332 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004333 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004334 }
4335 else
4336 {
4337 /* Messy situation: Need to compare between two
4338 * lines. */
4339 ccol = reg_startpos[no].col;
4340 clnum = reg_startpos[no].lnum;
4341 for (;;)
4342 {
4343 /* Since getting one line may invalidate
4344 * the other, need to make copy. Slow! */
4345 if (regline != reg_tofree)
4346 {
4347 len = (int)STRLEN(regline);
4348 if (reg_tofree == NULL
4349 || len >= (int)reg_tofreelen)
4350 {
4351 len += 50; /* get some extra */
4352 vim_free(reg_tofree);
4353 reg_tofree = alloc(len);
4354 if (reg_tofree == NULL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004355 {
4356 status = RA_FAIL; /* outof memory!*/
4357 break;
4358 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004359 reg_tofreelen = len;
4360 }
4361 STRCPY(reg_tofree, regline);
4362 reginput = reg_tofree
4363 + (reginput - regline);
4364 regline = reg_tofree;
4365 }
4366
4367 /* Get the line to compare with. */
4368 p = reg_getline(clnum);
4369 if (clnum == reg_endpos[no].lnum)
4370 len = reg_endpos[no].col - ccol;
4371 else
4372 len = (int)STRLEN(p + ccol);
4373
4374 if (cstrncmp(p + ccol, reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004375 {
4376 status = RA_NOMATCH; /* doesn't match */
4377 break;
4378 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004379 if (clnum == reg_endpos[no].lnum)
4380 break; /* match and at end! */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004381 if (reglnum >= reg_maxline)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004382 {
4383 status = RA_NOMATCH; /* text too short */
4384 break;
4385 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004386
4387 /* Advance to next line. */
4388 reg_nextline();
4389 ++clnum;
4390 ccol = 0;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004391 if (got_int)
4392 {
4393 status = RA_FAIL;
4394 break;
4395 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004396 }
4397
4398 /* found a match! Note that regline may now point
4399 * to a copy of the line, that should not matter. */
4400 }
4401 }
4402 }
4403
4404 /* Matched the backref, skip over it. */
4405 reginput += len;
4406 }
4407 break;
4408
4409#ifdef FEAT_SYN_HL
4410 case ZREF + 1:
4411 case ZREF + 2:
4412 case ZREF + 3:
4413 case ZREF + 4:
4414 case ZREF + 5:
4415 case ZREF + 6:
4416 case ZREF + 7:
4417 case ZREF + 8:
4418 case ZREF + 9:
4419 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004420 int len;
4421
4422 cleanup_zsubexpr();
4423 no = op - ZREF;
4424 if (re_extmatch_in != NULL
4425 && re_extmatch_in->matches[no] != NULL)
4426 {
4427 len = (int)STRLEN(re_extmatch_in->matches[no]);
4428 if (cstrncmp(re_extmatch_in->matches[no],
4429 reginput, &len) != 0)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004430 status = RA_NOMATCH;
4431 else
4432 reginput += len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004433 }
4434 else
4435 {
4436 /* Backref was not set: Match an empty string. */
4437 }
4438 }
4439 break;
4440#endif
4441
4442 case BRANCH:
4443 {
4444 if (OP(next) != BRANCH) /* No choice. */
4445 next = OPERAND(scan); /* Avoid recursion. */
4446 else
4447 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004448 rp = regstack_push(RS_BRANCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004449 if (rp == NULL)
4450 status = RA_FAIL;
4451 else
4452 status = RA_BREAK; /* rest is below */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004453 }
4454 }
4455 break;
4456
4457 case BRACE_LIMITS:
4458 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004459 if (OP(next) == BRACE_SIMPLE)
4460 {
4461 bl_minval = OPERAND_MIN(scan);
4462 bl_maxval = OPERAND_MAX(scan);
4463 }
4464 else if (OP(next) >= BRACE_COMPLEX
4465 && OP(next) < BRACE_COMPLEX + 10)
4466 {
4467 no = OP(next) - BRACE_COMPLEX;
4468 brace_min[no] = OPERAND_MIN(scan);
4469 brace_max[no] = OPERAND_MAX(scan);
4470 brace_count[no] = 0;
4471 }
4472 else
4473 {
4474 EMSG(_(e_internal)); /* Shouldn't happen */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004475 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004476 }
4477 }
4478 break;
4479
4480 case BRACE_COMPLEX + 0:
4481 case BRACE_COMPLEX + 1:
4482 case BRACE_COMPLEX + 2:
4483 case BRACE_COMPLEX + 3:
4484 case BRACE_COMPLEX + 4:
4485 case BRACE_COMPLEX + 5:
4486 case BRACE_COMPLEX + 6:
4487 case BRACE_COMPLEX + 7:
4488 case BRACE_COMPLEX + 8:
4489 case BRACE_COMPLEX + 9:
4490 {
Bram Moolenaar071d4272004-06-13 20:20:40 +00004491 no = op - BRACE_COMPLEX;
4492 ++brace_count[no];
4493
4494 /* If not matched enough times yet, try one more */
4495 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004496 ? brace_min[no] : brace_max[no]))
Bram Moolenaar071d4272004-06-13 20:20:40 +00004497 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004498 rp = regstack_push(RS_BRCPLX_MORE, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004499 if (rp == NULL)
4500 status = RA_FAIL;
4501 else
4502 {
4503 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004504 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004505 next = OPERAND(scan);
4506 /* We continue and handle the result when done. */
4507 }
4508 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004509 }
4510
4511 /* If matched enough times, may try matching some more */
4512 if (brace_min[no] <= brace_max[no])
4513 {
4514 /* Range is the normal way around, use longest match */
4515 if (brace_count[no] <= brace_max[no])
4516 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004517 rp = regstack_push(RS_BRCPLX_LONG, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004518 if (rp == NULL)
4519 status = RA_FAIL;
4520 else
4521 {
4522 rp->rs_no = no;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004523 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004524 next = OPERAND(scan);
4525 /* We continue and handle the result when done. */
4526 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004527 }
4528 }
4529 else
4530 {
4531 /* Range is backwards, use shortest match first */
4532 if (brace_count[no] <= brace_min[no])
4533 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004534 rp = regstack_push(RS_BRCPLX_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004535 if (rp == NULL)
4536 status = RA_FAIL;
4537 else
4538 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004539 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004540 /* We continue and handle the result when done. */
4541 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004542 }
4543 }
4544 }
4545 break;
4546
4547 case BRACE_SIMPLE:
4548 case STAR:
4549 case PLUS:
4550 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004551 regstar_T rst;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004552
4553 /*
4554 * Lookahead to avoid useless match attempts when we know
4555 * what character comes next.
4556 */
4557 if (OP(next) == EXACTLY)
4558 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004559 rst.nextb = *OPERAND(next);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004560 if (ireg_ic)
4561 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004562 if (isupper(rst.nextb))
4563 rst.nextb_ic = TOLOWER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004564 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004565 rst.nextb_ic = TOUPPER_LOC(rst.nextb);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004566 }
4567 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004568 rst.nextb_ic = rst.nextb;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004569 }
4570 else
4571 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004572 rst.nextb = NUL;
4573 rst.nextb_ic = NUL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004574 }
4575 if (op != BRACE_SIMPLE)
4576 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004577 rst.minval = (op == STAR) ? 0 : 1;
4578 rst.maxval = MAX_LIMIT;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004579 }
4580 else
4581 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004582 rst.minval = bl_minval;
4583 rst.maxval = bl_maxval;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004584 }
4585
4586 /*
4587 * When maxval > minval, try matching as much as possible, up
4588 * to maxval. When maxval < minval, try matching at least the
4589 * minimal number (since the range is backwards, that's also
4590 * maxval!).
4591 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004592 rst.count = regrepeat(OPERAND(scan), rst.maxval);
Bram Moolenaar071d4272004-06-13 20:20:40 +00004593 if (got_int)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004594 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004595 status = RA_FAIL;
4596 break;
4597 }
4598 if (rst.minval <= rst.maxval
4599 ? rst.count >= rst.minval : rst.count >= rst.maxval)
4600 {
4601 /* It could match. Prepare for trying to match what
4602 * follows. The code is below. Parameters are stored in
4603 * a regstar_T on the regstack. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004604 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004605 {
4606 EMSG(_(e_maxmempat));
4607 status = RA_FAIL;
4608 }
4609 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004610 status = RA_FAIL;
4611 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004612 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004613 regstack.ga_len += sizeof(regstar_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004614 rp = regstack_push(rst.minval <= rst.maxval
Bram Moolenaar582fd852005-03-28 20:58:01 +00004615 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004616 if (rp == NULL)
4617 status = RA_FAIL;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004618 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004619 {
4620 *(((regstar_T *)rp) - 1) = rst;
4621 status = RA_BREAK; /* skip the restore bits */
4622 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004623 }
4624 }
4625 else
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004626 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004627
Bram Moolenaar071d4272004-06-13 20:20:40 +00004628 }
4629 break;
4630
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004631 case NOMATCH:
Bram Moolenaar071d4272004-06-13 20:20:40 +00004632 case MATCH:
4633 case SUBPAT:
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004634 rp = regstack_push(RS_NOMATCH, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004635 if (rp == NULL)
4636 status = RA_FAIL;
4637 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004638 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004639 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004640 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004641 next = OPERAND(scan);
4642 /* We continue and handle the result when done. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004643 }
4644 break;
4645
4646 case BEHIND:
4647 case NOBEHIND:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004648 /* Need a bit of room to store extra positions. */
Bram Moolenaar916b7af2005-03-16 09:52:38 +00004649 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00004650 {
4651 EMSG(_(e_maxmempat));
4652 status = RA_FAIL;
4653 }
4654 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004655 status = RA_FAIL;
4656 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004657 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004658 regstack.ga_len += sizeof(regbehind_T);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004659 rp = regstack_push(RS_BEHIND1, scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004660 if (rp == NULL)
4661 status = RA_FAIL;
4662 else
Bram Moolenaar071d4272004-06-13 20:20:40 +00004663 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004664 rp->rs_no = op;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004665 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004666 /* First try if what follows matches. If it does then we
4667 * check the behind match by looping. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004668 }
Bram Moolenaar071d4272004-06-13 20:20:40 +00004669 }
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004670 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004671
4672 case BHPOS:
4673 if (REG_MULTI)
4674 {
4675 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4676 || behind_pos.rs_u.pos.lnum != reglnum)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004677 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004678 }
4679 else if (behind_pos.rs_u.ptr != reginput)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004680 status = RA_NOMATCH;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004681 break;
4682
4683 case NEWL:
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004684 if ((c != NUL || reglnum > reg_maxline || reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004685 && (c != '\n' || !reg_line_lbr))
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004686 status = RA_NOMATCH;
4687 else if (reg_line_lbr)
Bram Moolenaar071d4272004-06-13 20:20:40 +00004688 ADVANCE_REGINPUT();
4689 else
4690 reg_nextline();
4691 break;
4692
4693 case END:
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004694 status = RA_MATCH; /* Success! */
4695 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004696
4697 default:
4698 EMSG(_(e_re_corr));
4699#ifdef DEBUG
4700 printf("Illegal op code %d\n", op);
4701#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004702 status = RA_FAIL;
4703 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004704 }
4705 }
4706
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004707 /* If we can't continue sequentially, break the inner loop. */
4708 if (status != RA_CONT)
4709 break;
4710
4711 /* Continue in inner loop, advance to next item. */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004712 scan = next;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004713
4714 } /* end of inner loop */
Bram Moolenaar071d4272004-06-13 20:20:40 +00004715
4716 /*
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004717 * If there is something on the regstack execute the code for the state.
Bram Moolenaar582fd852005-03-28 20:58:01 +00004718 * If the state is popped then loop and use the older state.
Bram Moolenaar071d4272004-06-13 20:20:40 +00004719 */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004720 while (regstack.ga_len > 0 && status != RA_FAIL)
4721 {
4722 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4723 switch (rp->rs_state)
4724 {
4725 case RS_NOPEN:
4726 /* Result is passed on as-is, simply pop the state. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004727 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004728 break;
4729
4730 case RS_MOPEN:
4731 /* Pop the state. Restore pointers when there is no match. */
4732 if (status == RA_NOMATCH)
4733 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4734 &reg_startp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004735 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004736 break;
4737
4738#ifdef FEAT_SYN_HL
4739 case RS_ZOPEN:
4740 /* Pop the state. Restore pointers when there is no match. */
4741 if (status == RA_NOMATCH)
4742 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4743 &reg_startzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004744 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004745 break;
Bram Moolenaar071d4272004-06-13 20:20:40 +00004746#endif
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004747
4748 case RS_MCLOSE:
4749 /* Pop the state. Restore pointers when there is no match. */
4750 if (status == RA_NOMATCH)
4751 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4752 &reg_endp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004753 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004754 break;
4755
4756#ifdef FEAT_SYN_HL
4757 case RS_ZCLOSE:
4758 /* Pop the state. Restore pointers when there is no match. */
4759 if (status == RA_NOMATCH)
4760 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4761 &reg_endzp[rp->rs_no]);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004762 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004763 break;
4764#endif
4765
4766 case RS_BRANCH:
4767 if (status == RA_MATCH)
4768 /* this branch matched, use it */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004769 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004770 else
4771 {
4772 if (status != RA_BREAK)
4773 {
4774 /* After a non-matching branch: try next one. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004775 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004776 scan = rp->rs_scan;
4777 }
4778 if (scan == NULL || OP(scan) != BRANCH)
4779 {
4780 /* no more branches, didn't find a match */
4781 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004782 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004783 }
4784 else
4785 {
4786 /* Prepare to try a branch. */
4787 rp->rs_scan = regnext(scan);
Bram Moolenaar582fd852005-03-28 20:58:01 +00004788 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004789 scan = OPERAND(scan);
4790 }
4791 }
4792 break;
4793
4794 case RS_BRCPLX_MORE:
4795 /* Pop the state. Restore pointers when there is no match. */
4796 if (status == RA_NOMATCH)
4797 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004798 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004799 --brace_count[rp->rs_no]; /* decrement match count */
4800 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004801 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004802 break;
4803
4804 case RS_BRCPLX_LONG:
4805 /* Pop the state. Restore pointers when there is no match. */
4806 if (status == RA_NOMATCH)
4807 {
4808 /* There was no match, but we did find enough matches. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004809 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004810 --brace_count[rp->rs_no];
4811 /* continue with the items after "\{}" */
4812 status = RA_CONT;
4813 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004814 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004815 if (status == RA_CONT)
4816 scan = regnext(scan);
4817 break;
4818
4819 case RS_BRCPLX_SHORT:
4820 /* Pop the state. Restore pointers when there is no match. */
4821 if (status == RA_NOMATCH)
4822 /* There was no match, try to match one more item. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004823 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004824 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004825 if (status == RA_NOMATCH)
4826 {
4827 scan = OPERAND(scan);
4828 status = RA_CONT;
4829 }
4830 break;
4831
4832 case RS_NOMATCH:
4833 /* Pop the state. If the operand matches for NOMATCH or
4834 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
4835 * except for SUBPAT, and continue with the next item. */
4836 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4837 status = RA_NOMATCH;
4838 else
4839 {
4840 status = RA_CONT;
Bram Moolenaar582fd852005-03-28 20:58:01 +00004841 if (rp->rs_no != SUBPAT) /* zero-width */
4842 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004843 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004844 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004845 if (status == RA_CONT)
4846 scan = regnext(scan);
4847 break;
4848
4849 case RS_BEHIND1:
4850 if (status == RA_NOMATCH)
4851 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004852 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004853 regstack.ga_len -= sizeof(regbehind_T);
4854 }
4855 else
4856 {
4857 /* The stuff after BEHIND/NOBEHIND matches. Now try if
4858 * the behind part does (not) match before the current
4859 * position in the input. This must be done at every
4860 * position in the input and checking if the match ends at
4861 * the current position. */
4862
4863 /* save the position after the found match for next */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004864 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004865
4866 /* start looking for a match with operand at the current
4867 * postion. Go back one character until we find the
4868 * result, hitting the start of the line or the previous
4869 * line (for multi-line matching).
4870 * Set behind_pos to where the match should end, BHPOS
4871 * will match it. Save the current value. */
4872 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4873 behind_pos = rp->rs_un.regsave;
4874
4875 rp->rs_state = RS_BEHIND2;
4876
Bram Moolenaar582fd852005-03-28 20:58:01 +00004877 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004878 scan = OPERAND(rp->rs_scan);
4879 }
4880 break;
4881
4882 case RS_BEHIND2:
4883 /*
4884 * Looping for BEHIND / NOBEHIND match.
4885 */
4886 if (status == RA_MATCH && reg_save_equal(&behind_pos))
4887 {
4888 /* found a match that ends where "next" started */
4889 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4890 if (rp->rs_no == BEHIND)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004891 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4892 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004893 else
4894 /* But we didn't want a match. */
4895 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004896 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004897 regstack.ga_len -= sizeof(regbehind_T);
4898 }
4899 else
4900 {
4901 /* No match: Go back one character. May go to previous
4902 * line once. */
4903 no = OK;
4904 if (REG_MULTI)
4905 {
4906 if (rp->rs_un.regsave.rs_u.pos.col == 0)
4907 {
4908 if (rp->rs_un.regsave.rs_u.pos.lnum
4909 < behind_pos.rs_u.pos.lnum
4910 || reg_getline(
4911 --rp->rs_un.regsave.rs_u.pos.lnum)
4912 == NULL)
4913 no = FAIL;
4914 else
4915 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004916 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004917 rp->rs_un.regsave.rs_u.pos.col =
4918 (colnr_T)STRLEN(regline);
4919 }
4920 }
4921 else
4922 --rp->rs_un.regsave.rs_u.pos.col;
4923 }
4924 else
4925 {
4926 if (rp->rs_un.regsave.rs_u.ptr == regline)
4927 no = FAIL;
4928 else
4929 --rp->rs_un.regsave.rs_u.ptr;
4930 }
4931 if (no == OK)
4932 {
4933 /* Advanced, prepare for finding match again. */
Bram Moolenaar582fd852005-03-28 20:58:01 +00004934 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004935 scan = OPERAND(rp->rs_scan);
4936 }
4937 else
4938 {
4939 /* Can't advance. For NOBEHIND that's a match. */
4940 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4941 if (rp->rs_no == NOBEHIND)
4942 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00004943 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4944 &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004945 status = RA_MATCH;
4946 }
4947 else
4948 status = RA_NOMATCH;
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004949 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004950 regstack.ga_len -= sizeof(regbehind_T);
4951 }
4952 }
4953 break;
4954
4955 case RS_STAR_LONG:
4956 case RS_STAR_SHORT:
4957 {
4958 regstar_T *rst = ((regstar_T *)rp) - 1;
4959
4960 if (status == RA_MATCH)
4961 {
Bram Moolenaara7fc0102005-05-18 22:17:12 +00004962 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004963 regstack.ga_len -= sizeof(regstar_T);
4964 break;
4965 }
4966
4967 /* Tried once already, restore input pointers. */
4968 if (status != RA_BREAK)
Bram Moolenaar582fd852005-03-28 20:58:01 +00004969 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00004970
4971 /* Repeat until we found a position where it could match. */
4972 for (;;)
4973 {
4974 if (status != RA_BREAK)
4975 {
4976 /* Tried first position already, advance. */
4977 if (rp->rs_state == RS_STAR_LONG)
4978 {
4979 /* Trying for longest matc, but couldn't or didn't
4980 * match -- back up one char. */
4981 if (--rst->count < rst->minval)
4982 break;
4983 if (reginput == regline)
4984 {
4985 /* backup to last char of previous line */
4986 --reglnum;
4987 regline = reg_getline(reglnum);
4988 /* Just in case regrepeat() didn't count
4989 * right. */
4990 if (regline == NULL)
4991 break;
4992 reginput = regline + STRLEN(regline);
4993 fast_breakcheck();
4994 }
4995 else
4996 mb_ptr_back(regline, reginput);
4997 }
4998 else
4999 {
5000 /* Range is backwards, use shortest match first.
5001 * Careful: maxval and minval are exchanged!
5002 * Couldn't or didn't match: try advancing one
5003 * char. */
5004 if (rst->count == rst->minval
5005 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5006 break;
5007 ++rst->count;
5008 }
5009 if (got_int)
5010 break;
5011 }
5012 else
5013 status = RA_NOMATCH;
5014
5015 /* If it could match, try it. */
5016 if (rst->nextb == NUL || *reginput == rst->nextb
5017 || *reginput == rst->nextb_ic)
5018 {
Bram Moolenaar582fd852005-03-28 20:58:01 +00005019 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005020 scan = regnext(rp->rs_scan);
5021 status = RA_CONT;
5022 break;
5023 }
5024 }
5025 if (status != RA_CONT)
5026 {
5027 /* Failed. */
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005028 regstack_pop(&scan);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005029 regstack.ga_len -= sizeof(regstar_T);
5030 status = RA_NOMATCH;
5031 }
5032 }
5033 break;
5034 }
5035
5036 /* If we want to continue the inner loop or didn't pop a state contine
5037 * matching loop */
5038 if (status == RA_CONT || rp == (regitem_T *)
5039 ((char *)regstack.ga_data + regstack.ga_len) - 1)
5040 break;
5041 }
5042
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005043 /* May need to continue with the inner loop, starting at "scan". */
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005044 if (status == RA_CONT)
5045 continue;
5046
5047 /*
5048 * If the regstack is empty or something failed we are done.
5049 */
5050 if (regstack.ga_len == 0 || status == RA_FAIL)
5051 {
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005052 if (scan == NULL)
5053 {
5054 /*
5055 * We get here only if there's trouble -- normally "case END" is
5056 * the terminating point.
5057 */
5058 EMSG(_(e_re_corr));
5059#ifdef DEBUG
5060 printf("Premature EOL\n");
5061#endif
5062 }
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005063 if (status == RA_FAIL)
5064 got_int = TRUE;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005065 return (status == RA_MATCH);
5066 }
5067
5068 } /* End of loop until the regstack is empty. */
5069
5070 /* NOTREACHED */
5071}
5072
5073/*
5074 * Push an item onto the regstack.
5075 * Returns pointer to new item. Returns NULL when out of memory.
5076 */
5077 static regitem_T *
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005078regstack_push(state, scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005079 regstate_T state;
5080 char_u *scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005081{
5082 regitem_T *rp;
5083
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005084 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
Bram Moolenaare4efc3b2005-03-07 23:16:51 +00005085 {
5086 EMSG(_(e_maxmempat));
5087 return NULL;
5088 }
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005089 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005090 return NULL;
5091
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005092 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005093 rp->rs_state = state;
5094 rp->rs_scan = scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005095
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005096 regstack.ga_len += sizeof(regitem_T);
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005097 return rp;
5098}
5099
5100/*
5101 * Pop an item from the regstack.
5102 */
5103 static void
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005104regstack_pop(scan)
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005105 char_u **scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005106{
5107 regitem_T *rp;
5108
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005109 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005110 *scan = rp->rs_scan;
Bram Moolenaarbc7aa852005-03-06 23:38:09 +00005111
Bram Moolenaara7fc0102005-05-18 22:17:12 +00005112 regstack.ga_len -= sizeof(regitem_T);
Bram Moolenaar071d4272004-06-13 20:20:40 +00005113}
5114
/*
 * regrepeat - repeatedly match something simple, return how many.
 * Advances reginput (and reglnum) to just after the matched chars.
 *
 * "p" is the node of the simple item; its opcode selects what is matched and
 * its operand is used for EXACTLY, MULTIBYTECODE, ANYOF and ANYBUT.
 * "maxcount" is the upper limit for the returned count.
 * For opcodes with ADD_NL a line break also counts as one match: either by
 * moving to the next line with reg_nextline(), or, when reg_line_lbr is set,
 * by stepping over a '\n' in the text.
 * An unknown opcode gives the e_re_corr error and returns zero.
 */
    static int
regrepeat(p, maxcount)
    char_u      *p;
    long        maxcount;   /* maximum number of matches allowed */
{
    long        count = 0;
    char_u      *scan;
    char_u      *opnd;
    int         mask;       /* only set and used by the do_class cases */
    int         testval = 0;

    scan = reginput;        /* Make local copy of reginput for speed. */
    opnd = OPERAND(p);
    switch (OP(p))
    {
      case ANY:
      case ANY + ADD_NL:
        while (count < maxcount)
        {
            /* Matching anything means we continue until end-of-line (or
             * end-of-file for ANY + ADD_NL), only limited by maxcount. */
            while (*scan != NUL && count < maxcount)
            {
                ++count;
                mb_ptr_adv(scan);
            }
            if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr
                                                      || count == maxcount)
                break;
            ++count;            /* count the line-break */
            reg_nextline();
            scan = reginput;
            if (got_int)
                break;
        }
        break;

      /* For the IDENT, KWORD, FNAME and PRINT groups "testval" is TRUE when
       * digits are accepted; the S-prefixed variants leave it zero, which
       * makes them reject digits. */
      case IDENT:
      case IDENT + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SIDENT:
      case SIDENT + ADD_NL:
        while (count < maxcount)
        {
            if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case KWORD:
      case KWORD + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SKWORD:
      case SKWORD + ADD_NL:
        while (count < maxcount)
        {
            if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case FNAME:
      case FNAME + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SFNAME:
      case SFNAME + ADD_NL:
        while (count < maxcount)
        {
            if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case PRINT:
      case PRINT + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/
      case SPRINT:
      case SPRINT + ADD_NL:
        while (count < maxcount)
        {
            /* Unlike the groups above, the end of the line is checked
             * before the character test here. */
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
            {
                mb_ptr_adv(scan);
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      /* Character classes looked up in class_tab[].  A character matches
       * when (class_tab[c] & mask) == testval: the positive classes set
       * testval to the mask, the N-prefixed classes leave testval zero. */
      case WHITE:
      case WHITE + ADD_NL:
        testval = mask = RI_WHITE;
do_class:
        while (count < maxcount)
        {
#ifdef FEAT_MBYTE
            int         l;
#endif
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
#ifdef FEAT_MBYTE
            else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
            {
                /* A multi-byte character is never in a positive class and
                 * always matches a negated class. */
                if (testval != 0)
                    break;
                scan += l;
            }
#endif
            else if ((class_tab[*scan] & mask) == testval)
                ++scan;
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
            else
                break;
            ++count;
        }
        break;

      case NWHITE:
      case NWHITE + ADD_NL:
        mask = RI_WHITE;
        goto do_class;
      case DIGIT:
      case DIGIT + ADD_NL:
        testval = mask = RI_DIGIT;
        goto do_class;
      case NDIGIT:
      case NDIGIT + ADD_NL:
        mask = RI_DIGIT;
        goto do_class;
      case HEX:
      case HEX + ADD_NL:
        testval = mask = RI_HEX;
        goto do_class;
      case NHEX:
      case NHEX + ADD_NL:
        mask = RI_HEX;
        goto do_class;
      case OCTAL:
      case OCTAL + ADD_NL:
        testval = mask = RI_OCTAL;
        goto do_class;
      case NOCTAL:
      case NOCTAL + ADD_NL:
        mask = RI_OCTAL;
        goto do_class;
      case WORD:
      case WORD + ADD_NL:
        testval = mask = RI_WORD;
        goto do_class;
      case NWORD:
      case NWORD + ADD_NL:
        mask = RI_WORD;
        goto do_class;
      case HEAD:
      case HEAD + ADD_NL:
        testval = mask = RI_HEAD;
        goto do_class;
      case NHEAD:
      case NHEAD + ADD_NL:
        mask = RI_HEAD;
        goto do_class;
      case ALPHA:
      case ALPHA + ADD_NL:
        testval = mask = RI_ALPHA;
        goto do_class;
      case NALPHA:
      case NALPHA + ADD_NL:
        mask = RI_ALPHA;
        goto do_class;
      case LOWER:
      case LOWER + ADD_NL:
        testval = mask = RI_LOWER;
        goto do_class;
      case NLOWER:
      case NLOWER + ADD_NL:
        mask = RI_LOWER;
        goto do_class;
      case UPPER:
      case UPPER + ADD_NL:
        testval = mask = RI_UPPER;
        goto do_class;
      case NUPPER:
      case NUPPER + ADD_NL:
        mask = RI_UPPER;
        goto do_class;

      case EXACTLY:
        {
            int     cu, cl;

            /* This doesn't do a multi-byte character, because a MULTIBYTECODE
             * would have been used for it. */
            /* Only the first byte of the operand is compared. */
            if (ireg_ic)
            {
                cu = TOUPPER_LOC(*opnd);
                cl = TOLOWER_LOC(*opnd);
                while (count < maxcount && (*scan == cu || *scan == cl))
                {
                    count++;
                    scan++;
                }
            }
            else
            {
                cu = *opnd;
                while (count < maxcount && *scan == cu)
                {
                    count++;
                    scan++;
                }
            }
            break;
        }

#ifdef FEAT_MBYTE
      case MULTIBYTECODE:
        {
            int         i, len, cf = 0;

            /* Safety check (just in case 'encoding' was changed since
             * compiling the program). */
            if ((len = (*mb_ptr2len)(opnd)) > 1)
            {
                if (ireg_ic && enc_utf8)
                    cf = utf_fold(utf_ptr2char(opnd));
                while (count < maxcount)
                {
                    /* First try a byte-wise comparison; when that fails and
                     * case is ignored for utf-8, compare the folded
                     * characters instead. */
                    for (i = 0; i < len; ++i)
                        if (opnd[i] != scan[i])
                            break;
                    if (i < len && (!ireg_ic || !enc_utf8
                                        || utf_fold(utf_ptr2char(scan)) != cf))
                        break;
                    scan += len;
                    ++count;
                }
            }
        }
        break;
#endif

      /* For ANYOF "testval" is TRUE: the character must be found in the
       * operand.  For ANYBUT it stays zero: the character must not be
       * found. */
      case ANYOF:
      case ANYOF + ADD_NL:
        testval = TRUE;
        /*FALLTHROUGH*/

      case ANYBUT:
      case ANYBUT + ADD_NL:
        while (count < maxcount)
        {
#ifdef FEAT_MBYTE
            int len;
#endif
            if (*scan == NUL)
            {
                if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
                    break;
                reg_nextline();
                scan = reginput;
                if (got_int)
                    break;
            }
            else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
                ++scan;
#ifdef FEAT_MBYTE
            else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
            {
                if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
                    break;
                scan += len;
            }
#endif
            else
            {
                if ((cstrchr(opnd, *scan) == NULL) == testval)
                    break;
                ++scan;
            }
            ++count;
        }
        break;

      case NEWL:
        /* Count line breaks: either the end of a line that is not past
         * reg_maxline, or a '\n' in the text when reg_line_lbr is set. */
        while (count < maxcount
                && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr)
                    || (*scan == '\n' && reg_line_lbr)))
        {
            count++;
            if (reg_line_lbr)
                ADVANCE_REGINPUT();
            else
                reg_nextline();
            scan = reginput;
            if (got_int)
                break;
        }
        break;

      default:                  /* Oh dear.  Called inappropriately. */
        EMSG(_(e_re_corr));
#ifdef DEBUG
        printf("Called regrepeat with op code %d\n", OP(p));
#endif
        break;
    }

    /* Write the local copy back, so that the caller sees the new position. */
    reginput = scan;

    return (int)count;
}
5501
5502/*
5503 * regnext - dig the "next" pointer out of a node
5504 */
5505 static char_u *
5506regnext(p)
5507 char_u *p;
5508{
5509 int offset;
5510
5511 if (p == JUST_CALC_SIZE)
5512 return NULL;
5513
5514 offset = NEXT(p);
5515 if (offset == 0)
5516 return NULL;
5517
Bram Moolenaar582fd852005-03-28 20:58:01 +00005518 if (OP(p) == BACK)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005519 return p - offset;
5520 else
5521 return p + offset;
5522}
5523
5524/*
5525 * Check the regexp program for its magic number.
5526 * Return TRUE if it's wrong.
5527 */
5528 static int
5529prog_magic_wrong()
5530{
5531 if (UCHARAT(REG_MULTI
5532 ? reg_mmatch->regprog->program
5533 : reg_match->regprog->program) != REGMAGIC)
5534 {
5535 EMSG(_(e_re_corr));
5536 return TRUE;
5537 }
5538 return FALSE;
5539}
5540
5541/*
5542 * Cleanup the subexpressions, if this wasn't done yet.
5543 * This construction is used to clear the subexpressions only when they are
5544 * used (to increase speed).
5545 */
5546 static void
5547cleanup_subexpr()
5548{
5549 if (need_clear_subexpr)
5550 {
5551 if (REG_MULTI)
5552 {
5553 /* Use 0xff to set lnum to -1 */
5554 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5555 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5556 }
5557 else
5558 {
5559 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5560 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5561 }
5562 need_clear_subexpr = FALSE;
5563 }
5564}
5565
#ifdef FEAT_SYN_HL
/*
 * Reset the start and end of all "z" subexpressions, but only when
 * need_clear_zsubexpr is set.  The flag is cleared afterwards.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (!REG_MULTI)
    {
        /* Single line: the pointers become NULL. */
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
        /* Multi-line: filling with 0xff bytes sets lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5587
5588/*
5589 * Advance reglnum, regline and reginput to the next line.
5590 */
5591 static void
5592reg_nextline()
5593{
5594 regline = reg_getline(++reglnum);
5595 reginput = regline;
5596 fast_breakcheck();
5597}
5598
5599/*
5600 * Save the input line and position in a regsave_T.
5601 */
5602 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005603reg_save(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005604 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005605 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005606{
5607 if (REG_MULTI)
5608 {
5609 save->rs_u.pos.col = (colnr_T)(reginput - regline);
5610 save->rs_u.pos.lnum = reglnum;
5611 }
5612 else
5613 save->rs_u.ptr = reginput;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005614 save->rs_len = gap->ga_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005615}
5616
5617/*
5618 * Restore the input line and position from a regsave_T.
5619 */
5620 static void
Bram Moolenaar582fd852005-03-28 20:58:01 +00005621reg_restore(save, gap)
Bram Moolenaar071d4272004-06-13 20:20:40 +00005622 regsave_T *save;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005623 garray_T *gap;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005624{
5625 if (REG_MULTI)
5626 {
5627 if (reglnum != save->rs_u.pos.lnum)
5628 {
5629 /* only call reg_getline() when the line number changed to save
5630 * a bit of time */
5631 reglnum = save->rs_u.pos.lnum;
5632 regline = reg_getline(reglnum);
5633 }
5634 reginput = regline + save->rs_u.pos.col;
5635 }
5636 else
5637 reginput = save->rs_u.ptr;
Bram Moolenaar582fd852005-03-28 20:58:01 +00005638 gap->ga_len = save->rs_len;
Bram Moolenaar071d4272004-06-13 20:20:40 +00005639}
5640
5641/*
5642 * Return TRUE if current position is equal to saved position.
5643 */
5644 static int
5645reg_save_equal(save)
5646 regsave_T *save;
5647{
5648 if (REG_MULTI)
5649 return reglnum == save->rs_u.pos.lnum
5650 && reginput == regline + save->rs_u.pos.col;
5651 return reginput == save->rs_u.ptr;
5652}
5653
5654/*
5655 * Tentatively set the sub-expression start to the current position (after
5656 * calling regmatch() they will have changed). Need to save the existing
5657 * values for when there is no match.
5658 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5659 * depending on REG_MULTI.
5660 */
5661 static void
5662save_se_multi(savep, posp)
5663 save_se_T *savep;
5664 lpos_T *posp;
5665{
5666 savep->se_u.pos = *posp;
5667 posp->lnum = reglnum;
5668 posp->col = (colnr_T)(reginput - regline);
5669}
5670
5671 static void
5672save_se_one(savep, pp)
5673 save_se_T *savep;
5674 char_u **pp;
5675{
5676 savep->se_u.ptr = *pp;
5677 *pp = reginput;
5678}
5679
5680/*
5681 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5682 */
5683 static int
5684re_num_cmp(val, scan)
5685 long_u val;
5686 char_u *scan;
5687{
5688 long_u n = OPERAND_MIN(scan);
5689
5690 if (OPERAND_CMP(scan) == '>')
5691 return val > n;
5692 if (OPERAND_CMP(scan) == '<')
5693 return val < n;
5694 return val == n;
5695}
5696
5697
5698#ifdef DEBUG
5699
5700/*
5701 * regdump - dump a regexp onto stdout in vaguely comprehensible form
5702 */
5703 static void
5704regdump(pattern, r)
5705 char_u *pattern;
5706 regprog_T *r;
5707{
5708 char_u *s;
5709 int op = EXACTLY; /* Arbitrary non-END op. */
5710 char_u *next;
5711 char_u *end = NULL;
5712
5713 printf("\r\nregcomp(%s):\r\n", pattern);
5714
5715 s = r->program + 1;
5716 /*
5717 * Loop until we find the END that isn't before a referred next (an END
5718 * can also appear in a NOMATCH operand).
5719 */
5720 while (op != END || s <= end)
5721 {
5722 op = OP(s);
5723 printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
5724 next = regnext(s);
5725 if (next == NULL) /* Next ptr. */
5726 printf("(0)");
5727 else
5728 printf("(%d)", (int)((s - r->program) + (next - s)));
5729 if (end < next)
5730 end = next;
5731 if (op == BRACE_LIMITS)
5732 {
5733 /* Two short ints */
5734 printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5735 s += 8;
5736 }
5737 s += 3;
5738 if (op == ANYOF || op == ANYOF + ADD_NL
5739 || op == ANYBUT || op == ANYBUT + ADD_NL
5740 || op == EXACTLY)
5741 {
5742 /* Literal string, where present. */
5743 while (*s != NUL)
5744 printf("%c", *s++);
5745 s++;
5746 }
5747 printf("\r\n");
5748 }
5749
5750 /* Header fields of interest. */
5751 if (r->regstart != NUL)
5752 printf("start `%s' 0x%x; ", r->regstart < 256
5753 ? (char *)transchar(r->regstart)
5754 : "multibyte", r->regstart);
5755 if (r->reganch)
5756 printf("anchored; ");
5757 if (r->regmust != NULL)
5758 printf("must have \"%s\"", r->regmust);
5759 printf("\r\n");
5760}
5761
5762/*
5763 * regprop - printable representation of opcode
5764 */
5765 static char_u *
5766regprop(op)
5767 char_u *op;
5768{
5769 char_u *p;
5770 static char_u buf[50];
5771
5772 (void) strcpy(buf, ":");
5773
5774 switch (OP(op))
5775 {
5776 case BOL:
5777 p = "BOL";
5778 break;
5779 case EOL:
5780 p = "EOL";
5781 break;
5782 case RE_BOF:
5783 p = "BOF";
5784 break;
5785 case RE_EOF:
5786 p = "EOF";
5787 break;
5788 case CURSOR:
5789 p = "CURSOR";
5790 break;
5791 case RE_LNUM:
5792 p = "RE_LNUM";
5793 break;
5794 case RE_COL:
5795 p = "RE_COL";
5796 break;
5797 case RE_VCOL:
5798 p = "RE_VCOL";
5799 break;
5800 case BOW:
5801 p = "BOW";
5802 break;
5803 case EOW:
5804 p = "EOW";
5805 break;
5806 case ANY:
5807 p = "ANY";
5808 break;
5809 case ANY + ADD_NL:
5810 p = "ANY+NL";
5811 break;
5812 case ANYOF:
5813 p = "ANYOF";
5814 break;
5815 case ANYOF + ADD_NL:
5816 p = "ANYOF+NL";
5817 break;
5818 case ANYBUT:
5819 p = "ANYBUT";
5820 break;
5821 case ANYBUT + ADD_NL:
5822 p = "ANYBUT+NL";
5823 break;
5824 case IDENT:
5825 p = "IDENT";
5826 break;
5827 case IDENT + ADD_NL:
5828 p = "IDENT+NL";
5829 break;
5830 case SIDENT:
5831 p = "SIDENT";
5832 break;
5833 case SIDENT + ADD_NL:
5834 p = "SIDENT+NL";
5835 break;
5836 case KWORD:
5837 p = "KWORD";
5838 break;
5839 case KWORD + ADD_NL:
5840 p = "KWORD+NL";
5841 break;
5842 case SKWORD:
5843 p = "SKWORD";
5844 break;
5845 case SKWORD + ADD_NL:
5846 p = "SKWORD+NL";
5847 break;
5848 case FNAME:
5849 p = "FNAME";
5850 break;
5851 case FNAME + ADD_NL:
5852 p = "FNAME+NL";
5853 break;
5854 case SFNAME:
5855 p = "SFNAME";
5856 break;
5857 case SFNAME + ADD_NL:
5858 p = "SFNAME+NL";
5859 break;
5860 case PRINT:
5861 p = "PRINT";
5862 break;
5863 case PRINT + ADD_NL:
5864 p = "PRINT+NL";
5865 break;
5866 case SPRINT:
5867 p = "SPRINT";
5868 break;
5869 case SPRINT + ADD_NL:
5870 p = "SPRINT+NL";
5871 break;
5872 case WHITE:
5873 p = "WHITE";
5874 break;
5875 case WHITE + ADD_NL:
5876 p = "WHITE+NL";
5877 break;
5878 case NWHITE:
5879 p = "NWHITE";
5880 break;
5881 case NWHITE + ADD_NL:
5882 p = "NWHITE+NL";
5883 break;
5884 case DIGIT:
5885 p = "DIGIT";
5886 break;
5887 case DIGIT + ADD_NL:
5888 p = "DIGIT+NL";
5889 break;
5890 case NDIGIT:
5891 p = "NDIGIT";
5892 break;
5893 case NDIGIT + ADD_NL:
5894 p = "NDIGIT+NL";
5895 break;
5896 case HEX:
5897 p = "HEX";
5898 break;
5899 case HEX + ADD_NL:
5900 p = "HEX+NL";
5901 break;
5902 case NHEX:
5903 p = "NHEX";
5904 break;
5905 case NHEX + ADD_NL:
5906 p = "NHEX+NL";
5907 break;
5908 case OCTAL:
5909 p = "OCTAL";
5910 break;
5911 case OCTAL + ADD_NL:
5912 p = "OCTAL+NL";
5913 break;
5914 case NOCTAL:
5915 p = "NOCTAL";
5916 break;
5917 case NOCTAL + ADD_NL:
5918 p = "NOCTAL+NL";
5919 break;
5920 case WORD:
5921 p = "WORD";
5922 break;
5923 case WORD + ADD_NL:
5924 p = "WORD+NL";
5925 break;
5926 case NWORD:
5927 p = "NWORD";
5928 break;
5929 case NWORD + ADD_NL:
5930 p = "NWORD+NL";
5931 break;
5932 case HEAD:
5933 p = "HEAD";
5934 break;
5935 case HEAD + ADD_NL:
5936 p = "HEAD+NL";
5937 break;
5938 case NHEAD:
5939 p = "NHEAD";
5940 break;
5941 case NHEAD + ADD_NL:
5942 p = "NHEAD+NL";
5943 break;
5944 case ALPHA:
5945 p = "ALPHA";
5946 break;
5947 case ALPHA + ADD_NL:
5948 p = "ALPHA+NL";
5949 break;
5950 case NALPHA:
5951 p = "NALPHA";
5952 break;
5953 case NALPHA + ADD_NL:
5954 p = "NALPHA+NL";
5955 break;
5956 case LOWER:
5957 p = "LOWER";
5958 break;
5959 case LOWER + ADD_NL:
5960 p = "LOWER+NL";
5961 break;
5962 case NLOWER:
5963 p = "NLOWER";
5964 break;
5965 case NLOWER + ADD_NL:
5966 p = "NLOWER+NL";
5967 break;
5968 case UPPER:
5969 p = "UPPER";
5970 break;
5971 case UPPER + ADD_NL:
5972 p = "UPPER+NL";
5973 break;
5974 case NUPPER:
5975 p = "NUPPER";
5976 break;
5977 case NUPPER + ADD_NL:
5978 p = "NUPPER+NL";
5979 break;
5980 case BRANCH:
5981 p = "BRANCH";
5982 break;
5983 case EXACTLY:
5984 p = "EXACTLY";
5985 break;
5986 case NOTHING:
5987 p = "NOTHING";
5988 break;
5989 case BACK:
5990 p = "BACK";
5991 break;
5992 case END:
5993 p = "END";
5994 break;
5995 case MOPEN + 0:
5996 p = "MATCH START";
5997 break;
5998 case MOPEN + 1:
5999 case MOPEN + 2:
6000 case MOPEN + 3:
6001 case MOPEN + 4:
6002 case MOPEN + 5:
6003 case MOPEN + 6:
6004 case MOPEN + 7:
6005 case MOPEN + 8:
6006 case MOPEN + 9:
6007 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6008 p = NULL;
6009 break;
6010 case MCLOSE + 0:
6011 p = "MATCH END";
6012 break;
6013 case MCLOSE + 1:
6014 case MCLOSE + 2:
6015 case MCLOSE + 3:
6016 case MCLOSE + 4:
6017 case MCLOSE + 5:
6018 case MCLOSE + 6:
6019 case MCLOSE + 7:
6020 case MCLOSE + 8:
6021 case MCLOSE + 9:
6022 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6023 p = NULL;
6024 break;
6025 case BACKREF + 1:
6026 case BACKREF + 2:
6027 case BACKREF + 3:
6028 case BACKREF + 4:
6029 case BACKREF + 5:
6030 case BACKREF + 6:
6031 case BACKREF + 7:
6032 case BACKREF + 8:
6033 case BACKREF + 9:
6034 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6035 p = NULL;
6036 break;
6037 case NOPEN:
6038 p = "NOPEN";
6039 break;
6040 case NCLOSE:
6041 p = "NCLOSE";
6042 break;
6043#ifdef FEAT_SYN_HL
6044 case ZOPEN + 1:
6045 case ZOPEN + 2:
6046 case ZOPEN + 3:
6047 case ZOPEN + 4:
6048 case ZOPEN + 5:
6049 case ZOPEN + 6:
6050 case ZOPEN + 7:
6051 case ZOPEN + 8:
6052 case ZOPEN + 9:
6053 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6054 p = NULL;
6055 break;
6056 case ZCLOSE + 1:
6057 case ZCLOSE + 2:
6058 case ZCLOSE + 3:
6059 case ZCLOSE + 4:
6060 case ZCLOSE + 5:
6061 case ZCLOSE + 6:
6062 case ZCLOSE + 7:
6063 case ZCLOSE + 8:
6064 case ZCLOSE + 9:
6065 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6066 p = NULL;
6067 break;
6068 case ZREF + 1:
6069 case ZREF + 2:
6070 case ZREF + 3:
6071 case ZREF + 4:
6072 case ZREF + 5:
6073 case ZREF + 6:
6074 case ZREF + 7:
6075 case ZREF + 8:
6076 case ZREF + 9:
6077 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6078 p = NULL;
6079 break;
6080#endif
6081 case STAR:
6082 p = "STAR";
6083 break;
6084 case PLUS:
6085 p = "PLUS";
6086 break;
6087 case NOMATCH:
6088 p = "NOMATCH";
6089 break;
6090 case MATCH:
6091 p = "MATCH";
6092 break;
6093 case BEHIND:
6094 p = "BEHIND";
6095 break;
6096 case NOBEHIND:
6097 p = "NOBEHIND";
6098 break;
6099 case SUBPAT:
6100 p = "SUBPAT";
6101 break;
6102 case BRACE_LIMITS:
6103 p = "BRACE_LIMITS";
6104 break;
6105 case BRACE_SIMPLE:
6106 p = "BRACE_SIMPLE";
6107 break;
6108 case BRACE_COMPLEX + 0:
6109 case BRACE_COMPLEX + 1:
6110 case BRACE_COMPLEX + 2:
6111 case BRACE_COMPLEX + 3:
6112 case BRACE_COMPLEX + 4:
6113 case BRACE_COMPLEX + 5:
6114 case BRACE_COMPLEX + 6:
6115 case BRACE_COMPLEX + 7:
6116 case BRACE_COMPLEX + 8:
6117 case BRACE_COMPLEX + 9:
6118 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6119 p = NULL;
6120 break;
6121#ifdef FEAT_MBYTE
6122 case MULTIBYTECODE:
6123 p = "MULTIBYTECODE";
6124 break;
6125#endif
6126 case NEWL:
6127 p = "NEWL";
6128 break;
6129 default:
6130 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6131 p = NULL;
6132 break;
6133 }
6134 if (p != NULL)
6135 (void) strcat(buf, p);
6136 return buf;
6137}
6138#endif
6139
6140#ifdef FEAT_MBYTE
6141static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6142
/* Decomposition of one character into up to three characters; the entries
 * that are not used are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/* 0xfb20 - 0xfb4f */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into "*c1", "*c2" and "*c3".
 * A character in the range 0xfb20 - 0xfb4f is looked up in decomp_table[].
 * Any other character is returned unchanged in "*c1", with "*c2" and "*c3"
 * set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character in the table, otherwise
     * the index below is negative and reads before the table. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6221#endif
6222
6223/*
6224 * Compare two strings, ignore case if ireg_ic set.
6225 * Return 0 if strings match, non-zero otherwise.
6226 * Correct the length "*n" when composing characters are ignored.
6227 */
6228 static int
6229cstrncmp(s1, s2, n)
6230 char_u *s1, *s2;
6231 int *n;
6232{
6233 int result;
6234
6235 if (!ireg_ic)
6236 result = STRNCMP(s1, s2, *n);
6237 else
6238 result = MB_STRNICMP(s1, s2, *n);
6239
6240#ifdef FEAT_MBYTE
6241 /* if it failed and it's utf8 and we want to combineignore: */
6242 if (result != 0 && enc_utf8 && ireg_icombine)
6243 {
6244 char_u *str1, *str2;
6245 int c1, c2, c11, c12;
6246 int ix;
6247 int junk;
6248
6249 /* we have to handle the strcmp ourselves, since it is necessary to
6250 * deal with the composing characters by ignoring them: */
6251 str1 = s1;
6252 str2 = s2;
6253 c1 = c2 = 0;
6254 for (ix = 0; ix < *n; )
6255 {
6256 c1 = mb_ptr2char_adv(&str1);
6257 c2 = mb_ptr2char_adv(&str2);
6258 ix += utf_char2len(c1);
6259
6260 /* decompose the character if necessary, into 'base' characters
6261 * because I don't care about Arabic, I will hard-code the Hebrew
6262 * which I *do* care about! So sue me... */
6263 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6264 {
6265 /* decomposition necessary? */
6266 mb_decompose(c1, &c11, &junk, &junk);
6267 mb_decompose(c2, &c12, &junk, &junk);
6268 c1 = c11;
6269 c2 = c12;
6270 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6271 break;
6272 }
6273 }
6274 result = c2 - c1;
6275 if (result == 0)
6276 *n = (int)(str2 - s2);
6277 }
6278#endif
6279
6280 return result;
6281}
6282
6283/*
6284 * cstrchr: This function is used a lot for simple searches, keep it fast!
6285 */
6286 static char_u *
6287cstrchr(s, c)
6288 char_u *s;
6289 int c;
6290{
6291 char_u *p;
6292 int cc;
6293
6294 if (!ireg_ic
6295#ifdef FEAT_MBYTE
6296 || (!enc_utf8 && mb_char2len(c) > 1)
6297#endif
6298 )
6299 return vim_strchr(s, c);
6300
6301 /* tolower() and toupper() can be slow, comparing twice should be a lot
6302 * faster (esp. when using MS Visual C++!).
6303 * For UTF-8 need to use folded case. */
6304#ifdef FEAT_MBYTE
6305 if (enc_utf8 && c > 0x80)
6306 cc = utf_fold(c);
6307 else
6308#endif
6309 if (isupper(c))
6310 cc = TOLOWER_LOC(c);
6311 else if (islower(c))
6312 cc = TOUPPER_LOC(c);
6313 else
6314 return vim_strchr(s, c);
6315
6316#ifdef FEAT_MBYTE
6317 if (has_mbyte)
6318 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006319 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006320 {
6321 if (enc_utf8 && c > 0x80)
6322 {
6323 if (utf_fold(utf_ptr2char(p)) == cc)
6324 return p;
6325 }
6326 else if (*p == c || *p == cc)
6327 return p;
6328 }
6329 }
6330 else
6331#endif
6332 /* Faster version for when there are no multi-byte characters. */
6333 for (p = s; *p != NUL; ++p)
6334 if (*p == c || *p == cc)
6335 return p;
6336
6337 return NULL;
6338}
6339
6340/***************************************************************
6341 * regsub stuff *
6342 ***************************************************************/
6343
6344/* This stuff below really confuses cc on an SGI -- webb */
6345#ifdef __sgi
6346# undef __ARGS
6347# define __ARGS(x) ()
6348#endif
6349
/*
 * We should define fptr as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
6356typedef void (*(*fptr) __ARGS((char_u *, int)))();
6357
6358static fptr do_upper __ARGS((char_u *, int));
6359static fptr do_Upper __ARGS((char_u *, int));
6360static fptr do_lower __ARGS((char_u *, int));
6361static fptr do_Lower __ARGS((char_u *, int));
6362
6363static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6364
6365 static fptr
6366do_upper(d, c)
6367 char_u *d;
6368 int c;
6369{
6370 *d = TOUPPER_LOC(c);
6371
6372 return (fptr)NULL;
6373}
6374
6375 static fptr
6376do_Upper(d, c)
6377 char_u *d;
6378 int c;
6379{
6380 *d = TOUPPER_LOC(c);
6381
6382 return (fptr)do_Upper;
6383}
6384
6385 static fptr
6386do_lower(d, c)
6387 char_u *d;
6388 int c;
6389{
6390 *d = TOLOWER_LOC(c);
6391
6392 return (fptr)NULL;
6393}
6394
6395 static fptr
6396do_Lower(d, c)
6397 char_u *d;
6398 int c;
6399{
6400 *d = TOLOWER_LOC(c);
6401
6402 return (fptr)do_Lower;
6403}
6404
6405/*
6406 * regtilde(): Replace tildes in the pattern by the old pattern.
6407 *
6408 * Short explanation of the tilde: It stands for the previous replacement
6409 * pattern. If that previous pattern also contains a ~ we should go back a
6410 * step further... But we insert the previous pattern into the current one
6411 * and remember that.
6412 * This still does not handle the case where "magic" changes. TODO?
6413 *
6414 * The tildes are parsed once before the first call to vim_regsub().
6415 */
6416 char_u *
6417regtilde(source, magic)
6418 char_u *source;
6419 int magic;
6420{
6421 char_u *newsub = source;
6422 char_u *tmpsub;
6423 char_u *p;
6424 int len;
6425 int prevlen;
6426
6427 for (p = newsub; *p; ++p)
6428 {
6429 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6430 {
6431 if (reg_prev_sub != NULL)
6432 {
6433 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6434 prevlen = (int)STRLEN(reg_prev_sub);
6435 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6436 if (tmpsub != NULL)
6437 {
6438 /* copy prefix */
6439 len = (int)(p - newsub); /* not including ~ */
6440 mch_memmove(tmpsub, newsub, (size_t)len);
6441 /* interpretate tilde */
6442 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6443 /* copy postfix */
6444 if (!magic)
6445 ++p; /* back off \ */
6446 STRCPY(tmpsub + len + prevlen, p + 1);
6447
6448 if (newsub != source) /* already allocated newsub */
6449 vim_free(newsub);
6450 newsub = tmpsub;
6451 p = newsub + len + prevlen;
6452 }
6453 }
6454 else if (magic)
6455 STRCPY(p, p + 1); /* remove '~' */
6456 else
6457 STRCPY(p, p + 2); /* remove '\~' */
6458 --p;
6459 }
6460 else
6461 {
6462 if (*p == '\\' && p[1]) /* skip escaped characters */
6463 ++p;
6464#ifdef FEAT_MBYTE
6465 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006466 p += (*mb_ptr2len)(p) - 1;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006467#endif
6468 }
6469 }
6470
6471 vim_free(reg_prev_sub);
6472 if (newsub != source) /* newsub was allocated, just keep it */
6473 reg_prev_sub = newsub;
6474 else /* no ~ found, need to save newsub */
6475 reg_prev_sub = vim_strsave(newsub);
6476 return newsub;
6477}
6478
6479#ifdef FEAT_EVAL
6480static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
6481
6482/* These pointers are used instead of reg_match and reg_mmatch for
6483 * reg_submatch(). Needed for when the substitution string is an expression
6484 * that contains a call to substitute() and submatch(). */
6485static regmatch_T *submatch_match;
6486static regmmatch_T *submatch_mmatch;
6487#endif
6488
6489#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6490/*
6491 * vim_regsub() - perform substitutions after a vim_regexec() or
6492 * vim_regexec_multi() match.
6493 *
6494 * If "copy" is TRUE really copy into "dest".
6495 * If "copy" is FALSE nothing is copied, this is just to find out the length
6496 * of the result.
6497 *
6498 * If "backslash" is TRUE, a backslash will be removed later, need to double
6499 * them to keep them, and insert a backslash before a CR to avoid it being
6500 * replaced with a line break later.
6501 *
6502 * Note: The matched text must not change between the call of
6503 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
6504 * references invalid!
6505 *
6506 * Returns the size of the replacement, including terminating NUL.
6507 */
6508 int
6509vim_regsub(rmp, source, dest, copy, magic, backslash)
6510 regmatch_T *rmp;
6511 char_u *source;
6512 char_u *dest;
6513 int copy;
6514 int magic;
6515 int backslash;
6516{
6517 reg_match = rmp;
6518 reg_mmatch = NULL;
6519 reg_maxline = 0;
6520 return vim_regsub_both(source, dest, copy, magic, backslash);
6521}
6522#endif
6523
6524 int
6525vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6526 regmmatch_T *rmp;
6527 linenr_T lnum;
6528 char_u *source;
6529 char_u *dest;
6530 int copy;
6531 int magic;
6532 int backslash;
6533{
6534 reg_match = NULL;
6535 reg_mmatch = rmp;
6536 reg_buf = curbuf; /* always works on the current buffer! */
6537 reg_firstlnum = lnum;
6538 reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6539 return vim_regsub_both(source, dest, copy, magic, backslash);
6540}
6541
/*
 * Common worker for vim_regsub() and vim_regsub_multi(), which set
 * "reg_match" or "reg_mmatch" before calling this.
 *
 * Builds the replacement text for the substitute string "source".  When
 * "copy" is TRUE the text is written to "dest", otherwise only the size is
 * computed.
 * "magic" decides whether "&" (TRUE) or "\&" (FALSE) stands for the whole
 * match.  When "backslash" is TRUE backslashes and CR in the result are
 * protected with a backslash.
 *
 * Returns the size of the replacement, including the terminating NUL.
 * Returns 0 when "source" or "dest" is NULL (also when "copy" is FALSE) or
 * when prog_magic_wrong() returns non-zero.
 */
6542 static int
6543vim_regsub_both(source, dest, copy, magic, backslash)
6544 char_u *source;
6545 char_u *dest;
6546 int copy;
6547 int magic;
6548 int backslash;
6549{
6550 char_u *src;
6551 char_u *dst;
6552 char_u *s;
6553 int c;
6554 int no = -1; /* back reference number, -1 for an ordinary character */
6555 fptr func = (fptr)NULL; /* active case handler, NULL for none */
6556 linenr_T clnum = 0; /* init for GCC */
6557 int len = 0; /* init for GCC */
6558#ifdef FEAT_EVAL
6559 static char_u *eval_result = NULL;
6560#endif
6561#ifdef FEAT_MBYTE
6562 int l;
6563#endif
6564
6565
6566 /* Be paranoid... */
6567 if (source == NULL || dest == NULL)
6568 {
6569 EMSG(_(e_null));
6570 return 0;
6571 }
6572 if (prog_magic_wrong())
6573 return 0;
6574 src = source;
6575 dst = dest;
6576
6577 /*
6578 * When the substitute part starts with "\=" evaluate it as an expression.
6579 */
6580 if (source[0] == '\\' && source[1] == '='
6581#ifdef FEAT_EVAL
6582 && !can_f_submatch /* can't do this recursively */
6583#endif
6584 )
6585 {
6586#ifdef FEAT_EVAL
6587 /* To make sure that the length doesn't change between checking the
6588 * length and copying the string, and to speed up things, the
6589 * resulting string is saved from the call with "copy" == FALSE to the
6590 * call with "copy" == TRUE. */
6591 if (copy)
6592 {
6593 if (eval_result != NULL)
6594 {
6595 STRCPY(dest, eval_result);
6596 dst += STRLEN(eval_result);
6597 vim_free(eval_result);
6598 eval_result = NULL;
6599 }
6600 }
6601 else
6602 {
6603 linenr_T save_reg_maxline;
6604 win_T *save_reg_win;
6605 int save_ireg_ic;
6606
6607 vim_free(eval_result);
6608
6609 /* The expression may contain substitute(), which calls us
6610 * recursively. Make sure submatch() gets the text from the first
6611 * level. Don't need to save "reg_buf", because
6612 * vim_regexec_multi() can't be called recursively. */
6613 submatch_match = reg_match;
6614 submatch_mmatch = reg_mmatch;
6615 save_reg_maxline = reg_maxline;
6616 save_reg_win = reg_win;
6617 save_ireg_ic = ireg_ic;
6618 can_f_submatch = TRUE;
6619
6620 eval_result = eval_to_string(source + 2, NULL);
6621 if (eval_result != NULL)
6622 {
Bram Moolenaar1cd871b2004-12-19 22:46:22 +00006623 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar071d4272004-06-13 20:20:40 +00006624 {
6625 /* Change NL to CR, so that it becomes a line break.
6626 * Skip over a backslashed character. */
6627 if (*s == NL)
6628 *s = CAR;
6629 else if (*s == '\\' && s[1] != NUL)
6630 ++s;
Bram Moolenaar071d4272004-06-13 20:20:40 +00006631 }
6632
6633 dst += STRLEN(eval_result);
6634 }
6635
/* Restore the match state that was saved before evaluating. */
6636 reg_match = submatch_match;
6637 reg_mmatch = submatch_mmatch;
6638 reg_maxline = save_reg_maxline;
6639 reg_win = save_reg_win;
6640 ireg_ic = save_ireg_ic;
6641 can_f_submatch = FALSE;
6642 }
6643#endif
6644 }
6645 else
/* Scan the substitute string.  Each round either sets "no" to a back
 * reference to insert, sets "func" for a case change item, or handles "c"
 * as an ordinary character. */
6646 while ((c = *src++) != NUL)
6647 {
6648 if (c == '&' && magic)
6649 no = 0;
6650 else if (c == '\\' && *src != NUL)
6651 {
6652 if (*src == '&' && !magic)
6653 {
6654 ++src;
6655 no = 0;
6656 }
6657 else if ('0' <= *src && *src <= '9')
6658 {
6659 no = *src++ - '0';
6660 }
6661 else if (vim_strchr((char_u *)"uUlLeE", *src))
6662 {
6663 switch (*src++)
6664 {
6665 case 'u': func = (fptr)do_upper;
6666 continue;
6667 case 'U': func = (fptr)do_Upper;
6668 continue;
6669 case 'l': func = (fptr)do_lower;
6670 continue;
6671 case 'L': func = (fptr)do_Lower;
6672 continue;
6673 case 'e':
6674 case 'E': func = (fptr)NULL;
6675 continue;
6676 }
6677 }
6678 }
6679 if (no < 0) /* Ordinary character. */
6680 {
6681 if (c == '\\' && *src != NUL)
6682 {
6683 /* Check for abbreviations -- webb */
6684 switch (*src)
6685 {
6686 case 'r': c = CAR; ++src; break;
6687 case 'n': c = NL; ++src; break;
6688 case 't': c = TAB; ++src; break;
6689 /* Oh no! \e already has meaning in subst pat :-( */
6690 /* case 'e': c = ESC; ++src; break; */
6691 case 'b': c = Ctrl_H; ++src; break;
6692
6693 /* If "backslash" is TRUE the backslash will be removed
6694 * later. Used to insert a literal CR. */
6695 default: if (backslash)
6696 {
6697 if (copy)
6698 *dst = '\\';
6699 ++dst;
6700 }
6701 c = *src++;
6702 }
6703 }
6704
6705 /* Write to buffer, if copy is set. */
6706#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006707 if (has_mbyte && (l = (*mb_ptr2len)(src - 1)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006708 {
6709 /* TODO: should use "func" here. */
6710 if (copy)
6711 mch_memmove(dst, src - 1, l);
/* Advance by "l - 1" only, the first byte was already consumed from
 * "src" and "dst" is incremented once more below. */
6712 dst += l - 1;
6713 src += l - 1;
6714 }
6715 else
6716 {
6717#endif
6718 if (copy)
6719 {
6720 if (func == (fptr)NULL) /* just copy */
6721 *dst = c;
6722 else /* change case */
6723 func = (fptr)(func(dst, c));
6724 /* Turbo C complains without the typecast */
6725 }
6726#ifdef FEAT_MBYTE
6727 }
6728#endif
6729 dst++;
6730 }
6731 else
6732 {
/* Insert back reference "no": find the start of the matched text in "s"
 * and its length (within the first line) in "len".  "s" is NULL when the
 * submatch did not match. */
6733 if (REG_MULTI)
6734 {
6735 clnum = reg_mmatch->startpos[no].lnum;
6736 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6737 s = NULL;
6738 else
6739 {
6740 s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6741 if (reg_mmatch->endpos[no].lnum == clnum)
6742 len = reg_mmatch->endpos[no].col
6743 - reg_mmatch->startpos[no].col;
6744 else
6745 len = (int)STRLEN(s);
6746 }
6747 }
6748 else
6749 {
6750 s = reg_match->startp[no];
6751 if (reg_match->endp[no] == NULL)
6752 s = NULL;
6753 else
6754 len = (int)(reg_match->endp[no] - s);
6755 }
6756 if (s != NULL)
6757 {
6758 for (;;)
6759 {
6760 if (len == 0)
6761 {
6762 if (REG_MULTI)
6763 {
6764 if (reg_mmatch->endpos[no].lnum == clnum)
6765 break;
/* The match continues in the next line: put a CR for the line break
 * and go on with that line. */
6766 if (copy)
6767 *dst = CAR;
6768 ++dst;
6769 s = reg_getline(++clnum);
6770 if (reg_mmatch->endpos[no].lnum == clnum)
6771 len = reg_mmatch->endpos[no].col;
6772 else
6773 len = (int)STRLEN(s);
6774 }
6775 else
6776 break;
6777 }
6778 else if (*s == NUL) /* we hit NUL. */
6779 {
6780 if (copy)
6781 EMSG(_(e_re_damg));
/* NOTE: jumping to "exit" skips storing the terminating NUL in "dest". */
6782 goto exit;
6783 }
6784 else
6785 {
6786 if (backslash && (*s == CAR || *s == '\\'))
6787 {
6788 /*
6789 * Insert a backslash in front of a CR, otherwise
6790 * it will be replaced by a line break.
6791 * Number of backslashes will be halved later,
6792 * double them here.
6793 */
6794 if (copy)
6795 {
6796 dst[0] = '\\';
6797 dst[1] = *s;
6798 }
6799 dst += 2;
6800 }
6801#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006802 else if (has_mbyte && (l = (*mb_ptr2len)(s)) > 1)
Bram Moolenaar071d4272004-06-13 20:20:40 +00006803 {
6804 /* TODO: should use "func" here. */
6805 if (copy)
6806 mch_memmove(dst, s, l);
6807 dst += l;
/* Advance by "l - 1" only, the last byte is accounted for by the
 * "++s" and "--len" below. */
6808 s += l - 1;
6809 len -= l - 1;
6810 }
6811#endif
6812 else
6813 {
6814 if (copy)
6815 {
6816 if (func == (fptr)NULL) /* just copy */
6817 *dst = *s;
6818 else /* change case */
6819 func = (fptr)(func(dst, *s));
6820 /* Turbo C complains without the typecast */
6821 }
6822 ++dst;
6823 }
6824 ++s;
6825 --len;
6826 }
6827 }
6828 }
6829 no = -1;
6830 }
6831 }
6832 if (copy)
6833 *dst = NUL;
6834
6835exit:
/* The "+ 1" accounts for the terminating NUL. */
6836 return (int)((dst - dest) + 1);
6837}
6838
6839#ifdef FEAT_EVAL
6840/*
6841 * Used for the submatch() function: get the string from tne n'th submatch in
6842 * allocated memory.
6843 * Returns NULL when not in a ":s" command and for a non-existing submatch.
6844 */
6845 char_u *
6846reg_submatch(no)
6847 int no;
6848{
6849 char_u *retval = NULL;
6850 char_u *s;
6851 int len;
6852 int round;
6853 linenr_T lnum;
6854
6855 if (!can_f_submatch)
6856 return NULL;
6857
6858 if (submatch_match == NULL)
6859 {
6860 /*
6861 * First round: compute the length and allocate memory.
6862 * Second round: copy the text.
6863 */
6864 for (round = 1; round <= 2; ++round)
6865 {
6866 lnum = submatch_mmatch->startpos[no].lnum;
6867 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
6868 return NULL;
6869
6870 s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
6871 if (s == NULL) /* anti-crash check, cannot happen? */
6872 break;
6873 if (submatch_mmatch->endpos[no].lnum == lnum)
6874 {
6875 /* Within one line: take form start to end col. */
6876 len = submatch_mmatch->endpos[no].col
6877 - submatch_mmatch->startpos[no].col;
6878 if (round == 2)
Bram Moolenaarbbebc852005-07-18 21:47:53 +00006879 vim_strncpy(retval, s, len);
Bram Moolenaar071d4272004-06-13 20:20:40 +00006880 ++len;
6881 }
6882 else
6883 {
6884 /* Multiple lines: take start line from start col, middle
6885 * lines completely and end line up to end col. */
6886 len = (int)STRLEN(s);
6887 if (round == 2)
6888 {
6889 STRCPY(retval, s);
6890 retval[len] = '\n';
6891 }
6892 ++len;
6893 ++lnum;
6894 while (lnum < submatch_mmatch->endpos[no].lnum)
6895 {
6896 s = reg_getline(lnum++);
6897 if (round == 2)
6898 STRCPY(retval + len, s);
6899 len += (int)STRLEN(s);
6900 if (round == 2)
6901 retval[len] = '\n';
6902 ++len;
6903 }
6904 if (round == 2)
6905 STRNCPY(retval + len, reg_getline(lnum),
6906 submatch_mmatch->endpos[no].col);
6907 len += submatch_mmatch->endpos[no].col;
6908 if (round == 2)
6909 retval[len] = NUL;
6910 ++len;
6911 }
6912
6913 if (round == 1)
6914 {
6915 retval = lalloc((long_u)len, TRUE);
6916 if (s == NULL)
6917 return NULL;
6918 }
6919 }
6920 }
6921 else
6922 {
6923 if (submatch_match->endp[no] == NULL)
6924 retval = NULL;
6925 else
6926 {
6927 s = submatch_match->startp[no];
6928 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
6929 }
6930 }
6931
6932 return retval;
6933}
6934#endif