blob: 2397af544f8b4ae2598368cac8b0df273db9f47c [file] [log] [blame]
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * Backtracking regular expression implementation.
4 *
5 * This file is included in "regexp.c".
6 *
7 * NOTICE:
8 *
9 * This is NOT the original regular expression code as written by Henry
10 * Spencer. This code has been modified specifically for use with the VIM
11 * editor, and should not be used separately from Vim. If you want a good
12 * regular expression library, get the original code. The copyright notice
13 * that follows is from the original.
14 *
15 * END NOTICE
16 *
17 * Copyright (c) 1986 by University of Toronto.
18 * Written by Henry Spencer. Not derived from licensed software.
19 *
20 * Permission is granted to anyone to use this software for any
21 * purpose on any computer system, and to redistribute it freely,
22 * subject to the following restrictions:
23 *
24 * 1. The author is not responsible for the consequences of use of
25 * this software, no matter how awful, even if they arise
26 * from defects in it.
27 *
28 * 2. The origin of this software must not be misrepresented, either
29 * by explicit claim or by omission.
30 *
31 * 3. Altered versions must be plainly marked as such, and must not
32 * be misrepresented as being the original software.
33 *
34 * Beware that some of this code is subtly aware of the way operator
35 * precedence is structured in regular expressions. Serious changes in
36 * regular-expression syntax might require a total rethink.
37 *
38 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
39 * Webb, Ciaran McCreesh and Bram Moolenaar.
40 * Named character class support added by Walter Briscoe (1998 Jul 01)
41 */
42
43/*
44 * The "internal use only" fields in regexp.h are present to pass info from
45 * compile to execute that permits the execute phase to run lots faster on
46 * simple cases. They are:
47 *
48 * regstart char that must begin a match; NUL if none obvious; Can be a
49 * multi-byte character.
50 * reganch is the match anchored (at beginning-of-line only)?
51 * regmust string (pointer into program) that match must include, or NULL
52 * regmlen length of regmust string
53 * regflags RF_ values or'ed together
54 *
55 * Regstart and reganch permit very fast decisions on suitable starting points
56 * for a match, cutting down the work a lot. Regmust permits fast rejection
57 * of lines that cannot possibly match. The regmust tests are costly enough
58 * that vim_regcomp() supplies a regmust only if the r.e. contains something
59 * potentially expensive (at present, the only such thing detected is * or +
60 * at the start of the r.e., which can involve a lot of backup). Regmlen is
61 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
62 * computing it anyway.
63 */
64
65/*
66 * Structure for regexp "program". This is essentially a linear encoding
67 * of a nondeterministic finite-state machine (aka syntax charts or
68 * "railroad normal form" in parsing technology). Each node is an opcode
69 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
70 * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
71 * pointer with a BRANCH on both ends of it is connecting two alternatives.
72 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
73 * (as opposed to a collection of them) is never concatenated with anything
74 * because of operator precedence). The "next" pointer of a BRACES_COMPLEX
75 * node points to the node after the stuff to be repeated.
76 * The operand of some types of node is a literal string; for others, it is a
77 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
78 * is the first node of the branch.
79 * (NB this is *not* a tree structure: the tail of the branch connects to the
80 * thing following the set of BRANCHes.)
81 *
82 * pattern is coded like:
83 *
84 * +-----------------+
85 * | V
86 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
87 * | ^ | ^
88 * +------+ +----------+
89 *
90 *
91 * +------------------+
92 * V |
93 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
94 * | | ^ ^
95 * | +---------------+ |
96 * +---------------------------------------------+
97 *
98 *
99 * +----------------------+
100 * V |
101 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
102 * | | ^ ^
103 * | +-----------+ |
104 * +--------------------------------------------------+
105 *
106 *
107 * +-------------------------+
108 * V |
109 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
110 * | | ^
111 * | +----------------+
112 * +-----------------------------------------------+
113 *
114 *
115 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
116 * | | ^ ^
117 * | +----------------+ |
118 * +--------------------------------+
119 *
120 * +---------+
121 * | V
122 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
123 * | | | | ^ ^
124 * | | | +-----+ |
125 * | | +----------------+ |
126 * | +---------------------------+ |
127 * +------------------------------------------------------+
128 *
129 * They all start with a BRANCH for "\|" alternatives, even when there is only
130 * one alternative.
131 */
132
133/*
134 * The opcodes are:
135 */
136
/*
 * Opcodes 0-19: anchors, branching, repetition and look-around.
 * The "opnd?" column tells what kind of operand follows the node, if any.
 */
/* definition		number	   opnd?    meaning */
#define END		0	/*	    End of program or NOMATCH operand. */
#define BOL		1	/*	    Match "" at beginning of line. */
#define EOL		2	/*	    Match "" at end of line. */
#define BRANCH		3	/* node	    Match this alternative, or the
				 *	    next... */
#define BACK		4	/*	    Match "", "next" ptr points backward. */
#define EXACTLY		5	/* str	    Match this string. */
#define NOTHING		6	/*	    Match empty string. */
#define STAR		7	/* node	    Match this (simple) thing 0 or more
				 *	    times. */
#define PLUS		8	/* node	    Match this (simple) thing 1 or more
				 *	    times. */
#define MATCH		9	/* node	    match the operand zero-width */
#define NOMATCH		10	/* node	    check for no match with operand */
#define BEHIND		11	/* node	    look behind for a match with operand */
#define NOBEHIND	12	/* node	    look behind for no match with operand */
#define SUBPAT		13	/* node	    match the operand here */
#define BRACE_SIMPLE	14	/* node	    Match this (simple) thing between m and
				 *	    n times (\{m,n\}). */
#define BOW		15	/*	    Match "" after [^a-zA-Z0-9_] */
#define EOW		16	/*	    Match "" at [^a-zA-Z0-9_] */
#define BRACE_LIMITS	17	/* nr nr    define the min & max for BRACE_SIMPLE
				 *	    and BRACE_COMPLEX. */
#define NEWL		18	/*	    Match line-break */
#define BHPOS		19	/*	    End position for BEHIND or NOBEHIND */

163
164
/*
 * Character classes: opcodes 20-48 are the normal classes, opcodes 50-78 are
 * the same classes that additionally match a line-break.  The second range is
 * obtained by adding ADD_NL to the normal opcode.
 *
 * FIRST_NL and LAST_NL are parenthesized so that they expand to a single
 * value in any expression (e.g. "2 * FIRST_NL").
 */
#define ADD_NL		30
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
#define LAST_NL		(NUPPER + ADD_NL)
/* TRUE when "op" is one of the classes that also match a line-break. */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
200
/*
 * Opcodes that come in groups of ten: the sub-expression number is added to
 * the base value.
 */
#define MOPEN		80	/* -89	     Mark this point in input as start of
				 *	     \( subexpr.  MOPEN + 0 marks start of
				 *	     match. */
#define MCLOSE		90	/* -99	     Analogous to MOPEN.  MCLOSE + 0 marks
				 *	     end of match. */
#define BACKREF		100	/* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN		110	/* -119	     Mark this point in input as start of
				 *	     \z( subexpr. */
# define ZCLOSE		120	/* -129	     Analogous to ZOPEN. */
# define ZREF		130	/* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX	140	/* -149 node Match nodes between m & n times */

#define NOPEN		150	/*	Mark this point in input as start of
				 *	\%( subexpr. */
#define NCLOSE		151	/*	Analogous to NOPEN. */

#define MULTIBYTECODE	200	/* mbc	Match one multi-byte character */
#define RE_BOF		201	/*	Match "" at beginning of file. */
#define RE_EOF		202	/*	Match "" at end of file. */
#define CURSOR		203	/*	Match location of cursor. */

#define RE_LNUM		204	/* nr cmp  Match line number */
#define RE_COL		205	/* nr cmp  Match column number */
#define RE_VCOL		206	/* nr cmp  Match virtual column number */

#define RE_MARK		207	/* mark cmp  Match mark position */
#define RE_VISUAL	208	/*	Match Visual area */
#define RE_COMPOSING	209	/*	any composing characters */
233
/*
 * Flags to be passed up and down.  They are bit values that can be or'ed
 * together; WORST is the value with no flag set.
 */
#define HASWIDTH	0x1	/* Known never to match null string. */
#define SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
#define SPSTART		0x4	/* Starts with * or +. */
#define HASNL		0x8	/* Contains some \n. */
#define HASLOOKBH	0x10	/* Contains "\@<=" or "\@<!". */
#define WORST		0	/* Worst case. */
243
244static int num_complex_braces; /* Complex \{...} count */
245static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */
246static long regsize; /* Code size. */
247static int reg_toolong; /* TRUE when offset out of range */
248static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
249static long brace_min[10]; /* Minimums for complex brace repeats */
250static long brace_max[10]; /* Maximums for complex brace repeats */
251static int brace_count[10]; /* Current counts for complex brace repeats */
252static int one_exactly = FALSE; /* only do one char for EXACTLY */
253
254/* When making changes to classchars also change nfa_classcodes. */
255static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
256static int classcodes[] = {
257 ANY, IDENT, SIDENT, KWORD, SKWORD,
258 FNAME, SFNAME, PRINT, SPRINT,
259 WHITE, NWHITE, DIGIT, NDIGIT,
260 HEX, NHEX, OCTAL, NOCTAL,
261 WORD, NWORD, HEAD, NHEAD,
262 ALPHA, NALPHA, LOWER, NLOWER,
263 UPPER, NUPPER
264};
265
266/*
267 * When regcode is set to this value, code is not emitted and size is computed
268 * instead.
269 */
270#define JUST_CALC_SIZE ((char_u *) -1)
271
// Values for rs_state in regitem_T: what kind of node was being worked on
// when the item was pushed.
typedef enum regstate_E
{
    RS_NOPEN = 0,	// NOPEN and NCLOSE
    RS_MOPEN,		// MOPEN + [0-9]
    RS_MCLOSE,		// MCLOSE + [0-9]
#ifdef FEAT_SYN_HL
    RS_ZOPEN,		// ZOPEN + [0-9]
    RS_ZCLOSE,		// ZCLOSE + [0-9]
#endif
    RS_BRANCH,		// BRANCH
    RS_BRCPLX_MORE,	// BRACE_COMPLEX and trying one more match
    RS_BRCPLX_LONG,	// BRACE_COMPLEX and trying longest match
    RS_BRCPLX_SHORT,	// BRACE_COMPLEX and trying shortest match
    RS_NOMATCH,		// NOMATCH
    RS_BEHIND1,		// BEHIND / NOBEHIND matching rest
    RS_BEHIND2,		// BEHIND / NOBEHIND matching behind part
    RS_STAR_LONG,	// STAR/PLUS/BRACE_SIMPLE longest match
    RS_STAR_SHORT	// STAR/PLUS/BRACE_SIMPLE shortest match
} regstate_T;
292
293/*
294 * Structure used to save the current input state, when it needs to be
295 * restored after trying a match. Used by reg_save() and reg_restore().
296 * Also stores the length of "backpos".
297 */
298typedef struct
299{
300 union
301 {
302 char_u *ptr; /* rex.input pointer, for single-line regexp */
303 lpos_T pos; /* rex.input pos, for multi-line regexp */
304 } rs_u;
305 int rs_len;
306} regsave_T;
307
308/* struct to save start/end pointer/position in for \(\) */
309typedef struct
310{
311 union
312 {
313 char_u *ptr;
314 lpos_T pos;
315 } se_u;
316} save_se_T;
317
318/* used for BEHIND and NOBEHIND matching */
319typedef struct regbehind_S
320{
321 regsave_T save_after;
322 regsave_T save_behind;
323 int save_need_clear_subexpr;
324 save_se_T save_start[NSUBEXP];
325 save_se_T save_end[NSUBEXP];
326} regbehind_T;
327
328/*
329 * When there are alternatives a regstate_T is put on the regstack to remember
330 * what we are doing.
331 * Before it may be another type of item, depending on rs_state, to remember
332 * more things.
333 */
334typedef struct regitem_S
335{
336 regstate_T rs_state; // what we are doing, one of RS_ above
337 short rs_no; // submatch nr or BEHIND/NOBEHIND
338 char_u *rs_scan; // current node in program
339 union
340 {
341 save_se_T sesave;
342 regsave_T regsave;
343 } rs_un; // room for saving rex.input
344} regitem_T;
345
346
// Bookkeeping for STAR, PLUS and BRACE_SIMPLE matching.
typedef struct regstar_S
{
    int		nextb;		// next byte
    int		nextb_ic;	// next byte reverse case
    long	count;
    long	minval;
    long	maxval;
} regstar_T;
356
357/* used to store input position when a BACK was encountered, so that we now if
358 * we made any progress since the last time. */
359typedef struct backpos_S
360{
361 char_u *bp_scan; /* "scan" where BACK was encountered */
362 regsave_T bp_pos; /* last input position */
363} backpos_T;
364
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items, used for BACK.
 */
372static garray_T regstack = {0, 0, 0, 0, NULL};
373static garray_T backpos = {0, 0, 0, 0, NULL};
374
375static regsave_T behind_pos;
376
377/*
378 * Both for regstack and backpos tables we use the following strategy of
379 * allocation (to reduce malloc/free calls):
380 * - Initial size is fairly small.
381 * - When needed, the tables are grown bigger (8 times at first, double after
382 * that).
383 * - After executing the match we free the memory only if the array has grown.
384 * Thus the memory is kept allocated when it's at the initial size.
385 * This makes it fast while not keeping a lot of memory allocated.
386 * A three times speed increase was observed when using many simple patterns.
387 */
388#define REGSTACK_INITIAL 2048
389#define BACKPOS_INITIAL 64
390
391/*
392 * Opcode notes:
393 *
394 * BRANCH The set of branches constituting a single choice are hooked
395 * together with their "next" pointers, since precedence prevents
396 * anything being concatenated to any individual branch. The
397 * "next" pointer of the last BRANCH in a choice points to the
398 * thing following the whole choice. This is also where the
399 * final "next" pointer of each individual branch points; each
400 * branch starts with the operand node of a BRANCH node.
401 *
402 * BACK Normal "next" pointers all implicitly point forward; BACK
403 * exists to make loop structures possible.
404 *
405 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
406 * BRANCH structures using BACK. Simple cases (one character
407 * per match) are implemented with STAR and PLUS for speed
408 * and to minimize recursive plunges.
409 *
410 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
411 * node, and defines the min and max limits to be used for that
412 * node.
413 *
414 * MOPEN,MCLOSE ...are numbered at compile time.
415 * ZOPEN,ZCLOSE ...ditto
416 */
417
418/*
419 * A node is one char of opcode followed by two chars of "next" pointer.
420 * "Next" pointers are stored as two 8-bit bytes, high order first. The
421 * value is a positive offset from the opcode of the node containing it.
422 * An operand, if any, simply follows the node. (Note that much of the
423 * code generation knows about this implicit relationship.)
424 *
425 * Using two bytes for the "next" pointer is vast overkill for most things,
426 * but allows patterns to get big without disasters.
427 */
/*
 * Accessors for a node in the program.  "p" is expected to point to unsigned
 * bytes and to the opcode byte of the node.
 */
/* The opcode of the node. */
#define OP(p)		((int)*(p))
/* The offset stored in the two "next" bytes, high order byte first. */
#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* The operand, which directly follows the three byte node header. */
#define OPERAND(p)	((p) + 3)
/*
 * Obtain an operand that was stored as four bytes, MSB first.
 * The bytes are combined as unsigned values and only the result is converted
 * to long: left-shifting a signed long into its sign bit is undefined
 * behavior where long has 32 bits.
 */
#define OPERAND_MIN(p)	((long)(((unsigned long)(p)[3] << 24) \
			    + ((unsigned long)(p)[4] << 16) \
			    + ((unsigned long)(p)[5] << 8) \
			    + (unsigned long)(p)[6]))
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)	((p)[7])
438
439static char_u *reg(int paren, int *flagp);
440
441#ifdef BT_REGEXP_DUMP
442static void regdump(char_u *, bt_regprog_T *);
443#endif
444
445static int re_num_cmp(long_u val, char_u *scan);
446
447#ifdef DEBUG
448static char_u *regprop(char_u *);
449
450static int regnarrate = 0;
451#endif
452
453
454/*
455 * Setup to parse the regexp. Used once to get the length and once to do it.
456 */
457 static void
458regcomp_start(
459 char_u *expr,
460 int re_flags) /* see vim_regcomp() */
461{
462 initchr(expr);
463 if (re_flags & RE_MAGIC)
464 reg_magic = MAGIC_ON;
465 else
466 reg_magic = MAGIC_OFF;
467 reg_string = (re_flags & RE_STRING);
468 reg_strict = (re_flags & RE_STRICT);
469 get_cpo_flags();
470
471 num_complex_braces = 0;
472 regnpar = 1;
473 vim_memset(had_endbrace, 0, sizeof(had_endbrace));
474#ifdef FEAT_SYN_HL
475 regnzpar = 1;
476 re_has_z = 0;
477#endif
478 regsize = 0L;
479 reg_toolong = FALSE;
480 regflags = 0;
481#if defined(FEAT_SYN_HL) || defined(PROTO)
482 had_eol = FALSE;
483#endif
484}
485
486/*
487 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
488 * character "c".
489 */
490 static int
491use_multibytecode(int c)
492{
493 return has_mbyte && (*mb_char2len)(c) > 1
494 && (re_multi_type(peekchr()) != NOT_MULTI
495 || (enc_utf8 && utf_iscomposing(c)));
496}
497
498/*
499 * Emit (if appropriate) a byte of code
500 */
501 static void
502regc(int b)
503{
504 if (regcode == JUST_CALC_SIZE)
505 regsize++;
506 else
507 *regcode++ = b;
508}
509
510/*
511 * Emit (if appropriate) a multi-byte character of code
512 */
513 static void
514regmbc(int c)
515{
516 if (!has_mbyte && c > 0xff)
517 return;
518 if (regcode == JUST_CALC_SIZE)
519 regsize += (*mb_char2len)(c);
520 else
521 regcode += (*mb_char2bytes)(c, regcode);
522}
523
524#define REGMBC(x) regmbc(x);
525#define CASEMBC(x) case x:
526
527/*
528 * Produce the bytes for equivalence class "c".
529 * Currently only handles latin1, latin9 and utf-8.
530 * NOTE: When changing this function, also change nfa_emit_equi_class()
531 */
532 static void
533reg_equi_class(int c)
534{
535 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
536 || STRCMP(p_enc, "iso-8859-15") == 0)
537 {
538#ifdef EBCDIC
539 int i;
540
541 /* This might be slower than switch/case below. */
542 for (i = 0; i < 16; i++)
543 {
544 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
545 {
546 char *p = EQUIVAL_CLASS_C[i];
547
548 while (*p != 0)
549 regmbc(*p++);
550 return;
551 }
552 }
553#else
554 switch (c)
555 {
556 /* Do not use '\300' style, it results in a negative number. */
557 case 'A': case 0xc0: case 0xc1: case 0xc2:
558 case 0xc3: case 0xc4: case 0xc5:
559 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
560 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
561 regmbc('A'); regmbc(0xc0); regmbc(0xc1);
562 regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
563 regmbc(0xc5);
564 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
565 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
566 REGMBC(0x1ea2)
567 return;
568 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
569 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
570 return;
571 case 'C': case 0xc7:
572 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
573 regmbc('C'); regmbc(0xc7);
574 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
575 REGMBC(0x10c)
576 return;
577 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
578 CASEMBC(0x1e0e) CASEMBC(0x1e10)
579 regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
580 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
581 return;
582 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
583 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
584 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
585 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
586 regmbc(0xca); regmbc(0xcb);
587 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
588 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
589 REGMBC(0x1ebc)
590 return;
591 case 'F': CASEMBC(0x1e1e)
592 regmbc('F'); REGMBC(0x1e1e)
593 return;
594 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
595 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
596 CASEMBC(0x1e20)
597 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
598 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
599 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
600 return;
601 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
602 CASEMBC(0x1e26) CASEMBC(0x1e28)
603 regmbc('H'); REGMBC(0x124) REGMBC(0x126)
604 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
605 return;
606 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
607 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
608 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
609 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
610 regmbc(0xce); regmbc(0xcf);
611 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
612 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
613 REGMBC(0x1ec8)
614 return;
615 case 'J': CASEMBC(0x134)
616 regmbc('J'); REGMBC(0x134)
617 return;
618 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
619 CASEMBC(0x1e34)
620 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
621 REGMBC(0x1e30) REGMBC(0x1e34)
622 return;
623 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
624 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
625 regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
626 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
627 REGMBC(0x1e3a)
628 return;
629 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
630 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
631 return;
632 case 'N': case 0xd1:
633 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
634 CASEMBC(0x1e48)
635 regmbc('N'); regmbc(0xd1);
636 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
637 REGMBC(0x1e44) REGMBC(0x1e48)
638 return;
639 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
640 case 0xd6: case 0xd8:
641 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
642 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
643 regmbc('O'); regmbc(0xd2); regmbc(0xd3);
644 regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
645 regmbc(0xd8);
646 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
647 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
648 REGMBC(0x1ec) REGMBC(0x1ece)
649 return;
650 case 'P': case 0x1e54: case 0x1e56:
651 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
652 return;
653 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
654 CASEMBC(0x1e58) CASEMBC(0x1e5e)
655 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
656 REGMBC(0x1e58) REGMBC(0x1e5e)
657 return;
658 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
659 CASEMBC(0x160) CASEMBC(0x1e60)
660 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
661 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
662 return;
663 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
664 CASEMBC(0x1e6a) CASEMBC(0x1e6e)
665 regmbc('T'); REGMBC(0x162) REGMBC(0x164)
666 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
667 return;
668 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
669 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
670 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
671 CASEMBC(0x1ee6)
672 regmbc('U'); regmbc(0xd9); regmbc(0xda);
673 regmbc(0xdb); regmbc(0xdc);
674 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
675 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
676 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
677 return;
678 case 'V': CASEMBC(0x1e7c)
679 regmbc('V'); REGMBC(0x1e7c)
680 return;
681 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
682 CASEMBC(0x1e84) CASEMBC(0x1e86)
683 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
684 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
685 return;
686 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
687 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
688 return;
689 case 'Y': case 0xdd:
690 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
691 CASEMBC(0x1ef6) CASEMBC(0x1ef8)
692 regmbc('Y'); regmbc(0xdd);
693 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
694 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
695 return;
696 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
697 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
698 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
699 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
700 REGMBC(0x1e94)
701 return;
702 case 'a': case 0xe0: case 0xe1: case 0xe2:
703 case 0xe3: case 0xe4: case 0xe5:
704 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
705 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
706 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
707 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
708 regmbc(0xe5);
709 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
710 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
711 REGMBC(0x1ea3)
712 return;
713 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
714 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
715 return;
716 case 'c': case 0xe7:
717 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
718 regmbc('c'); regmbc(0xe7);
719 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
720 REGMBC(0x10d)
721 return;
722 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
723 CASEMBC(0x1e0f) CASEMBC(0x1e11)
724 regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
725 REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
726 return;
727 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
728 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
729 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
730 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
731 regmbc(0xea); regmbc(0xeb);
732 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
733 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
734 REGMBC(0x1ebd)
735 return;
736 case 'f': CASEMBC(0x1e1f)
737 regmbc('f'); REGMBC(0x1e1f)
738 return;
739 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
740 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
741 CASEMBC(0x1e21)
742 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
743 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
744 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
745 return;
746 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
747 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
748 regmbc('h'); REGMBC(0x125) REGMBC(0x127)
749 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
750 REGMBC(0x1e96)
751 return;
752 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
753 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
754 CASEMBC(0x1d0) CASEMBC(0x1ec9)
755 regmbc('i'); regmbc(0xec); regmbc(0xed);
756 regmbc(0xee); regmbc(0xef);
757 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
758 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
759 return;
760 case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
761 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
762 return;
763 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
764 CASEMBC(0x1e35)
765 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
766 REGMBC(0x1e31) REGMBC(0x1e35)
767 return;
768 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
769 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
770 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
771 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
772 REGMBC(0x1e3b)
773 return;
774 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
775 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
776 return;
777 case 'n': case 0xf1:
778 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
779 CASEMBC(0x1e45) CASEMBC(0x1e49)
780 regmbc('n'); regmbc(0xf1);
781 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
782 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
783 return;
784 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
785 case 0xf6: case 0xf8:
786 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
787 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
788 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
789 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
790 regmbc(0xf8);
791 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
792 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
793 REGMBC(0x1ed) REGMBC(0x1ecf)
794 return;
795 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
796 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
797 return;
798 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
799 CASEMBC(0x1e59) CASEMBC(0x1e5f)
800 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
801 REGMBC(0x1e59) REGMBC(0x1e5f)
802 return;
803 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
804 CASEMBC(0x161) CASEMBC(0x1e61)
805 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
806 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
807 return;
808 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
809 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
810 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
811 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
812 return;
813 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
814 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
815 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
816 CASEMBC(0x1ee7)
817 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
818 regmbc(0xfb); regmbc(0xfc);
819 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
820 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
821 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
822 return;
823 case 'v': CASEMBC(0x1e7d)
824 regmbc('v'); REGMBC(0x1e7d)
825 return;
826 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
827 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
828 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
829 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
830 REGMBC(0x1e98)
831 return;
832 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
833 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
834 return;
835 case 'y': case 0xfd: case 0xff:
836 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
837 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
838 regmbc('y'); regmbc(0xfd); regmbc(0xff);
839 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
840 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
841 return;
842 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
843 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
844 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
845 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
846 REGMBC(0x1e95)
847 return;
848 }
849#endif
850 }
851 regmbc(c);
852}
853
854/*
855 * Emit a node.
856 * Return pointer to generated code.
857 */
858 static char_u *
859regnode(int op)
860{
861 char_u *ret;
862
863 ret = regcode;
864 if (ret == JUST_CALC_SIZE)
865 regsize += 3;
866 else
867 {
868 *regcode++ = op;
869 *regcode++ = NUL; /* Null "next" pointer. */
870 *regcode++ = NUL;
871 }
872 return ret;
873}
874
875/*
876 * Write a long as four bytes at "p" and return pointer to the next char.
877 */
878 static char_u *
879re_put_long(char_u *p, long_u val)
880{
881 *p++ = (char_u) ((val >> 24) & 0377);
882 *p++ = (char_u) ((val >> 16) & 0377);
883 *p++ = (char_u) ((val >> 8) & 0377);
884 *p++ = (char_u) (val & 0377);
885 return p;
886}
887
888/*
889 * regnext - dig the "next" pointer out of a node
890 * Returns NULL when calculating size, when there is no next item and when
891 * there is an error.
892 */
893 static char_u *
894regnext(char_u *p)
895{
896 int offset;
897
898 if (p == JUST_CALC_SIZE || reg_toolong)
899 return NULL;
900
901 offset = NEXT(p);
902 if (offset == 0)
903 return NULL;
904
905 if (OP(p) == BACK)
906 return p - offset;
907 else
908 return p + offset;
909}
910
911/*
912 * Set the next-pointer at the end of a node chain.
913 */
914 static void
915regtail(char_u *p, char_u *val)
916{
917 char_u *scan;
918 char_u *temp;
919 int offset;
920
921 if (p == JUST_CALC_SIZE)
922 return;
923
924 /* Find last node. */
925 scan = p;
926 for (;;)
927 {
928 temp = regnext(scan);
929 if (temp == NULL)
930 break;
931 scan = temp;
932 }
933
934 if (OP(scan) == BACK)
935 offset = (int)(scan - val);
936 else
937 offset = (int)(val - scan);
938 /* When the offset uses more than 16 bits it can no longer fit in the two
939 * bytes available. Use a global flag to avoid having to check return
940 * values in too many places. */
941 if (offset > 0xffff)
942 reg_toolong = TRUE;
943 else
944 {
945 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
946 *(scan + 2) = (char_u) (offset & 0377);
947 }
948}
949
950/*
951 * Like regtail, on item after a BRANCH; nop if none.
952 */
953 static void
954regoptail(char_u *p, char_u *val)
955{
956 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
957 if (p == NULL || p == JUST_CALC_SIZE
958 || (OP(p) != BRANCH
959 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
960 return;
961 regtail(OPERAND(p), val);
962}
963
964/*
965 * Insert an operator in front of already-emitted operand
966 *
967 * Means relocating the operand.
968 */
969 static void
970reginsert(int op, char_u *opnd)
971{
972 char_u *src;
973 char_u *dst;
974 char_u *place;
975
976 if (regcode == JUST_CALC_SIZE)
977 {
978 regsize += 3;
979 return;
980 }
981 src = regcode;
982 regcode += 3;
983 dst = regcode;
984 while (src > opnd)
985 *--dst = *--src;
986
987 place = opnd; /* Op node, where operand used to be. */
988 *place++ = op;
989 *place++ = NUL;
990 *place = NUL;
991}
992
993/*
994 * Insert an operator in front of already-emitted operand.
995 * Add a number to the operator.
996 */
997 static void
998reginsert_nr(int op, long val, char_u *opnd)
999{
1000 char_u *src;
1001 char_u *dst;
1002 char_u *place;
1003
1004 if (regcode == JUST_CALC_SIZE)
1005 {
1006 regsize += 7;
1007 return;
1008 }
1009 src = regcode;
1010 regcode += 7;
1011 dst = regcode;
1012 while (src > opnd)
1013 *--dst = *--src;
1014
1015 place = opnd; /* Op node, where operand used to be. */
1016 *place++ = op;
1017 *place++ = NUL;
1018 *place++ = NUL;
1019 re_put_long(place, (long_u)val);
1020}
1021
1022/*
1023 * Insert an operator in front of already-emitted operand.
1024 * The operator has the given limit values as operands. Also set next pointer.
1025 *
1026 * Means relocating the operand.
1027 */
1028 static void
1029reginsert_limits(
1030 int op,
1031 long minval,
1032 long maxval,
1033 char_u *opnd)
1034{
1035 char_u *src;
1036 char_u *dst;
1037 char_u *place;
1038
1039 if (regcode == JUST_CALC_SIZE)
1040 {
1041 regsize += 11;
1042 return;
1043 }
1044 src = regcode;
1045 regcode += 11;
1046 dst = regcode;
1047 while (src > opnd)
1048 *--dst = *--src;
1049
1050 place = opnd; /* Op node, where operand used to be. */
1051 *place++ = op;
1052 *place++ = NUL;
1053 *place++ = NUL;
1054 place = re_put_long(place, (long_u)minval);
1055 place = re_put_long(place, (long_u)maxval);
1056 regtail(opnd, place);
1057}
1058
1059/*
1060 * Return TRUE if the back reference is legal. We must have seen the close
1061 * brace.
1062 * TODO: Should also check that we don't refer to something that is repeated
1063 * (+*=): what instance of the repetition should we match?
1064 */
1065 static int
1066seen_endbrace(int refnum)
1067{
1068 if (!had_endbrace[refnum])
1069 {
1070 char_u *p;
1071
1072 /* Trick: check if "@<=" or "@<!" follows, in which case
1073 * the \1 can appear before the referenced match. */
1074 for (p = regparse; *p != NUL; ++p)
1075 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1076 break;
1077 if (*p == NUL)
1078 {
1079 emsg(_("E65: Illegal back reference"));
1080 rc_did_emsg = TRUE;
1081 return FALSE;
1082 }
1083 }
1084 return TRUE;
1085}
1086
1087/*
1088 * Parse the lowest level.
1089 *
1090 * Optimization: gobbles an entire sequence of ordinary characters so that
1091 * it can turn them into a single node, which is smaller to store and
1092 * faster to run. Don't do this when one_exactly is set.
 *
 * "flagp" is first set to WORST; depending on the atom that was parsed
 * HASWIDTH, SIMPLE, HASNL, SPSTART and/or HASLOOKBH are added to it.
 * Returns a pointer to the first node emitted for the atom (regnode()
 * returns JUST_CALC_SIZE for it in the size-calculation pass).  The
 * explicit error paths return NULL.
 * NOTE(review): the EMSG*_RET_NULL macros are defined outside this part of
 * the file; going by their names they give a message and return NULL.
1093 */
1094 static char_u *
1095regatom(int *flagp)
1096{
1097 char_u *ret;
1098 int flags;
1099 int c;
1100 char_u *p;
1101 int extra = 0;
1102 int save_prev_at_start = prev_at_start;
1103
1104 *flagp = WORST; /* Tentatively. */
1105
1106 c = getchr();
1107 switch (c)
1108 {
1109 case Magic('^'):
1110 ret = regnode(BOL);
1111 break;
1112
1113 case Magic('$'):
1114 ret = regnode(EOL);
1115#if defined(FEAT_SYN_HL) || defined(PROTO)
1116 had_eol = TRUE;
1117#endif
1118 break;
1119
1120 case Magic('<'):
1121 ret = regnode(BOW);
1122 break;
1123
1124 case Magic('>'):
1125 ret = regnode(EOW);
1126 break;
1127
1128 case Magic('_'):
1129 c = no_Magic(getchr());
1130 if (c == '^') /* "\_^" is start-of-line */
1131 {
1132 ret = regnode(BOL);
1133 break;
1134 }
1135 if (c == '$') /* "\_$" is end-of-line */
1136 {
1137 ret = regnode(EOL);
1138#if defined(FEAT_SYN_HL) || defined(PROTO)
1139 had_eol = TRUE;
1140#endif
1141 break;
1142 }
1143
        /* From here on "extra" is added to the opcode that gets emitted
         * for the class or collection that follows the "\_". */
1144 extra = ADD_NL;
1145 *flagp |= HASNL;
1146
1147 /* "\_[" is character range plus newline */
1148 if (c == '[')
1149 goto collection;
1150
1151 /* "\_x" is character class plus newline */
1152 /* FALLTHROUGH */
1153
1154 /*
1155 * Character classes.
1156 */
1157 case Magic('.'):
1158 case Magic('i'):
1159 case Magic('I'):
1160 case Magic('k'):
1161 case Magic('K'):
1162 case Magic('f'):
1163 case Magic('F'):
1164 case Magic('p'):
1165 case Magic('P'):
1166 case Magic('s'):
1167 case Magic('S'):
1168 case Magic('d'):
1169 case Magic('D'):
1170 case Magic('x'):
1171 case Magic('X'):
1172 case Magic('o'):
1173 case Magic('O'):
1174 case Magic('w'):
1175 case Magic('W'):
1176 case Magic('h'):
1177 case Magic('H'):
1178 case Magic('a'):
1179 case Magic('A'):
1180 case Magic('l'):
1181 case Magic('L'):
1182 case Magic('u'):
1183 case Magic('U'):
        /* The position of the character in "classchars" is used below as
         * the index of its opcode in "classcodes".  A character that is
         * not found can only get here through the "\_" fallthrough. */
1184 p = vim_strchr(classchars, no_Magic(c));
1185 if (p == NULL)
1186 EMSG_RET_NULL(_("E63: invalid use of \\_"));
1187
1188 /* When '.' is followed by a composing char ignore the dot, so that
1189 * the composing char is matched here. */
1190 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1191 {
1192 c = getchr();
1193 goto do_multibyte;
1194 }
1195 ret = regnode(classcodes[p - classchars] + extra);
1196 *flagp |= HASWIDTH | SIMPLE;
1197 break;
1198
1199 case Magic('n'):
1200 if (reg_string)
1201 {
1202 /* In a string "\n" matches a newline character. */
1203 ret = regnode(EXACTLY);
1204 regc(NL);
1205 regc(NUL);
1206 *flagp |= HASWIDTH | SIMPLE;
1207 }
1208 else
1209 {
1210 /* In buffer text "\n" matches the end of a line. */
1211 ret = regnode(NEWL);
1212 *flagp |= HASWIDTH | HASNL;
1213 }
1214 break;
1215
        /* Group with a back reference: parsed recursively by reg(), only
         * the flags listed below are passed on to the caller. */
1216 case Magic('('):
1217 if (one_exactly)
1218 EMSG_ONE_RET_NULL;
1219 ret = reg(REG_PAREN, &flags);
1220 if (ret == NULL)
1221 return NULL;
1222 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1223 break;
1224
1225 case NUL:
1226 case Magic('|'):
1227 case Magic('&'):
1228 case Magic(')'):
1229 if (one_exactly)
1230 EMSG_ONE_RET_NULL;
1231 IEMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
1232 /* NOTREACHED */
1233
        /* A multi item without an atom in front of it. */
1234 case Magic('='):
1235 case Magic('?'):
1236 case Magic('+'):
1237 case Magic('@'):
1238 case Magic('{'):
1239 case Magic('*'):
1240 c = no_Magic(c);
1241 EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
1242 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
1243 /* NOTREACHED */
1244
1245 case Magic('~'): /* previous substitute pattern */
1246 if (reg_prev_sub != NULL)
1247 {
1248 char_u *lp;
1249
            /* Emit the bytes of "reg_prev_sub" literally as one EXACTLY
             * node.  It has width unless it is empty and is SIMPLE only
             * when it is exactly one byte long. */
1250 ret = regnode(EXACTLY);
1251 lp = reg_prev_sub;
1252 while (*lp != NUL)
1253 regc(*lp++);
1254 regc(NUL);
1255 if (*reg_prev_sub != NUL)
1256 {
1257 *flagp |= HASWIDTH;
1258 if ((lp - reg_prev_sub) == 1)
1259 *flagp |= SIMPLE;
1260 }
1261 }
1262 else
1263 EMSG_RET_NULL(_(e_nopresub));
1264 break;
1265
1266 case Magic('1'):
1267 case Magic('2'):
1268 case Magic('3'):
1269 case Magic('4'):
1270 case Magic('5'):
1271 case Magic('6'):
1272 case Magic('7'):
1273 case Magic('8'):
1274 case Magic('9'):
1275 {
1276 int refnum;
1277
            /* Back reference: seen_endbrace() decides whether it is legal
             * at this position. */
1278 refnum = c - Magic('0');
1279 if (!seen_endbrace(refnum))
1280 return NULL;
1281 ret = regnode(BACKREF + refnum);
1282 }
1283 break;
1284
1285 case Magic('z'):
1286 {
1287 c = no_Magic(getchr());
1288 switch (c)
1289 {
1290#ifdef FEAT_SYN_HL
1291 case '(': if ((reg_do_extmatch & REX_SET) == 0)
1292 EMSG_RET_NULL(_(e_z_not_allowed));
1293 if (one_exactly)
1294 EMSG_ONE_RET_NULL;
1295 ret = reg(REG_ZPAREN, &flags);
1296 if (ret == NULL)
1297 return NULL;
1298 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1299 re_has_z = REX_SET;
1300 break;
1301
1302 case '1':
1303 case '2':
1304 case '3':
1305 case '4':
1306 case '5':
1307 case '6':
1308 case '7':
1309 case '8':
1310 case '9': if ((reg_do_extmatch & REX_USE) == 0)
1311 EMSG_RET_NULL(_(e_z1_not_allowed));
1312 ret = regnode(ZREF + c - '0');
1313 re_has_z = REX_USE;
1314 break;
1315#endif
1316
            /* "\zs" and "\ze" are emitted as the open and close node of
             * sub-expression zero. */
1317 case 's': ret = regnode(MOPEN + 0);
1318 if (re_mult_next("\\zs") == FAIL)
1319 return NULL;
1320 break;
1321
1322 case 'e': ret = regnode(MCLOSE + 0);
1323 if (re_mult_next("\\ze") == FAIL)
1324 return NULL;
1325 break;
1326
1327 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1328 }
1329 }
1330 break;
1331
1332 case Magic('%'):
1333 {
1334 c = no_Magic(getchr());
1335 switch (c)
1336 {
1337 /* () without a back reference */
1338 case '(':
1339 if (one_exactly)
1340 EMSG_ONE_RET_NULL;
1341 ret = reg(REG_NPAREN, &flags);
1342 if (ret == NULL)
1343 return NULL;
1344 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1345 break;
1346
1347 /* Catch \%^ and \%$ regardless of where they appear in the
1348 * pattern -- regardless of whether or not it makes sense. */
1349 case '^':
1350 ret = regnode(RE_BOF);
1351 break;
1352
1353 case '$':
1354 ret = regnode(RE_EOF);
1355 break;
1356
1357 case '#':
1358 ret = regnode(CURSOR);
1359 break;
1360
1361 case 'V':
1362 ret = regnode(RE_VISUAL);
1363 break;
1364
1365 case 'C':
1366 ret = regnode(RE_COMPOSING);
1367 break;
1368
1369 /* \%[abc]: Emit as a list of branches, all ending at the last
1370 * branch which matches nothing. */
1371 case '[':
1372 if (one_exactly) /* doesn't nest */
1373 EMSG_ONE_RET_NULL;
1374 {
1375 char_u *lastbranch;
1376 char_u *lastnode = NULL;
1377 char_u *br;
1378
1379 ret = NULL;
1380 while ((c = getchr()) != ']')
1381 {
1382 if (c == NUL)
1383 EMSG2_RET_NULL(_(e_missing_sb),
1384 reg_magic == MAGIC_ALL);
1385 br = regnode(BRANCH);
1386 if (ret == NULL)
1387 ret = br;
1388 else
1389 {
1390 regtail(lastnode, br);
1391 if (reg_toolong)
1392 return NULL;
1393 }
1394
                /* Put the character back and parse it as an atom with
                 * "one_exactly" set, so that regatom() does not gobble
                 * a sequence of characters into one node. */
1395 ungetchr();
1396 one_exactly = TRUE;
1397 lastnode = regatom(flagp);
1398 one_exactly = FALSE;
1399 if (lastnode == NULL)
1400 return NULL;
1401 }
1402 if (ret == NULL)
1403 EMSG2_RET_NULL(_(e_empty_sb),
1404 reg_magic == MAGIC_ALL);
1405 lastbranch = regnode(BRANCH);
1406 br = regnode(NOTHING);
                /* In the size-calculation pass there are no nodes to
                 * connect. */
1407 if (ret != JUST_CALC_SIZE)
1408 {
1409 regtail(lastnode, br);
1410 regtail(lastbranch, br);
1411 /* connect all branches to the NOTHING
1412 * branch at the end */
1413 for (br = ret; br != lastnode; )
1414 {
1415 if (OP(br) == BRANCH)
1416 {
1417 regtail(br, lastbranch);
1418 if (reg_toolong)
1419 return NULL;
1420 br = OPERAND(br);
1421 }
1422 else
1423 br = regnext(br);
1424 }
1425 }
                /* The flags set by the recursive regatom() calls are
                 * dropped: the last branch matches nothing, thus no width
                 * is guaranteed. */
1426 *flagp &= ~(HASWIDTH | SIMPLE);
1427 break;
1428 }
1429
1430 case 'd': /* %d123 decimal */
1431 case 'o': /* %o123 octal */
1432 case 'x': /* %xab hex 2 */
1433 case 'u': /* %uabcd hex 4 */
1434 case 'U': /* %U1234abcd hex 8 */
1435 {
1436 long i;
1437
1438 switch (c)
1439 {
1440 case 'd': i = getdecchrs(); break;
1441 case 'o': i = getoctchrs(); break;
1442 case 'x': i = gethexchrs(2); break;
1443 case 'u': i = gethexchrs(4); break;
1444 case 'U': i = gethexchrs(8); break;
1445 default: i = -1; break;
1446 }
1447
1448 if (i < 0 || i > INT_MAX)
1449 EMSG2_RET_NULL(
1450 _("E678: Invalid character after %s%%[dxouU]"),
1451 reg_magic == MAGIC_ALL);
1452 if (use_multibytecode(i))
1453 ret = regnode(MULTIBYTECODE);
1454 else
1455 ret = regnode(EXACTLY);
                /* NOTE(review): character number zero is emitted as 0x0a;
                 * presumably because a NUL byte would end the operand,
                 * which is terminated with regc(NUL) below -- confirm. */
1456 if (i == 0)
1457 regc(0x0a);
1458 else
1459 regmbc(i);
1460 regc(NUL);
1461 *flagp |= HASWIDTH;
1462 break;
1463 }
1464
1465 default:
1466 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1467 || c == '\'')
1468 {
1469 long_u n = 0;
1470 int cmp;
1471
                /* "cmp" remembers the first character after "\%": the
                 * optional '<' or '>' comparator.  Without a comparator
                 * it holds the first digit or the "'". */
1472 cmp = c;
1473 if (cmp == '<' || cmp == '>')
1474 c = getchr();
1475 while (VIM_ISDIGIT(c))
1476 {
1477 n = n * 10 + (c - '0');
1478 c = getchr();
1479 }
1480 if (c == '\'' && n == 0)
1481 {
1482 /* "\%'m", "\%<'m" and "\%>'m": Mark */
1483 c = getchr();
1484 ret = regnode(RE_MARK);
                    /* Two operand bytes follow the node: the mark
                     * character and "cmp". */
1485 if (ret == JUST_CALC_SIZE)
1486 regsize += 2;
1487 else
1488 {
1489 *regcode++ = c;
1490 *regcode++ = cmp;
1491 }
1492 break;
1493 }
1494 else if (c == 'l' || c == 'c' || c == 'v')
1495 {
1496 if (c == 'l')
1497 {
1498 ret = regnode(RE_LNUM);
1499 if (save_prev_at_start)
1500 at_start = TRUE;
1501 }
1502 else if (c == 'c')
1503 ret = regnode(RE_COL);
1504 else
1505 ret = regnode(RE_VCOL);
                    /* Five operand bytes follow the node: four for the
                     * number and one for "cmp". */
1506 if (ret == JUST_CALC_SIZE)
1507 regsize += 5;
1508 else
1509 {
1510 /* put the number and the optional
1511 * comparator after the opcode */
1512 regcode = re_put_long(regcode, n);
1513 *regcode++ = cmp;
1514 }
1515 break;
1516 }
1517 }
1518
1519 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
1520 reg_magic == MAGIC_ALL);
1521 }
1522 }
1523 break;
1524
1525 case Magic('['):
1526collection:
1527 {
1528 char_u *lp;
1529
1530 /*
1531 * If there is no matching ']', we assume the '[' is a normal
1532 * character. This makes 'incsearch' and ":help [" work.
1533 */
1534 lp = skip_anyof(regparse);
1535 if (*lp == ']') /* there is a matching ']' */
1536 {
1537 int startc = -1; /* > 0 when next '-' is a range */
1538 int endc;
1539
1540 /*
1541 * In a character class, different parsing rules apply.
1542 * Not even \ is special anymore, nothing is.
1543 */
1544 if (*regparse == '^') /* Complement of range. */
1545 {
1546 ret = regnode(ANYBUT + extra);
1547 regparse++;
1548 }
1549 else
1550 ret = regnode(ANYOF + extra);
1551
1552 /* At the start ']' and '-' mean the literal character. */
1553 if (*regparse == ']' || *regparse == '-')
1554 {
1555 startc = *regparse;
1556 regc(*regparse++);
1557 }
1558
1559 while (*regparse != NUL && *regparse != ']')
1560 {
1561 if (*regparse == '-')
1562 {
1563 ++regparse;
1564 /* The '-' is not used for a range at the end and
1565 * after or before a '\n'. */
1566 if (*regparse == ']' || *regparse == NUL
1567 || startc == -1
1568 || (regparse[0] == '\\' && regparse[1] == 'n'))
1569 {
1570 regc('-');
1571 startc = '-'; /* [--x] is a range */
1572 }
1573 else
1574 {
1575 /* Also accept "a-[.z.]" */
1576 endc = 0;
1577 if (*regparse == '[')
1578 endc = get_coll_element(&regparse);
1579 if (endc == 0)
1580 {
1581 if (has_mbyte)
1582 endc = mb_ptr2char_adv(&regparse);
1583 else
1584 endc = *regparse++;
1585 }
1586
1587 /* Handle \o40, \x20 and \u20AC style sequences */
1588 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
1589 endc = coll_get_char();
1590
1591 if (startc > endc)
1592 EMSG_RET_NULL(_(e_reverse_range));
                    /* "startc" itself was emitted when it was parsed, the
                     * loops below add the remaining characters up to and
                     * including "endc". */
1593 if (has_mbyte && ((*mb_char2len)(startc) > 1
1594 || (*mb_char2len)(endc) > 1))
1595 {
1596 /* Limit to a range of 256 chars. */
1597 if (endc > startc + 256)
1598 EMSG_RET_NULL(_(e_large_class));
1599 while (++startc <= endc)
1600 regmbc(startc);
1601 }
1602 else
1603 {
1604#ifdef EBCDIC
1605 int alpha_only = FALSE;
1606
1607 /* for alphabetical range skip the gaps
1608 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
1609 if (isalpha(startc) && isalpha(endc))
1610 alpha_only = TRUE;
1611#endif
1612 while (++startc <= endc)
1613#ifdef EBCDIC
1614 if (!alpha_only || isalpha(startc))
1615#endif
1616 regc(startc);
1617 }
1618 startc = -1;
1619 }
1620 }
1621 /*
1622 * Only "\]", "\^", "\-" and "\\" are special in Vi. Vim
1623 * accepts "\t", "\e", etc., but only when the 'l' flag in
1624 * 'cpoptions' is not included.
1625 * Posix doesn't recognize backslash at all.
1626 */
1627 else if (*regparse == '\\'
1628 && !reg_cpo_bsl
1629 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
1630 || (!reg_cpo_lit
1631 && vim_strchr(REGEXP_ABBR,
1632 regparse[1]) != NULL)))
1633 {
1634 regparse++;
1635 if (*regparse == 'n')
1636 {
1637 /* '\n' in range: also match NL */
1638 if (ret != JUST_CALC_SIZE)
1639 {
1640 /* Using \n inside [^] does not change what
1641 * matches. "[^\n]" is the same as ".". */
1642 if (*ret == ANYOF)
1643 {
1644 *ret = ANYOF + ADD_NL;
1645 *flagp |= HASNL;
1646 }
1647 /* else: must have had a \n already */
1648 }
1649 regparse++;
1650 startc = -1;
1651 }
1652 else if (*regparse == 'd'
1653 || *regparse == 'o'
1654 || *regparse == 'x'
1655 || *regparse == 'u'
1656 || *regparse == 'U')
1657 {
1658 startc = coll_get_char();
1659 if (startc == 0)
1660 regc(0x0a);
1661 else
1662 regmbc(startc);
1663 }
1664 else
1665 {
1666 startc = backslash_trans(*regparse++);
1667 regc(startc);
1668 }
1669 }
1670 else if (*regparse == '[')
1671 {
1672 int c_class;
1673 int cu;
1674
                /* A character class such as [:alpha:]: the characters in
                 * the class are added to the collection one by one.  A
                 * class cannot be the start of a range. */
1675 c_class = get_char_class(&regparse);
1676 startc = -1;
1677 /* Characters assumed to be 8 bits! */
1678 switch (c_class)
1679 {
1680 case CLASS_NONE:
1681 c_class = get_equi_class(&regparse);
1682 if (c_class != 0)
1683 {
1684 /* produce equivalence class */
1685 reg_equi_class(c_class);
1686 }
1687 else if ((c_class =
1688 get_coll_element(&regparse)) != 0)
1689 {
1690 /* produce a collating element */
1691 regmbc(c_class);
1692 }
1693 else
1694 {
1695 /* literal '[', allow [[-x] as a range */
1696 startc = *regparse++;
1697 regc(startc);
1698 }
1699 break;
1700 case CLASS_ALNUM:
1701 for (cu = 1; cu < 128; cu++)
1702 if (isalnum(cu))
1703 regmbc(cu);
1704 break;
1705 case CLASS_ALPHA:
1706 for (cu = 1; cu < 128; cu++)
1707 if (isalpha(cu))
1708 regmbc(cu);
1709 break;
1710 case CLASS_BLANK:
1711 regc(' ');
1712 regc('\t');
1713 break;
1714 case CLASS_CNTRL:
1715 for (cu = 1; cu <= 127; cu++)
1716 if (iscntrl(cu))
1717 regmbc(cu);
1718 break;
1719 case CLASS_DIGIT:
1720 for (cu = 1; cu <= 127; cu++)
1721 if (VIM_ISDIGIT(cu))
1722 regmbc(cu);
1723 break;
1724 case CLASS_GRAPH:
1725 for (cu = 1; cu <= 127; cu++)
1726 if (isgraph(cu))
1727 regmbc(cu);
1728 break;
1729 case CLASS_LOWER:
1730 for (cu = 1; cu <= 255; cu++)
1731 if (MB_ISLOWER(cu) && cu != 170
1732 && cu != 186)
1733 regmbc(cu);
1734 break;
1735 case CLASS_PRINT:
1736 for (cu = 1; cu <= 255; cu++)
1737 if (vim_isprintc(cu))
1738 regmbc(cu);
1739 break;
1740 case CLASS_PUNCT:
1741 for (cu = 1; cu < 128; cu++)
1742 if (ispunct(cu))
1743 regmbc(cu);
1744 break;
1745 case CLASS_SPACE:
1746 for (cu = 9; cu <= 13; cu++)
1747 regc(cu);
1748 regc(' ');
1749 break;
1750 case CLASS_UPPER:
1751 for (cu = 1; cu <= 255; cu++)
1752 if (MB_ISUPPER(cu))
1753 regmbc(cu);
1754 break;
1755 case CLASS_XDIGIT:
1756 for (cu = 1; cu <= 255; cu++)
1757 if (vim_isxdigit(cu))
1758 regmbc(cu);
1759 break;
1760 case CLASS_TAB:
1761 regc('\t');
1762 break;
1763 case CLASS_RETURN:
1764 regc('\r');
1765 break;
1766 case CLASS_BACKSPACE:
1767 regc('\b');
1768 break;
1769 case CLASS_ESCAPE:
1770 regc('\033');
1771 break;
1772 case CLASS_IDENT:
1773 for (cu = 1; cu <= 255; cu++)
1774 if (vim_isIDc(cu))
1775 regmbc(cu);
1776 break;
1777 case CLASS_KEYWORD:
1778 for (cu = 1; cu <= 255; cu++)
1779 if (reg_iswordc(cu))
1780 regmbc(cu);
1781 break;
1782 case CLASS_FNAME:
1783 for (cu = 1; cu <= 255; cu++)
1784 if (vim_isfilec(cu))
1785 regmbc(cu);
1786 break;
1787 }
1788 }
1789 else
1790 {
1791 if (has_mbyte)
1792 {
1793 int len;
1794
1795 /* produce a multibyte character, including any
1796 * following composing characters */
1797 startc = mb_ptr2char(regparse);
1798 len = (*mb_ptr2len)(regparse);
1799 if (enc_utf8 && utf_char2len(startc) != len)
1800 startc = -1; /* composing chars */
1801 while (--len >= 0)
1802 regc(*regparse++);
1803 }
1804 else
1805 {
1806 startc = *regparse++;
1807 regc(startc);
1808 }
1809 }
1810 }
1811 regc(NUL);
1812 prevchr_len = 1; /* last char was the ']' */
1813 if (*regparse != ']')
1814 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
1815 skipchr(); /* let's be friends with the lexer again */
1816 *flagp |= HASWIDTH | SIMPLE;
1817 break;
1818 }
1819 else if (reg_strict)
1820 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
1821 }
1822 /* FALLTHROUGH */
1823
1824 default:
1825 {
1826 int len;
1827
1828 /* A multi-byte character is handled as a separate atom if it's
1829 * before a multi and when it's a composing char. */
1830 if (use_multibytecode(c))
1831 {
1832do_multibyte:
1833 ret = regnode(MULTIBYTECODE);
1834 regmbc(c);
1835 *flagp |= HASWIDTH | SIMPLE;
1836 break;
1837 }
1838
1839 ret = regnode(EXACTLY);
1840
1841 /*
1842 * Append characters as long as:
1843 * - there is no following multi, we then need the character in
1844 * front of it as a single character operand
1845 * - not running into a Magic character
1846 * - "one_exactly" is not set
1847 * But always emit at least one character. Might be a Multi,
1848 * e.g., a "[" without matching "]".
1849 */
1850 for (len = 0; c != NUL && (len == 0
1851 || (re_multi_type(peekchr()) == NOT_MULTI
1852 && !one_exactly
1853 && !is_Magic(c))); ++len)
1854 {
1855 c = no_Magic(c);
1856 if (has_mbyte)
1857 {
1858 regmbc(c);
1859 if (enc_utf8)
1860 {
1861 int l;
1862
1863 /* Need to get composing character too. */
1864 for (;;)
1865 {
1866 l = utf_ptr2len(regparse);
1867 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
1868 break;
1869 regmbc(utf_ptr2char(regparse));
1870 skipchr();
1871 }
1872 }
1873 }
1874 else
1875 regc(c);
1876 c = getchr();
1877 }
        /* The character that ended the loop was not used, put it back. */
1878 ungetchr();
1879
1880 regc(NUL);
1881 *flagp |= HASWIDTH;
1882 if (len == 1)
1883 *flagp |= SIMPLE;
1884 }
1885 break;
1886 }
1887
1888 return ret;
1889}
1890
1891/*
1892 * Parse something followed by possible [*+=].
1893 *
1894 * Note that the branching code sequences used for = and the general cases
1895 * of * and + are somewhat optimized: they use the same NOTHING node as
1896 * both the endmarker for their branch list and the body of the last branch.
1897 * It might seem that this node could be dispensed with entirely, but the
1898 * endmarker role is not redundant.
1899 */
1900 static char_u *
1901regpiece(int *flagp)
1902{
1903 char_u *ret;
1904 int op;
1905 char_u *next;
1906 int flags;
1907 long minval;
1908 long maxval;
1909
1910 ret = regatom(&flags);
1911 if (ret == NULL)
1912 return NULL;
1913
1914 op = peekchr();
1915 if (re_multi_type(op) == NOT_MULTI)
1916 {
1917 *flagp = flags;
1918 return ret;
1919 }
1920 /* default flags */
1921 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1922
1923 skipchr();
1924 switch (op)
1925 {
1926 case Magic('*'):
1927 if (flags & SIMPLE)
1928 reginsert(STAR, ret);
1929 else
1930 {
1931 /* Emit x* as (x&|), where & means "self". */
1932 reginsert(BRANCH, ret); /* Either x */
1933 regoptail(ret, regnode(BACK)); /* and loop */
1934 regoptail(ret, ret); /* back */
1935 regtail(ret, regnode(BRANCH)); /* or */
1936 regtail(ret, regnode(NOTHING)); /* null. */
1937 }
1938 break;
1939
1940 case Magic('+'):
1941 if (flags & SIMPLE)
1942 reginsert(PLUS, ret);
1943 else
1944 {
1945 /* Emit x+ as x(&|), where & means "self". */
1946 next = regnode(BRANCH); /* Either */
1947 regtail(ret, next);
1948 regtail(regnode(BACK), ret); /* loop back */
1949 regtail(next, regnode(BRANCH)); /* or */
1950 regtail(ret, regnode(NOTHING)); /* null. */
1951 }
1952 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1953 break;
1954
1955 case Magic('@'):
1956 {
1957 int lop = END;
1958 long nr;
1959
1960 nr = getdecchrs();
1961 switch (no_Magic(getchr()))
1962 {
1963 case '=': lop = MATCH; break; /* \@= */
1964 case '!': lop = NOMATCH; break; /* \@! */
1965 case '>': lop = SUBPAT; break; /* \@> */
1966 case '<': switch (no_Magic(getchr()))
1967 {
1968 case '=': lop = BEHIND; break; /* \@<= */
1969 case '!': lop = NOBEHIND; break; /* \@<! */
1970 }
1971 }
1972 if (lop == END)
1973 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
1974 reg_magic == MAGIC_ALL);
1975 /* Look behind must match with behind_pos. */
1976 if (lop == BEHIND || lop == NOBEHIND)
1977 {
1978 regtail(ret, regnode(BHPOS));
1979 *flagp |= HASLOOKBH;
1980 }
1981 regtail(ret, regnode(END)); /* operand ends */
1982 if (lop == BEHIND || lop == NOBEHIND)
1983 {
1984 if (nr < 0)
1985 nr = 0; /* no limit is same as zero limit */
1986 reginsert_nr(lop, nr, ret);
1987 }
1988 else
1989 reginsert(lop, ret);
1990 break;
1991 }
1992
1993 case Magic('?'):
1994 case Magic('='):
1995 /* Emit x= as (x|) */
1996 reginsert(BRANCH, ret); /* Either x */
1997 regtail(ret, regnode(BRANCH)); /* or */
1998 next = regnode(NOTHING); /* null. */
1999 regtail(ret, next);
2000 regoptail(ret, next);
2001 break;
2002
2003 case Magic('{'):
2004 if (!read_limits(&minval, &maxval))
2005 return NULL;
2006 if (flags & SIMPLE)
2007 {
2008 reginsert(BRACE_SIMPLE, ret);
2009 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2010 }
2011 else
2012 {
2013 if (num_complex_braces >= 10)
2014 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
2015 reg_magic == MAGIC_ALL);
2016 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
2017 regoptail(ret, regnode(BACK));
2018 regoptail(ret, ret);
2019 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2020 ++num_complex_braces;
2021 }
2022 if (minval > 0 && maxval > 0)
2023 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
2024 break;
2025 }
2026 if (re_multi_type(peekchr()) != NOT_MULTI)
2027 {
2028 // Can't have a multi follow a multi.
2029 if (peekchr() == Magic('*'))
2030 EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
2031 EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
2032 no_Magic(peekchr()));
2033 }
2034
2035 return ret;
2036}
2037
2038/*
2039 * Parse one alternative of an | or & operator.
2040 * Implements the concatenation operator.
2041 */
2042 static char_u *
2043regconcat(int *flagp)
2044{
2045 char_u *first = NULL;
2046 char_u *chain = NULL;
2047 char_u *latest;
2048 int flags;
2049 int cont = TRUE;
2050
2051 *flagp = WORST; /* Tentatively. */
2052
2053 while (cont)
2054 {
2055 switch (peekchr())
2056 {
2057 case NUL:
2058 case Magic('|'):
2059 case Magic('&'):
2060 case Magic(')'):
2061 cont = FALSE;
2062 break;
2063 case Magic('Z'):
2064 regflags |= RF_ICOMBINE;
2065 skipchr_keepstart();
2066 break;
2067 case Magic('c'):
2068 regflags |= RF_ICASE;
2069 skipchr_keepstart();
2070 break;
2071 case Magic('C'):
2072 regflags |= RF_NOICASE;
2073 skipchr_keepstart();
2074 break;
2075 case Magic('v'):
2076 reg_magic = MAGIC_ALL;
2077 skipchr_keepstart();
2078 curchr = -1;
2079 break;
2080 case Magic('m'):
2081 reg_magic = MAGIC_ON;
2082 skipchr_keepstart();
2083 curchr = -1;
2084 break;
2085 case Magic('M'):
2086 reg_magic = MAGIC_OFF;
2087 skipchr_keepstart();
2088 curchr = -1;
2089 break;
2090 case Magic('V'):
2091 reg_magic = MAGIC_NONE;
2092 skipchr_keepstart();
2093 curchr = -1;
2094 break;
2095 default:
2096 latest = regpiece(&flags);
2097 if (latest == NULL || reg_toolong)
2098 return NULL;
2099 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
2100 if (chain == NULL) /* First piece. */
2101 *flagp |= flags & SPSTART;
2102 else
2103 regtail(chain, latest);
2104 chain = latest;
2105 if (first == NULL)
2106 first = latest;
2107 break;
2108 }
2109 }
2110 if (first == NULL) /* Loop ran zero times. */
2111 first = regnode(NOTHING);
2112 return first;
2113}
2114
2115/*
2116 * Parse one alternative of an | operator.
2117 * Implements the & operator.
2118 */
2119 static char_u *
2120regbranch(int *flagp)
2121{
2122 char_u *ret;
2123 char_u *chain = NULL;
2124 char_u *latest;
2125 int flags;
2126
2127 *flagp = WORST | HASNL; /* Tentatively. */
2128
2129 ret = regnode(BRANCH);
2130 for (;;)
2131 {
2132 latest = regconcat(&flags);
2133 if (latest == NULL)
2134 return NULL;
2135 /* If one of the branches has width, the whole thing has. If one of
2136 * the branches anchors at start-of-line, the whole thing does.
2137 * If one of the branches uses look-behind, the whole thing does. */
2138 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
2139 /* If one of the branches doesn't match a line-break, the whole thing
2140 * doesn't. */
2141 *flagp &= ~HASNL | (flags & HASNL);
2142 if (chain != NULL)
2143 regtail(chain, latest);
2144 if (peekchr() != Magic('&'))
2145 break;
2146 skipchr();
2147 regtail(latest, regnode(END)); /* operand ends */
2148 if (reg_toolong)
2149 break;
2150 reginsert(MATCH, latest);
2151 chain = latest;
2152 }
2153
2154 return ret;
2155}
2156
2157/*
2158 * Parse regular expression, i.e. main body or parenthesized thing.
2159 *
2160 * Caller must absorb opening parenthesis.
2161 *
2162 * Combining parenthesis handling with the base level of regular expression
2163 * is a trifle forced, but the need to tie the tails of the branches to what
2164 * follows makes it hard to avoid.
2165 */
2166 static char_u *
2167reg(
2168 int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
2169 int *flagp)
2170{
2171 char_u *ret;
2172 char_u *br;
2173 char_u *ender;
2174 int parno = 0;
2175 int flags;
2176
2177 *flagp = HASWIDTH; /* Tentatively. */
2178
2179#ifdef FEAT_SYN_HL
2180 if (paren == REG_ZPAREN)
2181 {
2182 /* Make a ZOPEN node. */
2183 if (regnzpar >= NSUBEXP)
2184 EMSG_RET_NULL(_("E50: Too many \\z("));
2185 parno = regnzpar;
2186 regnzpar++;
2187 ret = regnode(ZOPEN + parno);
2188 }
2189 else
2190#endif
2191 if (paren == REG_PAREN)
2192 {
2193 /* Make a MOPEN node. */
2194 if (regnpar >= NSUBEXP)
2195 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
2196 parno = regnpar;
2197 ++regnpar;
2198 ret = regnode(MOPEN + parno);
2199 }
2200 else if (paren == REG_NPAREN)
2201 {
2202 /* Make a NOPEN node. */
2203 ret = regnode(NOPEN);
2204 }
2205 else
2206 ret = NULL;
2207
2208 /* Pick up the branches, linking them together. */
2209 br = regbranch(&flags);
2210 if (br == NULL)
2211 return NULL;
2212 if (ret != NULL)
2213 regtail(ret, br); /* [MZ]OPEN -> first. */
2214 else
2215 ret = br;
2216 /* If one of the branches can be zero-width, the whole thing can.
2217 * If one of the branches has * at start or matches a line-break, the
2218 * whole thing can. */
2219 if (!(flags & HASWIDTH))
2220 *flagp &= ~HASWIDTH;
2221 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2222 while (peekchr() == Magic('|'))
2223 {
2224 skipchr();
2225 br = regbranch(&flags);
2226 if (br == NULL || reg_toolong)
2227 return NULL;
2228 regtail(ret, br); /* BRANCH -> BRANCH. */
2229 if (!(flags & HASWIDTH))
2230 *flagp &= ~HASWIDTH;
2231 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2232 }
2233
2234 /* Make a closing node, and hook it on the end. */
2235 ender = regnode(
2236#ifdef FEAT_SYN_HL
2237 paren == REG_ZPAREN ? ZCLOSE + parno :
2238#endif
2239 paren == REG_PAREN ? MCLOSE + parno :
2240 paren == REG_NPAREN ? NCLOSE : END);
2241 regtail(ret, ender);
2242
2243 /* Hook the tails of the branches to the closing node. */
2244 for (br = ret; br != NULL; br = regnext(br))
2245 regoptail(br, ender);
2246
2247 /* Check for proper termination. */
2248 if (paren != REG_NOPAREN && getchr() != Magic(')'))
2249 {
2250#ifdef FEAT_SYN_HL
2251 if (paren == REG_ZPAREN)
2252 EMSG_RET_NULL(_("E52: Unmatched \\z("));
2253 else
2254#endif
2255 if (paren == REG_NPAREN)
2256 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
2257 else
2258 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
2259 }
2260 else if (paren == REG_NOPAREN && peekchr() != NUL)
2261 {
2262 if (curchr == Magic(')'))
2263 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
2264 else
2265 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
2266 /* NOTREACHED */
2267 }
2268 /*
2269 * Here we set the flag allowing back references to this set of
2270 * parentheses.
2271 */
2272 if (paren == REG_PAREN)
2273 had_endbrace[parno] = TRUE; /* have seen the close paren */
2274 return ret;
2275}
2276
2277/*
2278 * bt_regcomp() - compile a regular expression into internal code for the
2279 * traditional back track matcher.
2280 * Returns the program in allocated space. Returns NULL for an error.
2281 *
2282 * We can't allocate space until we know how big the compiled form will be,
2283 * but we can't compile it (and thus know how big it is) until we've got a
2284 * place to put the code. So we cheat: we compile it twice, once with code
2285 * generation turned off and size counting turned on, and once "for real".
2286 * This also means that we don't allocate space until we are sure that the
2287 * thing really will compile successfully, and we never have to move the
2288 * code and thus invalidate pointers into it. (Note that it has to be in
2289 * one piece because vim_free() must be able to free it all.)
2290 *
2291 * Whether upper/lower case is to be ignored is decided when executing the
2292 * program, it does not matter here.
2293 *
2294 * Beware that the optimization-preparation code in here knows about some
2295 * of the structure of the compiled regexp.
2296 * "re_flags": RE_MAGIC and/or RE_STRING.
2297 */
2298 static regprog_T *
2299bt_regcomp(char_u *expr, int re_flags)
2300{
2301 bt_regprog_T *r;
2302 char_u *scan;
2303 char_u *longest;
2304 int len;
2305 int flags;
2306
2307 if (expr == NULL)
2308 EMSG_RET_NULL(_(e_null));
2309
2310 init_class_tab();
2311
2312 // First pass: determine size, legality.
2313 regcomp_start(expr, re_flags);
2314 regcode = JUST_CALC_SIZE;
2315 regc(REGMAGIC);
2316 if (reg(REG_NOPAREN, &flags) == NULL)
2317 return NULL;
2318
2319 // Allocate space.
2320 r = alloc(offsetof(bt_regprog_T, program) + regsize);
2321 if (r == NULL)
2322 return NULL;
2323 r->re_in_use = FALSE;
2324
2325 // Second pass: emit code.
2326 regcomp_start(expr, re_flags);
2327 regcode = r->program;
2328 regc(REGMAGIC);
2329 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
2330 {
2331 vim_free(r);
2332 if (reg_toolong)
2333 EMSG_RET_NULL(_("E339: Pattern too long"));
2334 return NULL;
2335 }
2336
2337 // Dig out information for optimizations.
2338 r->regstart = NUL; // Worst-case defaults.
2339 r->reganch = 0;
2340 r->regmust = NULL;
2341 r->regmlen = 0;
2342 r->regflags = regflags;
2343 if (flags & HASNL)
2344 r->regflags |= RF_HASNL;
2345 if (flags & HASLOOKBH)
2346 r->regflags |= RF_LOOKBH;
2347#ifdef FEAT_SYN_HL
2348 // Remember whether this pattern has any \z specials in it.
2349 r->reghasz = re_has_z;
2350#endif
2351 scan = r->program + 1; // First BRANCH.
2352 if (OP(regnext(scan)) == END) // Only one top-level choice.
2353 {
2354 scan = OPERAND(scan);
2355
2356 // Starting-point info.
2357 if (OP(scan) == BOL || OP(scan) == RE_BOF)
2358 {
2359 r->reganch++;
2360 scan = regnext(scan);
2361 }
2362
2363 if (OP(scan) == EXACTLY)
2364 {
2365 if (has_mbyte)
2366 r->regstart = (*mb_ptr2char)(OPERAND(scan));
2367 else
2368 r->regstart = *OPERAND(scan);
2369 }
2370 else if ((OP(scan) == BOW
2371 || OP(scan) == EOW
2372 || OP(scan) == NOTHING
2373 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
2374 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
2375 && OP(regnext(scan)) == EXACTLY)
2376 {
2377 if (has_mbyte)
2378 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
2379 else
2380 r->regstart = *OPERAND(regnext(scan));
2381 }
2382
2383 // If there's something expensive in the r.e., find the longest
2384 // literal string that must appear and make it the regmust. Resolve
2385 // ties in favor of later strings, since the regstart check works
2386 // with the beginning of the r.e. and avoiding duplication
2387 // strengthens checking. Not a strong reason, but sufficient in the
2388 // absence of others.
2389
2390 // When the r.e. starts with BOW, it is faster to look for a regmust
2391 // first. Used a lot for "#" and "*" commands. (Added by mool).
2392 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
2393 && !(flags & HASNL))
2394 {
2395 longest = NULL;
2396 len = 0;
2397 for (; scan != NULL; scan = regnext(scan))
2398 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
2399 {
2400 longest = OPERAND(scan);
2401 len = (int)STRLEN(OPERAND(scan));
2402 }
2403 r->regmust = longest;
2404 r->regmlen = len;
2405 }
2406 }
2407#ifdef BT_REGEXP_DUMP
2408 regdump(expr, r);
2409#endif
2410 r->engine = &bt_regengine;
2411 return (regprog_T *)r;
2412}
2413
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found.  This is messy, but it works fine.
 * Returns the current value of the file-level "had_eol" flag.
 */
    int
vim_regcomp_had_eol(void)
{
    return had_eol;
}
#endif
2425
2426/*
2427 * Get a number after a backslash that is inside [].
2428 * When nothing is recognized return a backslash.
2429 */
2430 static int
2431coll_get_char(void)
2432{
2433 long nr = -1;
2434
2435 switch (*regparse++)
2436 {
2437 case 'd': nr = getdecchrs(); break;
2438 case 'o': nr = getoctchrs(); break;
2439 case 'x': nr = gethexchrs(2); break;
2440 case 'u': nr = gethexchrs(4); break;
2441 case 'U': nr = gethexchrs(8); break;
2442 }
2443 if (nr < 0 || nr > INT_MAX)
2444 {
2445 /* If getting the number fails be backwards compatible: the character
2446 * is a backslash. */
2447 --regparse;
2448 nr = '\\';
2449 }
2450 return nr;
2451}
2452
2453/*
2454 * Free a compiled regexp program, returned by bt_regcomp().
2455 */
2456 static void
2457bt_regfree(regprog_T *prog)
2458{
2459 vim_free(prog);
2460}
2461
/* Advance rex.input with MB_PTR_ADV() (presumably by one, possibly
 * multi-byte, character -- confirm against the MB_PTR_ADV definition). */
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long bl_minval;
static long bl_maxval;
2471
2472/*
2473 * Save the input line and position in a regsave_T.
2474 */
2475 static void
2476reg_save(regsave_T *save, garray_T *gap)
2477{
2478 if (REG_MULTI)
2479 {
2480 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
2481 save->rs_u.pos.lnum = rex.lnum;
2482 }
2483 else
2484 save->rs_u.ptr = rex.input;
2485 save->rs_len = gap->ga_len;
2486}
2487
2488/*
2489 * Restore the input line and position from a regsave_T.
2490 */
2491 static void
2492reg_restore(regsave_T *save, garray_T *gap)
2493{
2494 if (REG_MULTI)
2495 {
2496 if (rex.lnum != save->rs_u.pos.lnum)
2497 {
2498 /* only call reg_getline() when the line number changed to save
2499 * a bit of time */
2500 rex.lnum = save->rs_u.pos.lnum;
2501 rex.line = reg_getline(rex.lnum);
2502 }
2503 rex.input = rex.line + save->rs_u.pos.col;
2504 }
2505 else
2506 rex.input = save->rs_u.ptr;
2507 gap->ga_len = save->rs_len;
2508}
2509
2510/*
2511 * Return TRUE if current position is equal to saved position.
2512 */
2513 static int
2514reg_save_equal(regsave_T *save)
2515{
2516 if (REG_MULTI)
2517 return rex.lnum == save->rs_u.pos.lnum
2518 && rex.input == rex.line + save->rs_u.pos.col;
2519 return rex.input == save->rs_u.ptr;
2520}
2521
/*
 * Save the sub-expressions before attempting a match.
 * Calls save_se_multi() with "posp" when REG_MULTI is set, otherwise
 * save_se_one() with "pp".
 * The expansion is parenthesized so that it is a single expression wherever
 * it is used.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))

/*
 * After a failed match restore the sub-expressions.
 * Copies the position saved in "savep" back to "*posp" when REG_MULTI is
 * set, otherwise copies the saved pointer back to "*pp".
 * Wrapped in do { } while (0) so that "restore_se(...);" is exactly one
 * statement, also when used as the body of an if that has an else.
 */
#define restore_se(savep, posp, pp) \
    do { \
        if (REG_MULTI) \
            *(posp) = (savep)->se_u.pos; \
        else \
            *(pp) = (savep)->se_u.ptr; \
    } while (0)
2532
2533/*
2534 * Tentatively set the sub-expression start to the current position (after
2535 * calling regmatch() they will have changed). Need to save the existing
2536 * values for when there is no match.
2537 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
2538 * depending on REG_MULTI.
2539 */
2540 static void
2541save_se_multi(save_se_T *savep, lpos_T *posp)
2542{
2543 savep->se_u.pos = *posp;
2544 posp->lnum = rex.lnum;
2545 posp->col = (colnr_T)(rex.input - rex.line);
2546}
2547
2548 static void
2549save_se_one(save_se_T *savep, char_u **pp)
2550{
2551 savep->se_u.ptr = *pp;
2552 *pp = rex.input;
2553}
2554
2555/*
2556 * regrepeat - repeatedly match something simple, return how many.
2557 * Advances rex.input (and rex.lnum) to just after the matched chars.
2558 */
2559 static int
2560regrepeat(
2561 char_u *p,
2562 long maxcount) /* maximum number of matches allowed */
2563{
2564 long count = 0;
2565 char_u *scan;
2566 char_u *opnd;
2567 int mask;
2568 int testval = 0;
2569
2570 scan = rex.input; /* Make local copy of rex.input for speed. */
2571 opnd = OPERAND(p);
2572 switch (OP(p))
2573 {
2574 case ANY:
2575 case ANY + ADD_NL:
2576 while (count < maxcount)
2577 {
2578 /* Matching anything means we continue until end-of-line (or
2579 * end-of-file for ANY + ADD_NL), only limited by maxcount. */
2580 while (*scan != NUL && count < maxcount)
2581 {
2582 ++count;
2583 MB_PTR_ADV(scan);
2584 }
2585 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2586 || rex.reg_line_lbr || count == maxcount)
2587 break;
2588 ++count; /* count the line-break */
2589 reg_nextline();
2590 scan = rex.input;
2591 if (got_int)
2592 break;
2593 }
2594 break;
2595
2596 case IDENT:
2597 case IDENT + ADD_NL:
2598 testval = TRUE;
2599 /* FALLTHROUGH */
2600 case SIDENT:
2601 case SIDENT + ADD_NL:
2602 while (count < maxcount)
2603 {
2604 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2605 {
2606 MB_PTR_ADV(scan);
2607 }
2608 else if (*scan == NUL)
2609 {
2610 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2611 || rex.reg_line_lbr)
2612 break;
2613 reg_nextline();
2614 scan = rex.input;
2615 if (got_int)
2616 break;
2617 }
2618 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2619 ++scan;
2620 else
2621 break;
2622 ++count;
2623 }
2624 break;
2625
2626 case KWORD:
2627 case KWORD + ADD_NL:
2628 testval = TRUE;
2629 /* FALLTHROUGH */
2630 case SKWORD:
2631 case SKWORD + ADD_NL:
2632 while (count < maxcount)
2633 {
2634 if (vim_iswordp_buf(scan, rex.reg_buf)
2635 && (testval || !VIM_ISDIGIT(*scan)))
2636 {
2637 MB_PTR_ADV(scan);
2638 }
2639 else if (*scan == NUL)
2640 {
2641 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2642 || rex.reg_line_lbr)
2643 break;
2644 reg_nextline();
2645 scan = rex.input;
2646 if (got_int)
2647 break;
2648 }
2649 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2650 ++scan;
2651 else
2652 break;
2653 ++count;
2654 }
2655 break;
2656
2657 case FNAME:
2658 case FNAME + ADD_NL:
2659 testval = TRUE;
2660 /* FALLTHROUGH */
2661 case SFNAME:
2662 case SFNAME + ADD_NL:
2663 while (count < maxcount)
2664 {
2665 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2666 {
2667 MB_PTR_ADV(scan);
2668 }
2669 else if (*scan == NUL)
2670 {
2671 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2672 || rex.reg_line_lbr)
2673 break;
2674 reg_nextline();
2675 scan = rex.input;
2676 if (got_int)
2677 break;
2678 }
2679 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2680 ++scan;
2681 else
2682 break;
2683 ++count;
2684 }
2685 break;
2686
2687 case PRINT:
2688 case PRINT + ADD_NL:
2689 testval = TRUE;
2690 /* FALLTHROUGH */
2691 case SPRINT:
2692 case SPRINT + ADD_NL:
2693 while (count < maxcount)
2694 {
2695 if (*scan == NUL)
2696 {
2697 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2698 || rex.reg_line_lbr)
2699 break;
2700 reg_nextline();
2701 scan = rex.input;
2702 if (got_int)
2703 break;
2704 }
2705 else if (vim_isprintc(PTR2CHAR(scan)) == 1
2706 && (testval || !VIM_ISDIGIT(*scan)))
2707 {
2708 MB_PTR_ADV(scan);
2709 }
2710 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2711 ++scan;
2712 else
2713 break;
2714 ++count;
2715 }
2716 break;
2717
2718 case WHITE:
2719 case WHITE + ADD_NL:
2720 testval = mask = RI_WHITE;
2721do_class:
2722 while (count < maxcount)
2723 {
2724 int l;
2725
2726 if (*scan == NUL)
2727 {
2728 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2729 || rex.reg_line_lbr)
2730 break;
2731 reg_nextline();
2732 scan = rex.input;
2733 if (got_int)
2734 break;
2735 }
2736 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
2737 {
2738 if (testval != 0)
2739 break;
2740 scan += l;
2741 }
2742 else if ((class_tab[*scan] & mask) == testval)
2743 ++scan;
2744 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2745 ++scan;
2746 else
2747 break;
2748 ++count;
2749 }
2750 break;
2751
2752 case NWHITE:
2753 case NWHITE + ADD_NL:
2754 mask = RI_WHITE;
2755 goto do_class;
2756 case DIGIT:
2757 case DIGIT + ADD_NL:
2758 testval = mask = RI_DIGIT;
2759 goto do_class;
2760 case NDIGIT:
2761 case NDIGIT + ADD_NL:
2762 mask = RI_DIGIT;
2763 goto do_class;
2764 case HEX:
2765 case HEX + ADD_NL:
2766 testval = mask = RI_HEX;
2767 goto do_class;
2768 case NHEX:
2769 case NHEX + ADD_NL:
2770 mask = RI_HEX;
2771 goto do_class;
2772 case OCTAL:
2773 case OCTAL + ADD_NL:
2774 testval = mask = RI_OCTAL;
2775 goto do_class;
2776 case NOCTAL:
2777 case NOCTAL + ADD_NL:
2778 mask = RI_OCTAL;
2779 goto do_class;
2780 case WORD:
2781 case WORD + ADD_NL:
2782 testval = mask = RI_WORD;
2783 goto do_class;
2784 case NWORD:
2785 case NWORD + ADD_NL:
2786 mask = RI_WORD;
2787 goto do_class;
2788 case HEAD:
2789 case HEAD + ADD_NL:
2790 testval = mask = RI_HEAD;
2791 goto do_class;
2792 case NHEAD:
2793 case NHEAD + ADD_NL:
2794 mask = RI_HEAD;
2795 goto do_class;
2796 case ALPHA:
2797 case ALPHA + ADD_NL:
2798 testval = mask = RI_ALPHA;
2799 goto do_class;
2800 case NALPHA:
2801 case NALPHA + ADD_NL:
2802 mask = RI_ALPHA;
2803 goto do_class;
2804 case LOWER:
2805 case LOWER + ADD_NL:
2806 testval = mask = RI_LOWER;
2807 goto do_class;
2808 case NLOWER:
2809 case NLOWER + ADD_NL:
2810 mask = RI_LOWER;
2811 goto do_class;
2812 case UPPER:
2813 case UPPER + ADD_NL:
2814 testval = mask = RI_UPPER;
2815 goto do_class;
2816 case NUPPER:
2817 case NUPPER + ADD_NL:
2818 mask = RI_UPPER;
2819 goto do_class;
2820
2821 case EXACTLY:
2822 {
2823 int cu, cl;
2824
2825 /* This doesn't do a multi-byte character, because a MULTIBYTECODE
2826 * would have been used for it. It does handle single-byte
2827 * characters, such as latin1. */
2828 if (rex.reg_ic)
2829 {
2830 cu = MB_TOUPPER(*opnd);
2831 cl = MB_TOLOWER(*opnd);
2832 while (count < maxcount && (*scan == cu || *scan == cl))
2833 {
2834 count++;
2835 scan++;
2836 }
2837 }
2838 else
2839 {
2840 cu = *opnd;
2841 while (count < maxcount && *scan == cu)
2842 {
2843 count++;
2844 scan++;
2845 }
2846 }
2847 break;
2848 }
2849
2850 case MULTIBYTECODE:
2851 {
2852 int i, len, cf = 0;
2853
2854 /* Safety check (just in case 'encoding' was changed since
2855 * compiling the program). */
2856 if ((len = (*mb_ptr2len)(opnd)) > 1)
2857 {
2858 if (rex.reg_ic && enc_utf8)
2859 cf = utf_fold(utf_ptr2char(opnd));
2860 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
2861 {
2862 for (i = 0; i < len; ++i)
2863 if (opnd[i] != scan[i])
2864 break;
2865 if (i < len && (!rex.reg_ic || !enc_utf8
2866 || utf_fold(utf_ptr2char(scan)) != cf))
2867 break;
2868 scan += len;
2869 ++count;
2870 }
2871 }
2872 }
2873 break;
2874
2875 case ANYOF:
2876 case ANYOF + ADD_NL:
2877 testval = TRUE;
2878 /* FALLTHROUGH */
2879
2880 case ANYBUT:
2881 case ANYBUT + ADD_NL:
2882 while (count < maxcount)
2883 {
2884 int len;
2885
2886 if (*scan == NUL)
2887 {
2888 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2889 || rex.reg_line_lbr)
2890 break;
2891 reg_nextline();
2892 scan = rex.input;
2893 if (got_int)
2894 break;
2895 }
2896 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2897 ++scan;
2898 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
2899 {
2900 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
2901 break;
2902 scan += len;
2903 }
2904 else
2905 {
2906 if ((cstrchr(opnd, *scan) == NULL) == testval)
2907 break;
2908 ++scan;
2909 }
2910 ++count;
2911 }
2912 break;
2913
2914 case NEWL:
2915 while (count < maxcount
2916 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
2917 && !rex.reg_line_lbr && REG_MULTI)
2918 || (*scan == '\n' && rex.reg_line_lbr)))
2919 {
2920 count++;
2921 if (rex.reg_line_lbr)
2922 ADVANCE_REGINPUT();
2923 else
2924 reg_nextline();
2925 scan = rex.input;
2926 if (got_int)
2927 break;
2928 }
2929 break;
2930
2931 default: /* Oh dear. Called inappropriately. */
2932 emsg(_(e_re_corr));
2933#ifdef DEBUG
2934 printf("Called regrepeat with op code %d\n", OP(p));
2935#endif
2936 break;
2937 }
2938
2939 rex.input = scan;
2940
2941 return (int)count;
2942}
2943
2944/*
2945 * Push an item onto the regstack.
2946 * Returns pointer to new item. Returns NULL when out of memory.
2947 */
2948 static regitem_T *
2949regstack_push(regstate_T state, char_u *scan)
2950{
2951 regitem_T *rp;
2952
2953 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
2954 {
2955 emsg(_(e_maxmempat));
2956 return NULL;
2957 }
2958 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
2959 return NULL;
2960
2961 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
2962 rp->rs_state = state;
2963 rp->rs_scan = scan;
2964
2965 regstack.ga_len += sizeof(regitem_T);
2966 return rp;
2967}
2968
2969/*
2970 * Pop an item from the regstack.
2971 */
2972 static void
2973regstack_pop(char_u **scan)
2974{
2975 regitem_T *rp;
2976
2977 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
2978 *scan = rp->rs_scan;
2979
2980 regstack.ga_len -= sizeof(regitem_T);
2981}
2982
2983/*
2984 * Save the current subexpr to "bp", so that they can be restored
2985 * later by restore_subexpr().
2986 */
2987 static void
2988save_subexpr(regbehind_T *bp)
2989{
2990 int i;
2991
2992 /* When "rex.need_clear_subexpr" is set we don't need to save the values, only
2993 * remember that this flag needs to be set again when restoring. */
2994 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
2995 if (!rex.need_clear_subexpr)
2996 {
2997 for (i = 0; i < NSUBEXP; ++i)
2998 {
2999 if (REG_MULTI)
3000 {
3001 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
3002 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
3003 }
3004 else
3005 {
3006 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
3007 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
3008 }
3009 }
3010 }
3011}
3012
3013/*
3014 * Restore the subexpr from "bp".
3015 */
3016 static void
3017restore_subexpr(regbehind_T *bp)
3018{
3019 int i;
3020
3021 /* Only need to restore saved values when they are not to be cleared. */
3022 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
3023 if (!rex.need_clear_subexpr)
3024 {
3025 for (i = 0; i < NSUBEXP; ++i)
3026 {
3027 if (REG_MULTI)
3028 {
3029 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
3030 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
3031 }
3032 else
3033 {
3034 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
3035 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
3036 }
3037 }
3038 }
3039}
3040
3041/*
3042 * regmatch - main matching routine
3043 *
3044 * Conceptually the strategy is simple: Check to see whether the current node
3045 * matches, push an item onto the regstack and loop to see whether the rest
3046 * matches, and then act accordingly. In practice we make some effort to
3047 * avoid using the regstack, in particular by going through "ordinary" nodes
3048 * (that don't need to know whether the rest of the match failed) by a nested
3049 * loop.
3050 *
3051 * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after
3052 * the last matched character.
3053 * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an
3054 * undefined state!
3055 */
3056 static int
3057regmatch(
3058 char_u *scan, /* Current node. */
3059 proftime_T *tm UNUSED, /* timeout limit or NULL */
3060 int *timed_out UNUSED) /* flag set on timeout or NULL */
3061{
3062 char_u *next; /* Next node. */
3063 int op;
3064 int c;
3065 regitem_T *rp;
3066 int no;
3067 int status; /* one of the RA_ values: */
3068#ifdef FEAT_RELTIME
3069 int tm_count = 0;
3070#endif
3071
3072 /* Make "regstack" and "backpos" empty. They are allocated and freed in
3073 * bt_regexec_both() to reduce malloc()/free() calls. */
3074 regstack.ga_len = 0;
3075 backpos.ga_len = 0;
3076
3077 /*
3078 * Repeat until "regstack" is empty.
3079 */
3080 for (;;)
3081 {
3082 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
3083 * Allow interrupting them with CTRL-C. */
3084 fast_breakcheck();
3085
3086#ifdef DEBUG
3087 if (scan != NULL && regnarrate)
3088 {
3089 mch_errmsg((char *)regprop(scan));
3090 mch_errmsg("(\n");
3091 }
3092#endif
3093
3094 /*
3095 * Repeat for items that can be matched sequentially, without using the
3096 * regstack.
3097 */
3098 for (;;)
3099 {
3100 if (got_int || scan == NULL)
3101 {
3102 status = RA_FAIL;
3103 break;
3104 }
3105#ifdef FEAT_RELTIME
3106 /* Check for timeout once in a 100 times to avoid overhead. */
3107 if (tm != NULL && ++tm_count == 100)
3108 {
3109 tm_count = 0;
3110 if (profile_passed_limit(tm))
3111 {
3112 if (timed_out != NULL)
3113 *timed_out = TRUE;
3114 status = RA_FAIL;
3115 break;
3116 }
3117 }
3118#endif
3119 status = RA_CONT;
3120
3121#ifdef DEBUG
3122 if (regnarrate)
3123 {
3124 mch_errmsg((char *)regprop(scan));
3125 mch_errmsg("...\n");
3126# ifdef FEAT_SYN_HL
3127 if (re_extmatch_in != NULL)
3128 {
3129 int i;
3130
3131 mch_errmsg(_("External submatches:\n"));
3132 for (i = 0; i < NSUBEXP; i++)
3133 {
3134 mch_errmsg(" \"");
3135 if (re_extmatch_in->matches[i] != NULL)
3136 mch_errmsg((char *)re_extmatch_in->matches[i]);
3137 mch_errmsg("\"\n");
3138 }
3139 }
3140# endif
3141 }
3142#endif
3143 next = regnext(scan);
3144
3145 op = OP(scan);
3146 /* Check for character class with NL added. */
3147 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
3148 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
3149 {
3150 reg_nextline();
3151 }
3152 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
3153 {
3154 ADVANCE_REGINPUT();
3155 }
3156 else
3157 {
3158 if (WITH_NL(op))
3159 op -= ADD_NL;
3160 if (has_mbyte)
3161 c = (*mb_ptr2char)(rex.input);
3162 else
3163 c = *rex.input;
3164 switch (op)
3165 {
3166 case BOL:
3167 if (rex.input != rex.line)
3168 status = RA_NOMATCH;
3169 break;
3170
3171 case EOL:
3172 if (c != NUL)
3173 status = RA_NOMATCH;
3174 break;
3175
3176 case RE_BOF:
3177 /* We're not at the beginning of the file when below the first
3178 * line where we started, not at the start of the line or we
3179 * didn't start at the first line of the buffer. */
3180 if (rex.lnum != 0 || rex.input != rex.line
3181 || (REG_MULTI && rex.reg_firstlnum > 1))
3182 status = RA_NOMATCH;
3183 break;
3184
3185 case RE_EOF:
3186 if (rex.lnum != rex.reg_maxline || c != NUL)
3187 status = RA_NOMATCH;
3188 break;
3189
3190 case CURSOR:
3191 /* Check if the buffer is in a window and compare the
3192 * rex.reg_win->w_cursor position to the match position. */
3193 if (rex.reg_win == NULL
3194 || (rex.lnum + rex.reg_firstlnum
3195 != rex.reg_win->w_cursor.lnum)
3196 || ((colnr_T)(rex.input - rex.line)
3197 != rex.reg_win->w_cursor.col))
3198 status = RA_NOMATCH;
3199 break;
3200
3201 case RE_MARK:
3202 /* Compare the mark position to the match position. */
3203 {
3204 int mark = OPERAND(scan)[0];
3205 int cmp = OPERAND(scan)[1];
3206 pos_T *pos;
3207
3208 pos = getmark_buf(rex.reg_buf, mark, FALSE);
3209 if (pos == NULL /* mark doesn't exist */
3210 || pos->lnum <= 0 /* mark isn't set in reg_buf */
3211 || (pos->lnum == rex.lnum + rex.reg_firstlnum
3212 ? (pos->col == (colnr_T)(rex.input - rex.line)
3213 ? (cmp == '<' || cmp == '>')
3214 : (pos->col < (colnr_T)(rex.input - rex.line)
3215 ? cmp != '>'
3216 : cmp != '<'))
3217 : (pos->lnum < rex.lnum + rex.reg_firstlnum
3218 ? cmp != '>'
3219 : cmp != '<')))
3220 status = RA_NOMATCH;
3221 }
3222 break;
3223
3224 case RE_VISUAL:
3225 if (!reg_match_visual())
3226 status = RA_NOMATCH;
3227 break;
3228
3229 case RE_LNUM:
3230 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
3231 scan))
3232 status = RA_NOMATCH;
3233 break;
3234
3235 case RE_COL:
3236 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
3237 status = RA_NOMATCH;
3238 break;
3239
3240 case RE_VCOL:
3241 if (!re_num_cmp((long_u)win_linetabsize(
3242 rex.reg_win == NULL ? curwin : rex.reg_win,
3243 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
3244 status = RA_NOMATCH;
3245 break;
3246
3247 case BOW: /* \<word; rex.input points to w */
3248 if (c == NUL) /* Can't match at end of line */
3249 status = RA_NOMATCH;
3250 else if (has_mbyte)
3251 {
3252 int this_class;
3253
3254 /* Get class of current and previous char (if it exists). */
3255 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3256 if (this_class <= 1)
3257 status = RA_NOMATCH; /* not on a word at all */
3258 else if (reg_prev_class() == this_class)
3259 status = RA_NOMATCH; /* previous char is in same word */
3260 }
3261 else
3262 {
3263 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
3264 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
3265 status = RA_NOMATCH;
3266 }
3267 break;
3268
3269 case EOW: /* word\>; rex.input points after d */
3270 if (rex.input == rex.line) /* Can't match at start of line */
3271 status = RA_NOMATCH;
3272 else if (has_mbyte)
3273 {
3274 int this_class, prev_class;
3275
3276 /* Get class of current and previous char (if it exists). */
3277 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3278 prev_class = reg_prev_class();
3279 if (this_class == prev_class
3280 || prev_class == 0 || prev_class == 1)
3281 status = RA_NOMATCH;
3282 }
3283 else
3284 {
3285 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
3286 || (rex.input[0] != NUL
3287 && vim_iswordc_buf(c, rex.reg_buf)))
3288 status = RA_NOMATCH;
3289 }
3290 break; /* Matched with EOW */
3291
3292 case ANY:
3293 /* ANY does not match new lines. */
3294 if (c == NUL)
3295 status = RA_NOMATCH;
3296 else
3297 ADVANCE_REGINPUT();
3298 break;
3299
3300 case IDENT:
3301 if (!vim_isIDc(c))
3302 status = RA_NOMATCH;
3303 else
3304 ADVANCE_REGINPUT();
3305 break;
3306
3307 case SIDENT:
3308 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
3309 status = RA_NOMATCH;
3310 else
3311 ADVANCE_REGINPUT();
3312 break;
3313
3314 case KWORD:
3315 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
3316 status = RA_NOMATCH;
3317 else
3318 ADVANCE_REGINPUT();
3319 break;
3320
3321 case SKWORD:
3322 if (VIM_ISDIGIT(*rex.input)
3323 || !vim_iswordp_buf(rex.input, rex.reg_buf))
3324 status = RA_NOMATCH;
3325 else
3326 ADVANCE_REGINPUT();
3327 break;
3328
3329 case FNAME:
3330 if (!vim_isfilec(c))
3331 status = RA_NOMATCH;
3332 else
3333 ADVANCE_REGINPUT();
3334 break;
3335
3336 case SFNAME:
3337 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
3338 status = RA_NOMATCH;
3339 else
3340 ADVANCE_REGINPUT();
3341 break;
3342
3343 case PRINT:
3344 if (!vim_isprintc(PTR2CHAR(rex.input)))
3345 status = RA_NOMATCH;
3346 else
3347 ADVANCE_REGINPUT();
3348 break;
3349
3350 case SPRINT:
3351 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
3352 status = RA_NOMATCH;
3353 else
3354 ADVANCE_REGINPUT();
3355 break;
3356
3357 case WHITE:
3358 if (!VIM_ISWHITE(c))
3359 status = RA_NOMATCH;
3360 else
3361 ADVANCE_REGINPUT();
3362 break;
3363
3364 case NWHITE:
3365 if (c == NUL || VIM_ISWHITE(c))
3366 status = RA_NOMATCH;
3367 else
3368 ADVANCE_REGINPUT();
3369 break;
3370
3371 case DIGIT:
3372 if (!ri_digit(c))
3373 status = RA_NOMATCH;
3374 else
3375 ADVANCE_REGINPUT();
3376 break;
3377
3378 case NDIGIT:
3379 if (c == NUL || ri_digit(c))
3380 status = RA_NOMATCH;
3381 else
3382 ADVANCE_REGINPUT();
3383 break;
3384
3385 case HEX:
3386 if (!ri_hex(c))
3387 status = RA_NOMATCH;
3388 else
3389 ADVANCE_REGINPUT();
3390 break;
3391
3392 case NHEX:
3393 if (c == NUL || ri_hex(c))
3394 status = RA_NOMATCH;
3395 else
3396 ADVANCE_REGINPUT();
3397 break;
3398
3399 case OCTAL:
3400 if (!ri_octal(c))
3401 status = RA_NOMATCH;
3402 else
3403 ADVANCE_REGINPUT();
3404 break;
3405
3406 case NOCTAL:
3407 if (c == NUL || ri_octal(c))
3408 status = RA_NOMATCH;
3409 else
3410 ADVANCE_REGINPUT();
3411 break;
3412
3413 case WORD:
3414 if (!ri_word(c))
3415 status = RA_NOMATCH;
3416 else
3417 ADVANCE_REGINPUT();
3418 break;
3419
3420 case NWORD:
3421 if (c == NUL || ri_word(c))
3422 status = RA_NOMATCH;
3423 else
3424 ADVANCE_REGINPUT();
3425 break;
3426
3427 case HEAD:
3428 if (!ri_head(c))
3429 status = RA_NOMATCH;
3430 else
3431 ADVANCE_REGINPUT();
3432 break;
3433
3434 case NHEAD:
3435 if (c == NUL || ri_head(c))
3436 status = RA_NOMATCH;
3437 else
3438 ADVANCE_REGINPUT();
3439 break;
3440
3441 case ALPHA:
3442 if (!ri_alpha(c))
3443 status = RA_NOMATCH;
3444 else
3445 ADVANCE_REGINPUT();
3446 break;
3447
3448 case NALPHA:
3449 if (c == NUL || ri_alpha(c))
3450 status = RA_NOMATCH;
3451 else
3452 ADVANCE_REGINPUT();
3453 break;
3454
3455 case LOWER:
3456 if (!ri_lower(c))
3457 status = RA_NOMATCH;
3458 else
3459 ADVANCE_REGINPUT();
3460 break;
3461
3462 case NLOWER:
3463 if (c == NUL || ri_lower(c))
3464 status = RA_NOMATCH;
3465 else
3466 ADVANCE_REGINPUT();
3467 break;
3468
3469 case UPPER:
3470 if (!ri_upper(c))
3471 status = RA_NOMATCH;
3472 else
3473 ADVANCE_REGINPUT();
3474 break;
3475
3476 case NUPPER:
3477 if (c == NUL || ri_upper(c))
3478 status = RA_NOMATCH;
3479 else
3480 ADVANCE_REGINPUT();
3481 break;
3482
3483 case EXACTLY:
3484 {
3485 int len;
3486 char_u *opnd;
3487
3488 opnd = OPERAND(scan);
3489 /* Inline the first byte, for speed. */
3490 if (*opnd != *rex.input
3491 && (!rex.reg_ic
3492 || (!enc_utf8
3493 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
3494 status = RA_NOMATCH;
3495 else if (*opnd == NUL)
3496 {
3497 /* match empty string always works; happens when "~" is
3498 * empty. */
3499 }
3500 else
3501 {
3502 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
3503 {
3504 len = 1; /* matched a single byte above */
3505 }
3506 else
3507 {
3508 /* Need to match first byte again for multi-byte. */
3509 len = (int)STRLEN(opnd);
3510 if (cstrncmp(opnd, rex.input, &len) != 0)
3511 status = RA_NOMATCH;
3512 }
3513 /* Check for following composing character, unless %C
3514 * follows (skips over all composing chars). */
3515 if (status != RA_NOMATCH
3516 && enc_utf8
3517 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
3518 && !rex.reg_icombine
3519 && OP(next) != RE_COMPOSING)
3520 {
3521 /* raaron: This code makes a composing character get
3522 * ignored, which is the correct behavior (sometimes)
3523 * for voweled Hebrew texts. */
3524 status = RA_NOMATCH;
3525 }
3526 if (status != RA_NOMATCH)
3527 rex.input += len;
3528 }
3529 }
3530 break;
3531
3532 case ANYOF:
3533 case ANYBUT:
3534 if (c == NUL)
3535 status = RA_NOMATCH;
3536 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
3537 status = RA_NOMATCH;
3538 else
3539 ADVANCE_REGINPUT();
3540 break;
3541
3542 case MULTIBYTECODE:
3543 if (has_mbyte)
3544 {
3545 int i, len;
3546 char_u *opnd;
3547 int opndc = 0, inpc;
3548
3549 opnd = OPERAND(scan);
3550 /* Safety check (just in case 'encoding' was changed since
3551 * compiling the program). */
3552 if ((len = (*mb_ptr2len)(opnd)) < 2)
3553 {
3554 status = RA_NOMATCH;
3555 break;
3556 }
3557 if (enc_utf8)
3558 opndc = utf_ptr2char(opnd);
3559 if (enc_utf8 && utf_iscomposing(opndc))
3560 {
3561 /* When only a composing char is given match at any
3562 * position where that composing char appears. */
3563 status = RA_NOMATCH;
3564 for (i = 0; rex.input[i] != NUL;
3565 i += utf_ptr2len(rex.input + i))
3566 {
3567 inpc = utf_ptr2char(rex.input + i);
3568 if (!utf_iscomposing(inpc))
3569 {
3570 if (i > 0)
3571 break;
3572 }
3573 else if (opndc == inpc)
3574 {
3575 /* Include all following composing chars. */
3576 len = i + utfc_ptr2len(rex.input + i);
3577 status = RA_MATCH;
3578 break;
3579 }
3580 }
3581 }
3582 else
3583 for (i = 0; i < len; ++i)
3584 if (opnd[i] != rex.input[i])
3585 {
3586 status = RA_NOMATCH;
3587 break;
3588 }
3589 rex.input += len;
3590 }
3591 else
3592 status = RA_NOMATCH;
3593 break;
3594 case RE_COMPOSING:
3595 if (enc_utf8)
3596 {
3597 /* Skip composing characters. */
3598 while (utf_iscomposing(utf_ptr2char(rex.input)))
3599 MB_CPTR_ADV(rex.input);
3600 }
3601 break;
3602
3603 case NOTHING:
3604 break;
3605
3606 case BACK:
3607 {
3608 int i;
3609 backpos_T *bp;
3610
3611 /*
3612 * When we run into BACK we need to check if we don't keep
3613 * looping without matching any input. The second and later
3614 * times a BACK is encountered it fails if the input is still
3615 * at the same position as the previous time.
3616 * The positions are stored in "backpos" and found by the
3617 * current value of "scan", the position in the RE program.
3618 */
3619 bp = (backpos_T *)backpos.ga_data;
3620 for (i = 0; i < backpos.ga_len; ++i)
3621 if (bp[i].bp_scan == scan)
3622 break;
3623 if (i == backpos.ga_len)
3624 {
3625 /* First time at this BACK, make room to store the pos. */
3626 if (ga_grow(&backpos, 1) == FAIL)
3627 status = RA_FAIL;
3628 else
3629 {
3630 /* get "ga_data" again, it may have changed */
3631 bp = (backpos_T *)backpos.ga_data;
3632 bp[i].bp_scan = scan;
3633 ++backpos.ga_len;
3634 }
3635 }
3636 else if (reg_save_equal(&bp[i].bp_pos))
3637 /* Still at same position as last time, fail. */
3638 status = RA_NOMATCH;
3639
3640 if (status != RA_FAIL && status != RA_NOMATCH)
3641 reg_save(&bp[i].bp_pos, &backpos);
3642 }
3643 break;
3644
3645 case MOPEN + 0: /* Match start: \zs */
3646 case MOPEN + 1: /* \( */
3647 case MOPEN + 2:
3648 case MOPEN + 3:
3649 case MOPEN + 4:
3650 case MOPEN + 5:
3651 case MOPEN + 6:
3652 case MOPEN + 7:
3653 case MOPEN + 8:
3654 case MOPEN + 9:
3655 {
3656 no = op - MOPEN;
3657 cleanup_subexpr();
3658 rp = regstack_push(RS_MOPEN, scan);
3659 if (rp == NULL)
3660 status = RA_FAIL;
3661 else
3662 {
3663 rp->rs_no = no;
3664 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
3665 &rex.reg_startp[no]);
3666 /* We simply continue and handle the result when done. */
3667 }
3668 }
3669 break;
3670
3671 case NOPEN: /* \%( */
3672 case NCLOSE: /* \) after \%( */
3673 if (regstack_push(RS_NOPEN, scan) == NULL)
3674 status = RA_FAIL;
3675 /* We simply continue and handle the result when done. */
3676 break;
3677
3678#ifdef FEAT_SYN_HL
3679 case ZOPEN + 1:
3680 case ZOPEN + 2:
3681 case ZOPEN + 3:
3682 case ZOPEN + 4:
3683 case ZOPEN + 5:
3684 case ZOPEN + 6:
3685 case ZOPEN + 7:
3686 case ZOPEN + 8:
3687 case ZOPEN + 9:
3688 {
3689 no = op - ZOPEN;
3690 cleanup_zsubexpr();
3691 rp = regstack_push(RS_ZOPEN, scan);
3692 if (rp == NULL)
3693 status = RA_FAIL;
3694 else
3695 {
3696 rp->rs_no = no;
3697 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
3698 &reg_startzp[no]);
3699 /* We simply continue and handle the result when done. */
3700 }
3701 }
3702 break;
3703#endif
3704
3705 case MCLOSE + 0: /* Match end: \ze */
3706 case MCLOSE + 1: /* \) */
3707 case MCLOSE + 2:
3708 case MCLOSE + 3:
3709 case MCLOSE + 4:
3710 case MCLOSE + 5:
3711 case MCLOSE + 6:
3712 case MCLOSE + 7:
3713 case MCLOSE + 8:
3714 case MCLOSE + 9:
3715 {
3716 no = op - MCLOSE;
3717 cleanup_subexpr();
3718 rp = regstack_push(RS_MCLOSE, scan);
3719 if (rp == NULL)
3720 status = RA_FAIL;
3721 else
3722 {
3723 rp->rs_no = no;
3724 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
3725 &rex.reg_endp[no]);
3726 /* We simply continue and handle the result when done. */
3727 }
3728 }
3729 break;
3730
3731#ifdef FEAT_SYN_HL
3732 case ZCLOSE + 1: /* \) after \z( */
3733 case ZCLOSE + 2:
3734 case ZCLOSE + 3:
3735 case ZCLOSE + 4:
3736 case ZCLOSE + 5:
3737 case ZCLOSE + 6:
3738 case ZCLOSE + 7:
3739 case ZCLOSE + 8:
3740 case ZCLOSE + 9:
3741 {
3742 no = op - ZCLOSE;
3743 cleanup_zsubexpr();
3744 rp = regstack_push(RS_ZCLOSE, scan);
3745 if (rp == NULL)
3746 status = RA_FAIL;
3747 else
3748 {
3749 rp->rs_no = no;
3750 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
3751 &reg_endzp[no]);
3752 /* We simply continue and handle the result when done. */
3753 }
3754 }
3755 break;
3756#endif
3757
3758 case BACKREF + 1:
3759 case BACKREF + 2:
3760 case BACKREF + 3:
3761 case BACKREF + 4:
3762 case BACKREF + 5:
3763 case BACKREF + 6:
3764 case BACKREF + 7:
3765 case BACKREF + 8:
3766 case BACKREF + 9:
3767 {
3768 int len;
3769
3770 no = op - BACKREF;
3771 cleanup_subexpr();
3772 if (!REG_MULTI) /* Single-line regexp */
3773 {
3774 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
3775 {
3776 /* Backref was not set: Match an empty string. */
3777 len = 0;
3778 }
3779 else
3780 {
3781 /* Compare current input with back-ref in the same
3782 * line. */
3783 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
3784 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
3785 status = RA_NOMATCH;
3786 }
3787 }
3788 else /* Multi-line regexp */
3789 {
3790 if (rex.reg_startpos[no].lnum < 0
3791 || rex.reg_endpos[no].lnum < 0)
3792 {
3793 /* Backref was not set: Match an empty string. */
3794 len = 0;
3795 }
3796 else
3797 {
3798 if (rex.reg_startpos[no].lnum == rex.lnum
3799 && rex.reg_endpos[no].lnum == rex.lnum)
3800 {
3801 /* Compare back-ref within the current line. */
3802 len = rex.reg_endpos[no].col
3803 - rex.reg_startpos[no].col;
3804 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
3805 rex.input, &len) != 0)
3806 status = RA_NOMATCH;
3807 }
3808 else
3809 {
3810 /* Messy situation: Need to compare between two
3811 * lines. */
3812 int r = match_with_backref(
3813 rex.reg_startpos[no].lnum,
3814 rex.reg_startpos[no].col,
3815 rex.reg_endpos[no].lnum,
3816 rex.reg_endpos[no].col,
3817 &len);
3818
3819 if (r != RA_MATCH)
3820 status = r;
3821 }
3822 }
3823 }
3824
3825 /* Matched the backref, skip over it. */
3826 rex.input += len;
3827 }
3828 break;
3829
3830#ifdef FEAT_SYN_HL
3831 case ZREF + 1:
3832 case ZREF + 2:
3833 case ZREF + 3:
3834 case ZREF + 4:
3835 case ZREF + 5:
3836 case ZREF + 6:
3837 case ZREF + 7:
3838 case ZREF + 8:
3839 case ZREF + 9:
3840 {
3841 int len;
3842
3843 cleanup_zsubexpr();
3844 no = op - ZREF;
3845 if (re_extmatch_in != NULL
3846 && re_extmatch_in->matches[no] != NULL)
3847 {
3848 len = (int)STRLEN(re_extmatch_in->matches[no]);
3849 if (cstrncmp(re_extmatch_in->matches[no],
3850 rex.input, &len) != 0)
3851 status = RA_NOMATCH;
3852 else
3853 rex.input += len;
3854 }
3855 else
3856 {
3857 /* Backref was not set: Match an empty string. */
3858 }
3859 }
3860 break;
3861#endif
3862
3863 case BRANCH:
3864 {
3865 if (OP(next) != BRANCH) /* No choice. */
3866 next = OPERAND(scan); /* Avoid recursion. */
3867 else
3868 {
3869 rp = regstack_push(RS_BRANCH, scan);
3870 if (rp == NULL)
3871 status = RA_FAIL;
3872 else
3873 status = RA_BREAK; /* rest is below */
3874 }
3875 }
3876 break;
3877
3878 case BRACE_LIMITS:
3879 {
3880 if (OP(next) == BRACE_SIMPLE)
3881 {
3882 bl_minval = OPERAND_MIN(scan);
3883 bl_maxval = OPERAND_MAX(scan);
3884 }
3885 else if (OP(next) >= BRACE_COMPLEX
3886 && OP(next) < BRACE_COMPLEX + 10)
3887 {
3888 no = OP(next) - BRACE_COMPLEX;
3889 brace_min[no] = OPERAND_MIN(scan);
3890 brace_max[no] = OPERAND_MAX(scan);
3891 brace_count[no] = 0;
3892 }
3893 else
3894 {
3895 internal_error("BRACE_LIMITS");
3896 status = RA_FAIL;
3897 }
3898 }
3899 break;
3900
3901 case BRACE_COMPLEX + 0:
3902 case BRACE_COMPLEX + 1:
3903 case BRACE_COMPLEX + 2:
3904 case BRACE_COMPLEX + 3:
3905 case BRACE_COMPLEX + 4:
3906 case BRACE_COMPLEX + 5:
3907 case BRACE_COMPLEX + 6:
3908 case BRACE_COMPLEX + 7:
3909 case BRACE_COMPLEX + 8:
3910 case BRACE_COMPLEX + 9:
3911 {
3912 no = op - BRACE_COMPLEX;
3913 ++brace_count[no];
3914
3915 /* If not matched enough times yet, try one more */
3916 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
3917 ? brace_min[no] : brace_max[no]))
3918 {
3919 rp = regstack_push(RS_BRCPLX_MORE, scan);
3920 if (rp == NULL)
3921 status = RA_FAIL;
3922 else
3923 {
3924 rp->rs_no = no;
3925 reg_save(&rp->rs_un.regsave, &backpos);
3926 next = OPERAND(scan);
3927 /* We continue and handle the result when done. */
3928 }
3929 break;
3930 }
3931
3932 /* If matched enough times, may try matching some more */
3933 if (brace_min[no] <= brace_max[no])
3934 {
3935 /* Range is the normal way around, use longest match */
3936 if (brace_count[no] <= brace_max[no])
3937 {
3938 rp = regstack_push(RS_BRCPLX_LONG, scan);
3939 if (rp == NULL)
3940 status = RA_FAIL;
3941 else
3942 {
3943 rp->rs_no = no;
3944 reg_save(&rp->rs_un.regsave, &backpos);
3945 next = OPERAND(scan);
3946 /* We continue and handle the result when done. */
3947 }
3948 }
3949 }
3950 else
3951 {
3952 /* Range is backwards, use shortest match first */
3953 if (brace_count[no] <= brace_min[no])
3954 {
3955 rp = regstack_push(RS_BRCPLX_SHORT, scan);
3956 if (rp == NULL)
3957 status = RA_FAIL;
3958 else
3959 {
3960 reg_save(&rp->rs_un.regsave, &backpos);
3961 /* We continue and handle the result when done. */
3962 }
3963 }
3964 }
3965 }
3966 break;
3967
3968 case BRACE_SIMPLE:
3969 case STAR:
3970 case PLUS:
3971 {
3972 regstar_T rst;
3973
3974 /*
3975 * Lookahead to avoid useless match attempts when we know
3976 * what character comes next.
3977 */
3978 if (OP(next) == EXACTLY)
3979 {
3980 rst.nextb = *OPERAND(next);
3981 if (rex.reg_ic)
3982 {
3983 if (MB_ISUPPER(rst.nextb))
3984 rst.nextb_ic = MB_TOLOWER(rst.nextb);
3985 else
3986 rst.nextb_ic = MB_TOUPPER(rst.nextb);
3987 }
3988 else
3989 rst.nextb_ic = rst.nextb;
3990 }
3991 else
3992 {
3993 rst.nextb = NUL;
3994 rst.nextb_ic = NUL;
3995 }
3996 if (op != BRACE_SIMPLE)
3997 {
3998 rst.minval = (op == STAR) ? 0 : 1;
3999 rst.maxval = MAX_LIMIT;
4000 }
4001 else
4002 {
4003 rst.minval = bl_minval;
4004 rst.maxval = bl_maxval;
4005 }
4006
4007 /*
4008 * When maxval > minval, try matching as much as possible, up
4009 * to maxval. When maxval < minval, try matching at least the
4010 * minimal number (since the range is backwards, that's also
4011 * maxval!).
4012 */
4013 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4014 if (got_int)
4015 {
4016 status = RA_FAIL;
4017 break;
4018 }
4019 if (rst.minval <= rst.maxval
4020 ? rst.count >= rst.minval : rst.count >= rst.maxval)
4021 {
4022 /* It could match. Prepare for trying to match what
4023 * follows. The code is below. Parameters are stored in
4024 * a regstar_T on the regstack. */
4025 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4026 {
4027 emsg(_(e_maxmempat));
4028 status = RA_FAIL;
4029 }
4030 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4031 status = RA_FAIL;
4032 else
4033 {
4034 regstack.ga_len += sizeof(regstar_T);
4035 rp = regstack_push(rst.minval <= rst.maxval
4036 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4037 if (rp == NULL)
4038 status = RA_FAIL;
4039 else
4040 {
4041 *(((regstar_T *)rp) - 1) = rst;
4042 status = RA_BREAK; /* skip the restore bits */
4043 }
4044 }
4045 }
4046 else
4047 status = RA_NOMATCH;
4048
4049 }
4050 break;
4051
4052 case NOMATCH:
4053 case MATCH:
4054 case SUBPAT:
4055 rp = regstack_push(RS_NOMATCH, scan);
4056 if (rp == NULL)
4057 status = RA_FAIL;
4058 else
4059 {
4060 rp->rs_no = op;
4061 reg_save(&rp->rs_un.regsave, &backpos);
4062 next = OPERAND(scan);
4063 /* We continue and handle the result when done. */
4064 }
4065 break;
4066
4067 case BEHIND:
4068 case NOBEHIND:
4069 /* Need a bit of room to store extra positions. */
4070 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4071 {
4072 emsg(_(e_maxmempat));
4073 status = RA_FAIL;
4074 }
4075 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4076 status = RA_FAIL;
4077 else
4078 {
4079 regstack.ga_len += sizeof(regbehind_T);
4080 rp = regstack_push(RS_BEHIND1, scan);
4081 if (rp == NULL)
4082 status = RA_FAIL;
4083 else
4084 {
4085 /* Need to save the subexpr to be able to restore them
4086 * when there is a match but we don't use it. */
4087 save_subexpr(((regbehind_T *)rp) - 1);
4088
4089 rp->rs_no = op;
4090 reg_save(&rp->rs_un.regsave, &backpos);
4091 /* First try if what follows matches. If it does then we
4092 * check the behind match by looping. */
4093 }
4094 }
4095 break;
4096
4097 case BHPOS:
4098 if (REG_MULTI)
4099 {
4100 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
4101 || behind_pos.rs_u.pos.lnum != rex.lnum)
4102 status = RA_NOMATCH;
4103 }
4104 else if (behind_pos.rs_u.ptr != rex.input)
4105 status = RA_NOMATCH;
4106 break;
4107
4108 case NEWL:
4109 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
4110 || rex.reg_line_lbr)
4111 && (c != '\n' || !rex.reg_line_lbr))
4112 status = RA_NOMATCH;
4113 else if (rex.reg_line_lbr)
4114 ADVANCE_REGINPUT();
4115 else
4116 reg_nextline();
4117 break;
4118
4119 case END:
4120 status = RA_MATCH; /* Success! */
4121 break;
4122
4123 default:
4124 emsg(_(e_re_corr));
4125#ifdef DEBUG
4126 printf("Illegal op code %d\n", op);
4127#endif
4128 status = RA_FAIL;
4129 break;
4130 }
4131 }
4132
4133 /* If we can't continue sequentially, break the inner loop. */
4134 if (status != RA_CONT)
4135 break;
4136
4137 /* Continue in inner loop, advance to next item. */
4138 scan = next;
4139
4140 } /* end of inner loop */
4141
4142 /*
4143 * If there is something on the regstack execute the code for the state.
4144 * If the state is popped then loop and use the older state.
4145 */
4146 while (regstack.ga_len > 0 && status != RA_FAIL)
4147 {
4148 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4149 switch (rp->rs_state)
4150 {
4151 case RS_NOPEN:
4152 /* Result is passed on as-is, simply pop the state. */
4153 regstack_pop(&scan);
4154 break;
4155
4156 case RS_MOPEN:
4157 /* Pop the state. Restore pointers when there is no match. */
4158 if (status == RA_NOMATCH)
4159 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
4160 &rex.reg_startp[rp->rs_no]);
4161 regstack_pop(&scan);
4162 break;
4163
4164#ifdef FEAT_SYN_HL
4165 case RS_ZOPEN:
4166 /* Pop the state. Restore pointers when there is no match. */
4167 if (status == RA_NOMATCH)
4168 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4169 &reg_startzp[rp->rs_no]);
4170 regstack_pop(&scan);
4171 break;
4172#endif
4173
4174 case RS_MCLOSE:
4175 /* Pop the state. Restore pointers when there is no match. */
4176 if (status == RA_NOMATCH)
4177 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
4178 &rex.reg_endp[rp->rs_no]);
4179 regstack_pop(&scan);
4180 break;
4181
4182#ifdef FEAT_SYN_HL
4183 case RS_ZCLOSE:
4184 /* Pop the state. Restore pointers when there is no match. */
4185 if (status == RA_NOMATCH)
4186 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4187 &reg_endzp[rp->rs_no]);
4188 regstack_pop(&scan);
4189 break;
4190#endif
4191
4192 case RS_BRANCH:
4193 if (status == RA_MATCH)
4194 /* this branch matched, use it */
4195 regstack_pop(&scan);
4196 else
4197 {
4198 if (status != RA_BREAK)
4199 {
4200 /* After a non-matching branch: try next one. */
4201 reg_restore(&rp->rs_un.regsave, &backpos);
4202 scan = rp->rs_scan;
4203 }
4204 if (scan == NULL || OP(scan) != BRANCH)
4205 {
4206 /* no more branches, didn't find a match */
4207 status = RA_NOMATCH;
4208 regstack_pop(&scan);
4209 }
4210 else
4211 {
4212 /* Prepare to try a branch. */
4213 rp->rs_scan = regnext(scan);
4214 reg_save(&rp->rs_un.regsave, &backpos);
4215 scan = OPERAND(scan);
4216 }
4217 }
4218 break;
4219
4220 case RS_BRCPLX_MORE:
4221 /* Pop the state. Restore pointers when there is no match. */
4222 if (status == RA_NOMATCH)
4223 {
4224 reg_restore(&rp->rs_un.regsave, &backpos);
4225 --brace_count[rp->rs_no]; /* decrement match count */
4226 }
4227 regstack_pop(&scan);
4228 break;
4229
4230 case RS_BRCPLX_LONG:
4231 /* Pop the state. Restore pointers when there is no match. */
4232 if (status == RA_NOMATCH)
4233 {
4234 /* There was no match, but we did find enough matches. */
4235 reg_restore(&rp->rs_un.regsave, &backpos);
4236 --brace_count[rp->rs_no];
4237 /* continue with the items after "\{}" */
4238 status = RA_CONT;
4239 }
4240 regstack_pop(&scan);
4241 if (status == RA_CONT)
4242 scan = regnext(scan);
4243 break;
4244
4245 case RS_BRCPLX_SHORT:
4246 /* Pop the state. Restore pointers when there is no match. */
4247 if (status == RA_NOMATCH)
4248 /* There was no match, try to match one more item. */
4249 reg_restore(&rp->rs_un.regsave, &backpos);
4250 regstack_pop(&scan);
4251 if (status == RA_NOMATCH)
4252 {
4253 scan = OPERAND(scan);
4254 status = RA_CONT;
4255 }
4256 break;
4257
4258 case RS_NOMATCH:
4259 /* Pop the state. If the operand matches for NOMATCH or
4260 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
4261 * except for SUBPAT, and continue with the next item. */
4262 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4263 status = RA_NOMATCH;
4264 else
4265 {
4266 status = RA_CONT;
4267 if (rp->rs_no != SUBPAT) /* zero-width */
4268 reg_restore(&rp->rs_un.regsave, &backpos);
4269 }
4270 regstack_pop(&scan);
4271 if (status == RA_CONT)
4272 scan = regnext(scan);
4273 break;
4274
4275 case RS_BEHIND1:
4276 if (status == RA_NOMATCH)
4277 {
4278 regstack_pop(&scan);
4279 regstack.ga_len -= sizeof(regbehind_T);
4280 }
4281 else
4282 {
4283 /* The stuff after BEHIND/NOBEHIND matches. Now try if
4284 * the behind part does (not) match before the current
4285 * position in the input. This must be done at every
4286 * position in the input and checking if the match ends at
4287 * the current position. */
4288
4289 /* save the position after the found match for next */
4290 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
4291
4292 /* Start looking for a match with operand at the current
4293 * position. Go back one character until we find the
4294 * result, hitting the start of the line or the previous
4295 * line (for multi-line matching).
4296 * Set behind_pos to where the match should end, BHPOS
4297 * will match it. Save the current value. */
4298 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4299 behind_pos = rp->rs_un.regsave;
4300
4301 rp->rs_state = RS_BEHIND2;
4302
4303 reg_restore(&rp->rs_un.regsave, &backpos);
4304 scan = OPERAND(rp->rs_scan) + 4;
4305 }
4306 break;
4307
4308 case RS_BEHIND2:
4309 /*
4310 * Looping for BEHIND / NOBEHIND match.
4311 */
4312 if (status == RA_MATCH && reg_save_equal(&behind_pos))
4313 {
4314 /* found a match that ends where "next" started */
4315 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4316 if (rp->rs_no == BEHIND)
4317 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4318 &backpos);
4319 else
4320 {
4321 /* But we didn't want a match. Need to restore the
4322 * subexpr, because what follows matched, so they have
4323 * been set. */
4324 status = RA_NOMATCH;
4325 restore_subexpr(((regbehind_T *)rp) - 1);
4326 }
4327 regstack_pop(&scan);
4328 regstack.ga_len -= sizeof(regbehind_T);
4329 }
4330 else
4331 {
4332 long limit;
4333
4334 /* No match or a match that doesn't end where we want it: Go
4335 * back one character. May go to previous line once. */
4336 no = OK;
4337 limit = OPERAND_MIN(rp->rs_scan);
4338 if (REG_MULTI)
4339 {
4340 if (limit > 0
4341 && ((rp->rs_un.regsave.rs_u.pos.lnum
4342 < behind_pos.rs_u.pos.lnum
4343 ? (colnr_T)STRLEN(rex.line)
4344 : behind_pos.rs_u.pos.col)
4345 - rp->rs_un.regsave.rs_u.pos.col >= limit))
4346 no = FAIL;
4347 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
4348 {
4349 if (rp->rs_un.regsave.rs_u.pos.lnum
4350 < behind_pos.rs_u.pos.lnum
4351 || reg_getline(
4352 --rp->rs_un.regsave.rs_u.pos.lnum)
4353 == NULL)
4354 no = FAIL;
4355 else
4356 {
4357 reg_restore(&rp->rs_un.regsave, &backpos);
4358 rp->rs_un.regsave.rs_u.pos.col =
4359 (colnr_T)STRLEN(rex.line);
4360 }
4361 }
4362 else
4363 {
4364 if (has_mbyte)
4365 {
4366 char_u *line =
4367 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
4368
4369 rp->rs_un.regsave.rs_u.pos.col -=
4370 (*mb_head_off)(line, line
4371 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
4372 }
4373 else
4374 --rp->rs_un.regsave.rs_u.pos.col;
4375 }
4376 }
4377 else
4378 {
4379 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
4380 no = FAIL;
4381 else
4382 {
4383 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
4384 if (limit > 0 && (long)(behind_pos.rs_u.ptr
4385 - rp->rs_un.regsave.rs_u.ptr) > limit)
4386 no = FAIL;
4387 }
4388 }
4389 if (no == OK)
4390 {
4391 /* Advanced, prepare for finding match again. */
4392 reg_restore(&rp->rs_un.regsave, &backpos);
4393 scan = OPERAND(rp->rs_scan) + 4;
4394 if (status == RA_MATCH)
4395 {
4396 /* We did match, so subexpr may have been changed,
4397 * need to restore them for the next try. */
4398 status = RA_NOMATCH;
4399 restore_subexpr(((regbehind_T *)rp) - 1);
4400 }
4401 }
4402 else
4403 {
4404 /* Can't advance. For NOBEHIND that's a match. */
4405 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4406 if (rp->rs_no == NOBEHIND)
4407 {
4408 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4409 &backpos);
4410 status = RA_MATCH;
4411 }
4412 else
4413 {
4414 /* We do want a proper match. Need to restore the
4415 * subexpr if we had a match, because they may have
4416 * been set. */
4417 if (status == RA_MATCH)
4418 {
4419 status = RA_NOMATCH;
4420 restore_subexpr(((regbehind_T *)rp) - 1);
4421 }
4422 }
4423 regstack_pop(&scan);
4424 regstack.ga_len -= sizeof(regbehind_T);
4425 }
4426 }
4427 break;
4428
4429 case RS_STAR_LONG:
4430 case RS_STAR_SHORT:
4431 {
4432 regstar_T *rst = ((regstar_T *)rp) - 1;
4433
4434 if (status == RA_MATCH)
4435 {
4436 regstack_pop(&scan);
4437 regstack.ga_len -= sizeof(regstar_T);
4438 break;
4439 }
4440
4441 /* Tried once already, restore input pointers. */
4442 if (status != RA_BREAK)
4443 reg_restore(&rp->rs_un.regsave, &backpos);
4444
4445 /* Repeat until we found a position where it could match. */
4446 for (;;)
4447 {
4448 if (status != RA_BREAK)
4449 {
4450 /* Tried first position already, advance. */
4451 if (rp->rs_state == RS_STAR_LONG)
4452 {
4453 /* Trying for longest match, but couldn't or
4454 * didn't match -- back up one char. */
4455 if (--rst->count < rst->minval)
4456 break;
4457 if (rex.input == rex.line)
4458 {
4459 /* backup to last char of previous line */
4460 --rex.lnum;
4461 rex.line = reg_getline(rex.lnum);
4462 /* Just in case regrepeat() didn't count
4463 * right. */
4464 if (rex.line == NULL)
4465 break;
4466 rex.input = rex.line + STRLEN(rex.line);
4467 fast_breakcheck();
4468 }
4469 else
4470 MB_PTR_BACK(rex.line, rex.input);
4471 }
4472 else
4473 {
4474 /* Range is backwards, use shortest match first.
4475 * Careful: maxval and minval are exchanged!
4476 * Couldn't or didn't match: try advancing one
4477 * char. */
4478 if (rst->count == rst->minval
4479 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
4480 break;
4481 ++rst->count;
4482 }
4483 if (got_int)
4484 break;
4485 }
4486 else
4487 status = RA_NOMATCH;
4488
4489 /* If it could match, try it. */
4490 if (rst->nextb == NUL || *rex.input == rst->nextb
4491 || *rex.input == rst->nextb_ic)
4492 {
4493 reg_save(&rp->rs_un.regsave, &backpos);
4494 scan = regnext(rp->rs_scan);
4495 status = RA_CONT;
4496 break;
4497 }
4498 }
4499 if (status != RA_CONT)
4500 {
4501 /* Failed. */
4502 regstack_pop(&scan);
4503 regstack.ga_len -= sizeof(regstar_T);
4504 status = RA_NOMATCH;
4505 }
4506 }
4507 break;
4508 }
4509
4510 /* If we want to continue the inner loop or didn't pop a state
4511 * continue matching loop */
4512 if (status == RA_CONT || rp == (regitem_T *)
4513 ((char *)regstack.ga_data + regstack.ga_len) - 1)
4514 break;
4515 }
4516
4517 /* May need to continue with the inner loop, starting at "scan". */
4518 if (status == RA_CONT)
4519 continue;
4520
4521 /*
4522 * If the regstack is empty or something failed we are done.
4523 */
4524 if (regstack.ga_len == 0 || status == RA_FAIL)
4525 {
4526 if (scan == NULL)
4527 {
4528 /*
4529 * We get here only if there's trouble -- normally "case END" is
4530 * the terminating point.
4531 */
4532 emsg(_(e_re_corr));
4533#ifdef DEBUG
4534 printf("Premature EOL\n");
4535#endif
4536 }
4537 return (status == RA_MATCH);
4538 }
4539
4540 } /* End of loop until the regstack is empty. */
4541
4542 /* NOTREACHED */
4543}
4544
4545/*
4546 * regtry - try match of "prog" with at rex.line["col"].
4547 * Returns 0 for failure, number of lines contained in the match otherwise.
4548 */
4549 static long
4550regtry(
4551 bt_regprog_T *prog,
4552 colnr_T col,
4553 proftime_T *tm, // timeout limit or NULL
4554 int *timed_out) // flag set on timeout or NULL
4555{
4556 rex.input = rex.line + col;
4557 rex.need_clear_subexpr = TRUE;
4558#ifdef FEAT_SYN_HL
4559 // Clear the external match subpointers if necessary.
4560 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
4561#endif
4562
4563 if (regmatch(prog->program + 1, tm, timed_out) == 0)
4564 return 0;
4565
4566 cleanup_subexpr();
4567 if (REG_MULTI)
4568 {
4569 if (rex.reg_startpos[0].lnum < 0)
4570 {
4571 rex.reg_startpos[0].lnum = 0;
4572 rex.reg_startpos[0].col = col;
4573 }
4574 if (rex.reg_endpos[0].lnum < 0)
4575 {
4576 rex.reg_endpos[0].lnum = rex.lnum;
4577 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
4578 }
4579 else
4580 // Use line number of "\ze".
4581 rex.lnum = rex.reg_endpos[0].lnum;
4582 }
4583 else
4584 {
4585 if (rex.reg_startp[0] == NULL)
4586 rex.reg_startp[0] = rex.line + col;
4587 if (rex.reg_endp[0] == NULL)
4588 rex.reg_endp[0] = rex.input;
4589 }
4590#ifdef FEAT_SYN_HL
4591 // Package any found \z(...\) matches for export. Default is none.
4592 unref_extmatch(re_extmatch_out);
4593 re_extmatch_out = NULL;
4594
4595 if (prog->reghasz == REX_SET)
4596 {
4597 int i;
4598
4599 cleanup_zsubexpr();
4600 re_extmatch_out = make_extmatch();
4601 for (i = 0; i < NSUBEXP; i++)
4602 {
4603 if (REG_MULTI)
4604 {
4605 // Only accept single line matches.
4606 if (reg_startzpos[i].lnum >= 0
4607 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4608 && reg_endzpos[i].col >= reg_startzpos[i].col)
4609 re_extmatch_out->matches[i] =
4610 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
4611 + reg_startzpos[i].col,
4612 reg_endzpos[i].col - reg_startzpos[i].col);
4613 }
4614 else
4615 {
4616 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4617 re_extmatch_out->matches[i] =
4618 vim_strnsave(reg_startzp[i],
4619 (int)(reg_endzp[i] - reg_startzp[i]));
4620 }
4621 }
4622 }
4623#endif
4624 return 1 + rex.lnum;
4625}
4626
4627/*
4628 * Match a regexp against a string ("line" points to the string) or multiple
4629 * lines ("line" is NULL, use reg_getline()).
4630 * Returns 0 for failure, number of lines contained in the match otherwise.
4631 */
4632 static long
4633bt_regexec_both(
4634 char_u *line,
4635 colnr_T col, // column to start looking for match
4636 proftime_T *tm, // timeout limit or NULL
4637 int *timed_out) // flag set on timeout or NULL
4638{
4639 bt_regprog_T *prog;
4640 char_u *s;
4641 long retval = 0L;
4642
4643 // Create "regstack" and "backpos" if they are not allocated yet.
4644 // We allocate *_INITIAL amount of bytes first and then set the grow size
4645 // to much bigger value to avoid many malloc calls in case of deep regular
4646 // expressions.
4647 if (regstack.ga_data == NULL)
4648 {
4649 // Use an item size of 1 byte, since we push different things
4650 // onto the regstack.
4651 ga_init2(&regstack, 1, REGSTACK_INITIAL);
4652 (void)ga_grow(&regstack, REGSTACK_INITIAL);
4653 regstack.ga_growsize = REGSTACK_INITIAL * 8;
4654 }
4655
4656 if (backpos.ga_data == NULL)
4657 {
4658 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
4659 (void)ga_grow(&backpos, BACKPOS_INITIAL);
4660 backpos.ga_growsize = BACKPOS_INITIAL * 8;
4661 }
4662
4663 if (REG_MULTI)
4664 {
4665 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
4666 line = reg_getline((linenr_T)0);
4667 rex.reg_startpos = rex.reg_mmatch->startpos;
4668 rex.reg_endpos = rex.reg_mmatch->endpos;
4669 }
4670 else
4671 {
4672 prog = (bt_regprog_T *)rex.reg_match->regprog;
4673 rex.reg_startp = rex.reg_match->startp;
4674 rex.reg_endp = rex.reg_match->endp;
4675 }
4676
4677 // Be paranoid...
4678 if (prog == NULL || line == NULL)
4679 {
4680 emsg(_(e_null));
4681 goto theend;
4682 }
4683
4684 // Check validity of program.
4685 if (prog_magic_wrong())
4686 goto theend;
4687
4688 // If the start column is past the maximum column: no need to try.
4689 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4690 goto theend;
4691
4692 // If pattern contains "\c" or "\C": overrule value of rex.reg_ic
4693 if (prog->regflags & RF_ICASE)
4694 rex.reg_ic = TRUE;
4695 else if (prog->regflags & RF_NOICASE)
4696 rex.reg_ic = FALSE;
4697
4698 // If pattern contains "\Z" overrule value of rex.reg_icombine
4699 if (prog->regflags & RF_ICOMBINE)
4700 rex.reg_icombine = TRUE;
4701
4702 // If there is a "must appear" string, look for it.
4703 if (prog->regmust != NULL)
4704 {
4705 int c;
4706
4707 if (has_mbyte)
4708 c = (*mb_ptr2char)(prog->regmust);
4709 else
4710 c = *prog->regmust;
4711 s = line + col;
4712
4713 // This is used very often, esp. for ":global". Use three versions of
4714 // the loop to avoid overhead of conditions.
4715 if (!rex.reg_ic && !has_mbyte)
4716 while ((s = vim_strbyte(s, c)) != NULL)
4717 {
4718 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4719 break; // Found it.
4720 ++s;
4721 }
4722 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
4723 while ((s = vim_strchr(s, c)) != NULL)
4724 {
4725 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4726 break; // Found it.
4727 MB_PTR_ADV(s);
4728 }
4729 else
4730 while ((s = cstrchr(s, c)) != NULL)
4731 {
4732 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4733 break; // Found it.
4734 MB_PTR_ADV(s);
4735 }
4736 if (s == NULL) // Not present.
4737 goto theend;
4738 }
4739
4740 rex.line = line;
4741 rex.lnum = 0;
4742 reg_toolong = FALSE;
4743
4744 // Simplest case: Anchored match need be tried only once.
4745 if (prog->reganch)
4746 {
4747 int c;
4748
4749 if (has_mbyte)
4750 c = (*mb_ptr2char)(rex.line + col);
4751 else
4752 c = rex.line[col];
4753 if (prog->regstart == NUL
4754 || prog->regstart == c
4755 || (rex.reg_ic
4756 && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
4757 || (c < 255 && prog->regstart < 255 &&
4758 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
4759 retval = regtry(prog, col, tm, timed_out);
4760 else
4761 retval = 0;
4762 }
4763 else
4764 {
4765#ifdef FEAT_RELTIME
4766 int tm_count = 0;
4767#endif
4768 // Messy cases: unanchored match.
4769 while (!got_int)
4770 {
4771 if (prog->regstart != NUL)
4772 {
4773 // Skip until the char we know it must start with.
4774 // Used often, do some work to avoid call overhead.
4775 if (!rex.reg_ic && !has_mbyte)
4776 s = vim_strbyte(rex.line + col, prog->regstart);
4777 else
4778 s = cstrchr(rex.line + col, prog->regstart);
4779 if (s == NULL)
4780 {
4781 retval = 0;
4782 break;
4783 }
4784 col = (int)(s - rex.line);
4785 }
4786
4787 // Check for maximum column to try.
4788 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4789 {
4790 retval = 0;
4791 break;
4792 }
4793
4794 retval = regtry(prog, col, tm, timed_out);
4795 if (retval > 0)
4796 break;
4797
4798 // if not currently on the first line, get it again
4799 if (rex.lnum != 0)
4800 {
4801 rex.lnum = 0;
4802 rex.line = reg_getline((linenr_T)0);
4803 }
4804 if (rex.line[col] == NUL)
4805 break;
4806 if (has_mbyte)
4807 col += (*mb_ptr2len)(rex.line + col);
4808 else
4809 ++col;
4810#ifdef FEAT_RELTIME
4811 // Check for timeout once in a twenty times to avoid overhead.
4812 if (tm != NULL && ++tm_count == 20)
4813 {
4814 tm_count = 0;
4815 if (profile_passed_limit(tm))
4816 {
4817 if (timed_out != NULL)
4818 *timed_out = TRUE;
4819 break;
4820 }
4821 }
4822#endif
4823 }
4824 }
4825
4826theend:
4827 // Free "reg_tofree" when it's a bit big.
4828 // Free regstack and backpos if they are bigger than their initial size.
4829 if (reg_tofreelen > 400)
4830 VIM_CLEAR(reg_tofree);
4831 if (regstack.ga_maxlen > REGSTACK_INITIAL)
4832 ga_clear(&regstack);
4833 if (backpos.ga_maxlen > BACKPOS_INITIAL)
4834 ga_clear(&backpos);
4835
4836 return retval;
4837}
4838
4839/*
4840 * Match a regexp against a string.
4841 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
4842 * Uses curbuf for line count and 'iskeyword'.
4843 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
4844 *
4845 * Returns 0 for failure, number of lines contained in the match otherwise.
4846 */
4847 static int
4848bt_regexec_nl(
4849 regmatch_T *rmp,
4850 char_u *line, // string to match against
4851 colnr_T col, // column to start looking for match
4852 int line_lbr)
4853{
4854 rex.reg_match = rmp;
4855 rex.reg_mmatch = NULL;
4856 rex.reg_maxline = 0;
4857 rex.reg_line_lbr = line_lbr;
4858 rex.reg_buf = curbuf;
4859 rex.reg_win = NULL;
4860 rex.reg_ic = rmp->rm_ic;
4861 rex.reg_icombine = FALSE;
4862 rex.reg_maxcol = 0;
4863
4864 return bt_regexec_both(line, col, NULL, NULL);
4865}
4866
4867/*
4868 * Match a regexp against multiple lines.
4869 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
4870 * Uses curbuf for line count and 'iskeyword'.
4871 *
4872 * Return zero if there is no match. Return number of lines contained in the
4873 * match otherwise.
4874 */
4875 static long
4876bt_regexec_multi(
4877 regmmatch_T *rmp,
4878 win_T *win, // window in which to search or NULL
4879 buf_T *buf, // buffer in which to search
4880 linenr_T lnum, // nr of line to start looking for match
4881 colnr_T col, // column to start looking for match
4882 proftime_T *tm, // timeout limit or NULL
4883 int *timed_out) // flag set on timeout or NULL
4884{
4885 rex.reg_match = NULL;
4886 rex.reg_mmatch = rmp;
4887 rex.reg_buf = buf;
4888 rex.reg_win = win;
4889 rex.reg_firstlnum = lnum;
4890 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
4891 rex.reg_line_lbr = FALSE;
4892 rex.reg_ic = rmp->rmm_ic;
4893 rex.reg_icombine = FALSE;
4894 rex.reg_maxcol = rmp->rmm_maxcol;
4895
4896 return bt_regexec_both(NULL, col, tm, timed_out);
4897}
4898
4899/*
4900 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
4901 */
4902 static int
4903re_num_cmp(long_u val, char_u *scan)
4904{
4905 long_u n = OPERAND_MIN(scan);
4906
4907 if (OPERAND_CMP(scan) == '>')
4908 return val > n;
4909 if (OPERAND_CMP(scan) == '<')
4910 return val < n;
4911 return val == n;
4912}
4913
4914#ifdef BT_REGEXP_DUMP
4915
4916/*
4917 * regdump - dump a regexp onto stdout in vaguely comprehensible form
4918 */
4919 static void
4920regdump(char_u *pattern, bt_regprog_T *r)
4921{
4922 char_u *s;
4923 int op = EXACTLY; // Arbitrary non-END op.
4924 char_u *next;
4925 char_u *end = NULL;
4926 FILE *f;
4927
4928#ifdef BT_REGEXP_LOG
4929 f = fopen("bt_regexp_log.log", "a");
4930#else
4931 f = stdout;
4932#endif
4933 if (f == NULL)
4934 return;
4935 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
4936
4937 s = r->program + 1;
4938 // Loop until we find the END that isn't before a referred next (an END
4939 // can also appear in a NOMATCH operand).
4940 while (op != END || s <= end)
4941 {
4942 op = OP(s);
4943 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); // Where, what.
4944 next = regnext(s);
4945 if (next == NULL) // Next ptr.
4946 fprintf(f, "(0)");
4947 else
4948 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
4949 if (end < next)
4950 end = next;
4951 if (op == BRACE_LIMITS)
4952 {
4953 // Two ints
4954 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
4955 s += 8;
4956 }
4957 else if (op == BEHIND || op == NOBEHIND)
4958 {
4959 // one int
4960 fprintf(f, " count %ld", OPERAND_MIN(s));
4961 s += 4;
4962 }
4963 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
4964 {
4965 // one int plus comparator
4966 fprintf(f, " count %ld", OPERAND_MIN(s));
4967 s += 5;
4968 }
4969 s += 3;
4970 if (op == ANYOF || op == ANYOF + ADD_NL
4971 || op == ANYBUT || op == ANYBUT + ADD_NL
4972 || op == EXACTLY)
4973 {
4974 // Literal string, where present.
4975 fprintf(f, "\nxxxxxxxxx\n");
4976 while (*s != NUL)
4977 fprintf(f, "%c", *s++);
4978 fprintf(f, "\nxxxxxxxxx\n");
4979 s++;
4980 }
4981 fprintf(f, "\r\n");
4982 }
4983
4984 // Header fields of interest.
4985 if (r->regstart != NUL)
4986 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
4987 ? (char *)transchar(r->regstart)
4988 : "multibyte", r->regstart);
4989 if (r->reganch)
4990 fprintf(f, "anchored; ");
4991 if (r->regmust != NULL)
4992 fprintf(f, "must have \"%s\"", r->regmust);
4993 fprintf(f, "\r\n");
4994
4995#ifdef BT_REGEXP_LOG
4996 fclose(f);
4997#endif
4998}
4999#endif // BT_REGEXP_DUMP
5000
5001#ifdef DEBUG
5002/*
5003 * regprop - printable representation of opcode
5004 */
5005 static char_u *
5006regprop(char_u *op)
5007{
5008 char *p;
5009 static char buf[50];
5010
5011 STRCPY(buf, ":");
5012
5013 switch ((int) OP(op))
5014 {
5015 case BOL:
5016 p = "BOL";
5017 break;
5018 case EOL:
5019 p = "EOL";
5020 break;
5021 case RE_BOF:
5022 p = "BOF";
5023 break;
5024 case RE_EOF:
5025 p = "EOF";
5026 break;
5027 case CURSOR:
5028 p = "CURSOR";
5029 break;
5030 case RE_VISUAL:
5031 p = "RE_VISUAL";
5032 break;
5033 case RE_LNUM:
5034 p = "RE_LNUM";
5035 break;
5036 case RE_MARK:
5037 p = "RE_MARK";
5038 break;
5039 case RE_COL:
5040 p = "RE_COL";
5041 break;
5042 case RE_VCOL:
5043 p = "RE_VCOL";
5044 break;
5045 case BOW:
5046 p = "BOW";
5047 break;
5048 case EOW:
5049 p = "EOW";
5050 break;
5051 case ANY:
5052 p = "ANY";
5053 break;
5054 case ANY + ADD_NL:
5055 p = "ANY+NL";
5056 break;
5057 case ANYOF:
5058 p = "ANYOF";
5059 break;
5060 case ANYOF + ADD_NL:
5061 p = "ANYOF+NL";
5062 break;
5063 case ANYBUT:
5064 p = "ANYBUT";
5065 break;
5066 case ANYBUT + ADD_NL:
5067 p = "ANYBUT+NL";
5068 break;
5069 case IDENT:
5070 p = "IDENT";
5071 break;
5072 case IDENT + ADD_NL:
5073 p = "IDENT+NL";
5074 break;
5075 case SIDENT:
5076 p = "SIDENT";
5077 break;
5078 case SIDENT + ADD_NL:
5079 p = "SIDENT+NL";
5080 break;
5081 case KWORD:
5082 p = "KWORD";
5083 break;
5084 case KWORD + ADD_NL:
5085 p = "KWORD+NL";
5086 break;
5087 case SKWORD:
5088 p = "SKWORD";
5089 break;
5090 case SKWORD + ADD_NL:
5091 p = "SKWORD+NL";
5092 break;
5093 case FNAME:
5094 p = "FNAME";
5095 break;
5096 case FNAME + ADD_NL:
5097 p = "FNAME+NL";
5098 break;
5099 case SFNAME:
5100 p = "SFNAME";
5101 break;
5102 case SFNAME + ADD_NL:
5103 p = "SFNAME+NL";
5104 break;
5105 case PRINT:
5106 p = "PRINT";
5107 break;
5108 case PRINT + ADD_NL:
5109 p = "PRINT+NL";
5110 break;
5111 case SPRINT:
5112 p = "SPRINT";
5113 break;
5114 case SPRINT + ADD_NL:
5115 p = "SPRINT+NL";
5116 break;
5117 case WHITE:
5118 p = "WHITE";
5119 break;
5120 case WHITE + ADD_NL:
5121 p = "WHITE+NL";
5122 break;
5123 case NWHITE:
5124 p = "NWHITE";
5125 break;
5126 case NWHITE + ADD_NL:
5127 p = "NWHITE+NL";
5128 break;
5129 case DIGIT:
5130 p = "DIGIT";
5131 break;
5132 case DIGIT + ADD_NL:
5133 p = "DIGIT+NL";
5134 break;
5135 case NDIGIT:
5136 p = "NDIGIT";
5137 break;
5138 case NDIGIT + ADD_NL:
5139 p = "NDIGIT+NL";
5140 break;
5141 case HEX:
5142 p = "HEX";
5143 break;
5144 case HEX + ADD_NL:
5145 p = "HEX+NL";
5146 break;
5147 case NHEX:
5148 p = "NHEX";
5149 break;
5150 case NHEX + ADD_NL:
5151 p = "NHEX+NL";
5152 break;
5153 case OCTAL:
5154 p = "OCTAL";
5155 break;
5156 case OCTAL + ADD_NL:
5157 p = "OCTAL+NL";
5158 break;
5159 case NOCTAL:
5160 p = "NOCTAL";
5161 break;
5162 case NOCTAL + ADD_NL:
5163 p = "NOCTAL+NL";
5164 break;
5165 case WORD:
5166 p = "WORD";
5167 break;
5168 case WORD + ADD_NL:
5169 p = "WORD+NL";
5170 break;
5171 case NWORD:
5172 p = "NWORD";
5173 break;
5174 case NWORD + ADD_NL:
5175 p = "NWORD+NL";
5176 break;
5177 case HEAD:
5178 p = "HEAD";
5179 break;
5180 case HEAD + ADD_NL:
5181 p = "HEAD+NL";
5182 break;
5183 case NHEAD:
5184 p = "NHEAD";
5185 break;
5186 case NHEAD + ADD_NL:
5187 p = "NHEAD+NL";
5188 break;
5189 case ALPHA:
5190 p = "ALPHA";
5191 break;
5192 case ALPHA + ADD_NL:
5193 p = "ALPHA+NL";
5194 break;
5195 case NALPHA:
5196 p = "NALPHA";
5197 break;
5198 case NALPHA + ADD_NL:
5199 p = "NALPHA+NL";
5200 break;
5201 case LOWER:
5202 p = "LOWER";
5203 break;
5204 case LOWER + ADD_NL:
5205 p = "LOWER+NL";
5206 break;
5207 case NLOWER:
5208 p = "NLOWER";
5209 break;
5210 case NLOWER + ADD_NL:
5211 p = "NLOWER+NL";
5212 break;
5213 case UPPER:
5214 p = "UPPER";
5215 break;
5216 case UPPER + ADD_NL:
5217 p = "UPPER+NL";
5218 break;
5219 case NUPPER:
5220 p = "NUPPER";
5221 break;
5222 case NUPPER + ADD_NL:
5223 p = "NUPPER+NL";
5224 break;
5225 case BRANCH:
5226 p = "BRANCH";
5227 break;
5228 case EXACTLY:
5229 p = "EXACTLY";
5230 break;
5231 case NOTHING:
5232 p = "NOTHING";
5233 break;
5234 case BACK:
5235 p = "BACK";
5236 break;
5237 case END:
5238 p = "END";
5239 break;
5240 case MOPEN + 0:
5241 p = "MATCH START";
5242 break;
5243 case MOPEN + 1:
5244 case MOPEN + 2:
5245 case MOPEN + 3:
5246 case MOPEN + 4:
5247 case MOPEN + 5:
5248 case MOPEN + 6:
5249 case MOPEN + 7:
5250 case MOPEN + 8:
5251 case MOPEN + 9:
5252 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
5253 p = NULL;
5254 break;
5255 case MCLOSE + 0:
5256 p = "MATCH END";
5257 break;
5258 case MCLOSE + 1:
5259 case MCLOSE + 2:
5260 case MCLOSE + 3:
5261 case MCLOSE + 4:
5262 case MCLOSE + 5:
5263 case MCLOSE + 6:
5264 case MCLOSE + 7:
5265 case MCLOSE + 8:
5266 case MCLOSE + 9:
5267 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
5268 p = NULL;
5269 break;
5270 case BACKREF + 1:
5271 case BACKREF + 2:
5272 case BACKREF + 3:
5273 case BACKREF + 4:
5274 case BACKREF + 5:
5275 case BACKREF + 6:
5276 case BACKREF + 7:
5277 case BACKREF + 8:
5278 case BACKREF + 9:
5279 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
5280 p = NULL;
5281 break;
5282 case NOPEN:
5283 p = "NOPEN";
5284 break;
5285 case NCLOSE:
5286 p = "NCLOSE";
5287 break;
5288#ifdef FEAT_SYN_HL
5289 case ZOPEN + 1:
5290 case ZOPEN + 2:
5291 case ZOPEN + 3:
5292 case ZOPEN + 4:
5293 case ZOPEN + 5:
5294 case ZOPEN + 6:
5295 case ZOPEN + 7:
5296 case ZOPEN + 8:
5297 case ZOPEN + 9:
5298 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
5299 p = NULL;
5300 break;
5301 case ZCLOSE + 1:
5302 case ZCLOSE + 2:
5303 case ZCLOSE + 3:
5304 case ZCLOSE + 4:
5305 case ZCLOSE + 5:
5306 case ZCLOSE + 6:
5307 case ZCLOSE + 7:
5308 case ZCLOSE + 8:
5309 case ZCLOSE + 9:
5310 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
5311 p = NULL;
5312 break;
5313 case ZREF + 1:
5314 case ZREF + 2:
5315 case ZREF + 3:
5316 case ZREF + 4:
5317 case ZREF + 5:
5318 case ZREF + 6:
5319 case ZREF + 7:
5320 case ZREF + 8:
5321 case ZREF + 9:
5322 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
5323 p = NULL;
5324 break;
5325#endif
5326 case STAR:
5327 p = "STAR";
5328 break;
5329 case PLUS:
5330 p = "PLUS";
5331 break;
5332 case NOMATCH:
5333 p = "NOMATCH";
5334 break;
5335 case MATCH:
5336 p = "MATCH";
5337 break;
5338 case BEHIND:
5339 p = "BEHIND";
5340 break;
5341 case NOBEHIND:
5342 p = "NOBEHIND";
5343 break;
5344 case SUBPAT:
5345 p = "SUBPAT";
5346 break;
5347 case BRACE_LIMITS:
5348 p = "BRACE_LIMITS";
5349 break;
5350 case BRACE_SIMPLE:
5351 p = "BRACE_SIMPLE";
5352 break;
5353 case BRACE_COMPLEX + 0:
5354 case BRACE_COMPLEX + 1:
5355 case BRACE_COMPLEX + 2:
5356 case BRACE_COMPLEX + 3:
5357 case BRACE_COMPLEX + 4:
5358 case BRACE_COMPLEX + 5:
5359 case BRACE_COMPLEX + 6:
5360 case BRACE_COMPLEX + 7:
5361 case BRACE_COMPLEX + 8:
5362 case BRACE_COMPLEX + 9:
5363 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
5364 p = NULL;
5365 break;
5366 case MULTIBYTECODE:
5367 p = "MULTIBYTECODE";
5368 break;
5369 case NEWL:
5370 p = "NEWL";
5371 break;
5372 default:
5373 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
5374 p = NULL;
5375 break;
5376 }
5377 if (p != NULL)
5378 STRCAT(buf, p);
5379 return (char_u *)buf;
5380}
5381#endif // DEBUG