blob: dae6d519be58283de07e430e06bb2f2d70b260f0 [file] [log] [blame]
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * Backtracking regular expression implementation.
4 *
5 * This file is included in "regexp.c".
6 *
7 * NOTICE:
8 *
9 * This is NOT the original regular expression code as written by Henry
10 * Spencer. This code has been modified specifically for use with the VIM
11 * editor, and should not be used separately from Vim. If you want a good
12 * regular expression library, get the original code. The copyright notice
13 * that follows is from the original.
14 *
15 * END NOTICE
16 *
17 * Copyright (c) 1986 by University of Toronto.
18 * Written by Henry Spencer. Not derived from licensed software.
19 *
20 * Permission is granted to anyone to use this software for any
21 * purpose on any computer system, and to redistribute it freely,
22 * subject to the following restrictions:
23 *
24 * 1. The author is not responsible for the consequences of use of
25 * this software, no matter how awful, even if they arise
26 * from defects in it.
27 *
28 * 2. The origin of this software must not be misrepresented, either
29 * by explicit claim or by omission.
30 *
31 * 3. Altered versions must be plainly marked as such, and must not
32 * be misrepresented as being the original software.
33 *
34 * Beware that some of this code is subtly aware of the way operator
35 * precedence is structured in regular expressions. Serious changes in
36 * regular-expression syntax might require a total rethink.
37 *
38 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
39 * Webb, Ciaran McCreesh and Bram Moolenaar.
40 * Named character class support added by Walter Briscoe (1998 Jul 01)
41 */
42
43/*
44 * The "internal use only" fields in regexp.h are present to pass info from
45 * compile to execute that permits the execute phase to run lots faster on
46 * simple cases. They are:
47 *
48 * regstart char that must begin a match; NUL if none obvious; Can be a
49 * multi-byte character.
50 * reganch is the match anchored (at beginning-of-line only)?
51 * regmust string (pointer into program) that match must include, or NULL
52 * regmlen length of regmust string
53 * regflags RF_ values or'ed together
54 *
55 * Regstart and reganch permit very fast decisions on suitable starting points
56 * for a match, cutting down the work a lot. Regmust permits fast rejection
57 * of lines that cannot possibly match. The regmust tests are costly enough
58 * that vim_regcomp() supplies a regmust only if the r.e. contains something
59 * potentially expensive (at present, the only such thing detected is * or +
60 * at the start of the r.e., which can involve a lot of backup). Regmlen is
61 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
62 * computing it anyway.
63 */
64
65/*
66 * Structure for regexp "program". This is essentially a linear encoding
67 * of a nondeterministic finite-state machine (aka syntax charts or
68 * "railroad normal form" in parsing technology). Each node is an opcode
69 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
70 * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
71 * pointer with a BRANCH on both ends of it is connecting two alternatives.
72 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
73 * (as opposed to a collection of them) is never concatenated with anything
74 * because of operator precedence). The "next" pointer of a BRACES_COMPLEX
75 * node points to the node after the stuff to be repeated.
76 * The operand of some types of node is a literal string; for others, it is a
77 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
78 * is the first node of the branch.
79 * (NB this is *not* a tree structure: the tail of the branch connects to the
80 * thing following the set of BRANCHes.)
81 *
82 * pattern is coded like:
83 *
84 * +-----------------+
85 * | V
86 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
87 * | ^ | ^
88 * +------+ +----------+
89 *
90 *
91 * +------------------+
92 * V |
93 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
94 * | | ^ ^
95 * | +---------------+ |
96 * +---------------------------------------------+
97 *
98 *
99 * +----------------------+
100 * V |
101 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
102 * | | ^ ^
103 * | +-----------+ |
104 * +--------------------------------------------------+
105 *
106 *
107 * +-------------------------+
108 * V |
109 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
110 * | | ^
111 * | +----------------+
112 * +-----------------------------------------------+
113 *
114 *
115 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
116 * | | ^ ^
117 * | +----------------+ |
118 * +--------------------------------+
119 *
120 * +---------+
121 * | V
122 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
123 * | | | | ^ ^
124 * | | | +-----+ |
125 * | | +----------------+ |
126 * | +---------------------------+ |
127 * +------------------------------------------------------+
128 *
129 * They all start with a BRANCH for "\|" alternatives, even when there is only
130 * one alternative.
131 */
132
133/*
134 * The opcodes are:
135 */
136
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200137// definition number opnd? meaning
138#define END 0 // End of program or NOMATCH operand.
139#define BOL 1 // Match "" at beginning of line.
140#define EOL 2 // Match "" at end of line.
141#define BRANCH 3 // node Match this alternative, or the
142 // next...
143#define BACK 4 // Match "", "next" ptr points backward.
144#define EXACTLY 5 // str Match this string.
145#define NOTHING 6 // Match empty string.
146#define STAR 7 // node Match this (simple) thing 0 or more
147 // times.
148#define PLUS 8 // node Match this (simple) thing 1 or more
149 // times.
150#define MATCH 9 // node match the operand zero-width
151#define NOMATCH 10 // node check for no match with operand
152#define BEHIND 11 // node look behind for a match with operand
153#define NOBEHIND 12 // node look behind for no match with operand
154#define SUBPAT 13 // node match the operand here
155#define BRACE_SIMPLE 14 // node Match this (simple) thing between m and
156 // n times (\{m,n\}).
157#define BOW 15 // Match "" after [^a-zA-Z0-9_]
158#define EOW 16 // Match "" at [^a-zA-Z0-9_]
159#define BRACE_LIMITS 17 // nr nr define the min & max for BRACE_SIMPLE
160 // and BRACE_COMPLEX.
161#define NEWL 18 // Match line-break
162#define BHPOS 19 // End position for BEHIND or NOBEHIND
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200163
164
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200165// character classes: 20-48 normal, 50-78 include a line-break
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200166#define ADD_NL 30
167#define FIRST_NL ANY + ADD_NL
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200168#define ANY 20 // Match any one character.
169#define ANYOF 21 // str Match any character in this string.
170#define ANYBUT 22 // str Match any character not in this
171 // string.
172#define IDENT 23 // Match identifier char
173#define SIDENT 24 // Match identifier char but no digit
174#define KWORD 25 // Match keyword char
175#define SKWORD 26 // Match word char but no digit
176#define FNAME 27 // Match file name char
177#define SFNAME 28 // Match file name char but no digit
178#define PRINT 29 // Match printable char
179#define SPRINT 30 // Match printable char but no digit
180#define WHITE 31 // Match whitespace char
181#define NWHITE 32 // Match non-whitespace char
182#define DIGIT 33 // Match digit char
183#define NDIGIT 34 // Match non-digit char
184#define HEX 35 // Match hex char
185#define NHEX 36 // Match non-hex char
186#define OCTAL 37 // Match octal char
187#define NOCTAL 38 // Match non-octal char
188#define WORD 39 // Match word char
189#define NWORD 40 // Match non-word char
190#define HEAD 41 // Match head char
191#define NHEAD 42 // Match non-head char
192#define ALPHA 43 // Match alpha char
193#define NALPHA 44 // Match non-alpha char
194#define LOWER 45 // Match lowercase char
195#define NLOWER 46 // Match non-lowercase char
196#define UPPER 47 // Match uppercase char
197#define NUPPER 48 // Match non-uppercase char
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200198#define LAST_NL NUPPER + ADD_NL
199#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
200
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200201#define MOPEN 80 // -89 Mark this point in input as start of
202 // \( subexpr. MOPEN + 0 marks start of
203 // match.
204#define MCLOSE 90 // -99 Analogous to MOPEN. MCLOSE + 0 marks
205 // end of match.
206#define BACKREF 100 // -109 node Match same string again \1-\9
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200207
208#ifdef FEAT_SYN_HL
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200209# define ZOPEN 110 // -119 Mark this point in input as start of
210 // \z( subexpr.
211# define ZCLOSE 120 // -129 Analogous to ZOPEN.
212# define ZREF 130 // -139 node Match external submatch \z1-\z9
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200213#endif
214
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200215#define BRACE_COMPLEX 140 // -149 node Match nodes between m & n times
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200216
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200217#define NOPEN 150 // Mark this point in input as start of
218 // \%( subexpr.
219#define NCLOSE 151 // Analogous to NOPEN.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200220
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200221#define MULTIBYTECODE 200 // mbc Match one multi-byte character
222#define RE_BOF 201 // Match "" at beginning of file.
223#define RE_EOF 202 // Match "" at end of file.
224#define CURSOR 203 // Match location of cursor.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200225
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200226#define RE_LNUM 204 // nr cmp Match line number
227#define RE_COL 205 // nr cmp Match column number
228#define RE_VCOL 206 // nr cmp Match virtual column number
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200229
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200230#define RE_MARK 207 // mark cmp Match mark position
231#define RE_VISUAL 208 // Match Visual area
232#define RE_COMPOSING 209 // any composing characters
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200233
234/*
235 * Flags to be passed up and down.
236 */
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200237#define HASWIDTH 0x1 // Known never to match null string.
238#define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand.
239#define SPSTART 0x4 // Starts with * or +.
240#define HASNL 0x8 // Contains some \n.
241#define HASLOOKBH 0x10 // Contains "\@<=" or "\@<!".
242#define WORST 0 // Worst case.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200243
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200244static int num_complex_braces; // Complex \{...} count
245static char_u *regcode; // Code-emit pointer, or JUST_CALC_SIZE
246static long regsize; // Code size.
247static int reg_toolong; // TRUE when offset out of range
248static char_u had_endbrace[NSUBEXP]; // flags, TRUE if end of () found
249static long brace_min[10]; // Minimums for complex brace repeats
250static long brace_max[10]; // Maximums for complex brace repeats
251static int brace_count[10]; // Current counts for complex brace repeats
252static int one_exactly = FALSE; // only do one char for EXACTLY
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200253
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200254// When making changes to classchars also change nfa_classcodes.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200255static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
256static int classcodes[] = {
257 ANY, IDENT, SIDENT, KWORD, SKWORD,
258 FNAME, SFNAME, PRINT, SPRINT,
259 WHITE, NWHITE, DIGIT, NDIGIT,
260 HEX, NHEX, OCTAL, NOCTAL,
261 WORD, NWORD, HEAD, NHEAD,
262 ALPHA, NALPHA, LOWER, NLOWER,
263 UPPER, NUPPER
264};
265
266/*
267 * When regcode is set to this value, code is not emitted and size is computed
268 * instead.
269 */
270#define JUST_CALC_SIZE ((char_u *) -1)
271
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200272// Values for rs_state in regitem_T.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200273typedef enum regstate_E
274{
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200275 RS_NOPEN = 0 // NOPEN and NCLOSE
276 , RS_MOPEN // MOPEN + [0-9]
277 , RS_MCLOSE // MCLOSE + [0-9]
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200278#ifdef FEAT_SYN_HL
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200279 , RS_ZOPEN // ZOPEN + [0-9]
280 , RS_ZCLOSE // ZCLOSE + [0-9]
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200281#endif
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200282 , RS_BRANCH // BRANCH
283 , RS_BRCPLX_MORE // BRACE_COMPLEX and trying one more match
284 , RS_BRCPLX_LONG // BRACE_COMPLEX and trying longest match
285 , RS_BRCPLX_SHORT // BRACE_COMPLEX and trying shortest match
286 , RS_NOMATCH // NOMATCH
287 , RS_BEHIND1 // BEHIND / NOBEHIND matching rest
288 , RS_BEHIND2 // BEHIND / NOBEHIND matching behind part
289 , RS_STAR_LONG // STAR/PLUS/BRACE_SIMPLE longest match
290 , RS_STAR_SHORT // STAR/PLUS/BRACE_SIMPLE shortest match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200291} regstate_T;
292
293/*
294 * Structure used to save the current input state, when it needs to be
295 * restored after trying a match. Used by reg_save() and reg_restore().
296 * Also stores the length of "backpos".
297 */
298typedef struct
299{
300 union
301 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200302 char_u *ptr; // rex.input pointer, for single-line regexp
303 lpos_T pos; // rex.input pos, for multi-line regexp
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200304 } rs_u;
305 int rs_len;
306} regsave_T;
307
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200308// struct to save start/end pointer/position in for \(\)
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200309typedef struct
310{
311 union
312 {
313 char_u *ptr;
314 lpos_T pos;
315 } se_u;
316} save_se_T;
317
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200318// used for BEHIND and NOBEHIND matching
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200319typedef struct regbehind_S
320{
321 regsave_T save_after;
322 regsave_T save_behind;
323 int save_need_clear_subexpr;
324 save_se_T save_start[NSUBEXP];
325 save_se_T save_end[NSUBEXP];
326} regbehind_T;
327
328/*
329 * When there are alternatives a regstate_T is put on the regstack to remember
330 * what we are doing.
331 * Before it may be another type of item, depending on rs_state, to remember
332 * more things.
333 */
334typedef struct regitem_S
335{
336 regstate_T rs_state; // what we are doing, one of RS_ above
337 short rs_no; // submatch nr or BEHIND/NOBEHIND
338 char_u *rs_scan; // current node in program
339 union
340 {
341 save_se_T sesave;
342 regsave_T regsave;
343 } rs_un; // room for saving rex.input
344} regitem_T;
345
346
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200347// used for STAR, PLUS and BRACE_SIMPLE matching
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200348typedef struct regstar_S
349{
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200350 int nextb; // next byte
351 int nextb_ic; // next byte reverse case
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200352 long count;
353 long minval;
354 long maxval;
355} regstar_T;
356
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200357// used to store input position when a BACK was encountered, so that we now if
358// we made any progress since the last time.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200359typedef struct backpos_S
360{
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200361 char_u *bp_scan; // "scan" where BACK was encountered
362 regsave_T bp_pos; // last input position
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200363} backpos_T;
364
365/*
366 * "regstack" and "backpos" are used by regmatch(). They are kept over calls
367 * to avoid invoking malloc() and free() often.
368 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
369 * or regbehind_T.
370 * "backpos_T" is a table with backpos_T for BACK
371 */
372static garray_T regstack = {0, 0, 0, 0, NULL};
373static garray_T backpos = {0, 0, 0, 0, NULL};
374
375static regsave_T behind_pos;
376
377/*
378 * Both for regstack and backpos tables we use the following strategy of
379 * allocation (to reduce malloc/free calls):
380 * - Initial size is fairly small.
381 * - When needed, the tables are grown bigger (8 times at first, double after
382 * that).
383 * - After executing the match we free the memory only if the array has grown.
384 * Thus the memory is kept allocated when it's at the initial size.
385 * This makes it fast while not keeping a lot of memory allocated.
386 * A three times speed increase was observed when using many simple patterns.
387 */
388#define REGSTACK_INITIAL 2048
389#define BACKPOS_INITIAL 64
390
391/*
392 * Opcode notes:
393 *
394 * BRANCH The set of branches constituting a single choice are hooked
395 * together with their "next" pointers, since precedence prevents
396 * anything being concatenated to any individual branch. The
397 * "next" pointer of the last BRANCH in a choice points to the
398 * thing following the whole choice. This is also where the
399 * final "next" pointer of each individual branch points; each
400 * branch starts with the operand node of a BRANCH node.
401 *
402 * BACK Normal "next" pointers all implicitly point forward; BACK
403 * exists to make loop structures possible.
404 *
405 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular
406 * BRANCH structures using BACK. Simple cases (one character
407 * per match) are implemented with STAR and PLUS for speed
408 * and to minimize recursive plunges.
409 *
410 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
411 * node, and defines the min and max limits to be used for that
412 * node.
413 *
414 * MOPEN,MCLOSE ...are numbered at compile time.
415 * ZOPEN,ZCLOSE ...ditto
416 */
417
418/*
419 * A node is one char of opcode followed by two chars of "next" pointer.
420 * "Next" pointers are stored as two 8-bit bytes, high order first. The
421 * value is a positive offset from the opcode of the node containing it.
422 * An operand, if any, simply follows the node. (Note that much of the
423 * code generation knows about this implicit relationship.)
424 *
425 * Using two bytes for the "next" pointer is vast overkill for most things,
426 * but allows patterns to get big without disasters.
427 */
428#define OP(p) ((int)*(p))
429#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
430#define OPERAND(p) ((p) + 3)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200431// Obtain an operand that was stored as four bytes, MSB first.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200432#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
433 + ((long)(p)[5] << 8) + (long)(p)[6])
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200434// Obtain a second operand stored as four bytes.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200435#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200436// Obtain a second single-byte operand stored after a four bytes operand.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200437#define OPERAND_CMP(p) (p)[7]
438
439static char_u *reg(int paren, int *flagp);
440
441#ifdef BT_REGEXP_DUMP
442static void regdump(char_u *, bt_regprog_T *);
443#endif
444
445static int re_num_cmp(long_u val, char_u *scan);
446
447#ifdef DEBUG
448static char_u *regprop(char_u *);
449
450static int regnarrate = 0;
451#endif
452
453
454/*
455 * Setup to parse the regexp. Used once to get the length and once to do it.
456 */
457 static void
458regcomp_start(
459 char_u *expr,
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200460 int re_flags) // see vim_regcomp()
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200461{
462 initchr(expr);
463 if (re_flags & RE_MAGIC)
464 reg_magic = MAGIC_ON;
465 else
466 reg_magic = MAGIC_OFF;
467 reg_string = (re_flags & RE_STRING);
468 reg_strict = (re_flags & RE_STRICT);
469 get_cpo_flags();
470
471 num_complex_braces = 0;
472 regnpar = 1;
Bram Moolenaara80faa82020-04-12 19:37:17 +0200473 CLEAR_FIELD(had_endbrace);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200474#ifdef FEAT_SYN_HL
475 regnzpar = 1;
476 re_has_z = 0;
477#endif
478 regsize = 0L;
479 reg_toolong = FALSE;
480 regflags = 0;
481#if defined(FEAT_SYN_HL) || defined(PROTO)
482 had_eol = FALSE;
483#endif
484}
485
486/*
487 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
488 * character "c".
489 */
490 static int
491use_multibytecode(int c)
492{
493 return has_mbyte && (*mb_char2len)(c) > 1
494 && (re_multi_type(peekchr()) != NOT_MULTI
495 || (enc_utf8 && utf_iscomposing(c)));
496}
497
498/*
499 * Emit (if appropriate) a byte of code
500 */
501 static void
502regc(int b)
503{
504 if (regcode == JUST_CALC_SIZE)
505 regsize++;
506 else
507 *regcode++ = b;
508}
509
510/*
511 * Emit (if appropriate) a multi-byte character of code
512 */
513 static void
514regmbc(int c)
515{
516 if (!has_mbyte && c > 0xff)
517 return;
518 if (regcode == JUST_CALC_SIZE)
519 regsize += (*mb_char2len)(c);
520 else
521 regcode += (*mb_char2bytes)(c, regcode);
522}
523
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200524
525/*
526 * Produce the bytes for equivalence class "c".
527 * Currently only handles latin1, latin9 and utf-8.
528 * NOTE: When changing this function, also change nfa_emit_equi_class()
529 */
530 static void
531reg_equi_class(int c)
532{
533 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
534 || STRCMP(p_enc, "iso-8859-15") == 0)
535 {
536#ifdef EBCDIC
537 int i;
538
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200539 // This might be slower than switch/case below.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200540 for (i = 0; i < 16; i++)
541 {
542 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
543 {
544 char *p = EQUIVAL_CLASS_C[i];
545
546 while (*p != 0)
547 regmbc(*p++);
548 return;
549 }
550 }
551#else
552 switch (c)
553 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +0200554 // Do not use '\300' style, it results in a negative number.
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200555 case 'A': case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4:
556 case 0xc5: case 0x100: case 0x102: case 0x104: case 0x1cd:
557 case 0x1de: case 0x1e0: case 0x1fa: case 0x202: case 0x226:
558 case 0x23a: case 0x1e00: case 0x1ea0: case 0x1ea2: case 0x1ea4:
559 case 0x1ea6: case 0x1ea8: case 0x1eaa: case 0x1eac: case 0x1eae:
560 case 0x1eb0: case 0x1eb2: case 0x1eb4: case 0x1eb6:
561 regmbc('A'); regmbc(0xc0); regmbc(0xc1); regmbc(0xc2);
562 regmbc(0xc3); regmbc(0xc4); regmbc(0xc5);
563 regmbc(0x100); regmbc(0x102); regmbc(0x104);
564 regmbc(0x1cd); regmbc(0x1de); regmbc(0x1e0);
565 regmbc(0x1fa); regmbc(0x202); regmbc(0x226);
566 regmbc(0x23a); regmbc(0x1e00); regmbc(0x1ea0);
567 regmbc(0x1ea2); regmbc(0x1ea4); regmbc(0x1ea6);
568 regmbc(0x1ea8); regmbc(0x1eaa); regmbc(0x1eac);
569 regmbc(0x1eae); regmbc(0x1eb0); regmbc(0x1eb2);
570 regmbc(0x1eb4); regmbc(0x1eb6);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200571 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200572 case 'B': case 0x181: case 0x243: case 0x1e02:
573 case 0x1e04: case 0x1e06:
574 regmbc('B');
575 regmbc(0x181); regmbc(0x243); regmbc(0x1e02);
576 regmbc(0x1e04); regmbc(0x1e06);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200577 return;
578 case 'C': case 0xc7:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200579 case 0x106: case 0x108: case 0x10a: case 0x10c: case 0x187:
580 case 0x23b: case 0x1e08: case 0xa792:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200581 regmbc('C'); regmbc(0xc7);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200582 regmbc(0x106); regmbc(0x108); regmbc(0x10a);
583 regmbc(0x10c); regmbc(0x187); regmbc(0x23b);
584 regmbc(0x1e08); regmbc(0xa792);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200585 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200586 case 'D': case 0x10e: case 0x110: case 0x18a:
587 case 0x1e0a: case 0x1e0c: case 0x1e0e: case 0x1e10:
588 case 0x1e12:
589 regmbc('D'); regmbc(0x10e); regmbc(0x110);
590 regmbc(0x18a); regmbc(0x1e0a); regmbc(0x1e0c);
591 regmbc(0x1e0e); regmbc(0x1e10); regmbc(0x1e12);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200592 return;
593 case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200594 case 0x112: case 0x114: case 0x116: case 0x118: case 0x11a:
595 case 0x204: case 0x206: case 0x228: case 0x246: case 0x1e14:
596 case 0x1e16: case 0x1e18: case 0x1e1a: case 0x1e1c:
597 case 0x1eb8: case 0x1eba: case 0x1ebc: case 0x1ebe:
598 case 0x1ec0: case 0x1ec2: case 0x1ec4: case 0x1ec6:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200599 regmbc('E'); regmbc(0xc8); regmbc(0xc9);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200600 regmbc(0xca); regmbc(0xcb); regmbc(0x112);
601 regmbc(0x114); regmbc(0x116); regmbc(0x118);
602 regmbc(0x11a); regmbc(0x204); regmbc(0x206);
603 regmbc(0x228); regmbc(0x246); regmbc(0x1e14);
604 regmbc(0x1e16); regmbc(0x1e18); regmbc(0x1e1a);
605 regmbc(0x1e1c); regmbc(0x1eb8); regmbc(0x1eba);
606 regmbc(0x1ebc); regmbc(0x1ebe); regmbc(0x1ec0);
607 regmbc(0x1ec2); regmbc(0x1ec4); regmbc(0x1ec6);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200608 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200609 case 'F': case 0x191: case 0x1e1e: case 0xa798:
610 regmbc('F'); regmbc(0x191); regmbc(0x1e1e);
611 regmbc(0xa798);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200612 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200613 case 'G': case 0x11c: case 0x11e: case 0x120:
614 case 0x122: case 0x193: case 0x1e4: case 0x1e6:
615 case 0x1f4: case 0x1e20: case 0xa7a0:
616 regmbc('G'); regmbc(0x11c); regmbc(0x11e);
617 regmbc(0x120); regmbc(0x122); regmbc(0x193);
618 regmbc(0x1e4); regmbc(0x1e6); regmbc(0x1f4);
619 regmbc(0x1e20); regmbc(0xa7a0);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200620 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200621 case 'H': case 0x124: case 0x126: case 0x21e:
622 case 0x1e22: case 0x1e24: case 0x1e26:
623 case 0x1e28: case 0x1e2a: case 0x2c67:
624 regmbc('H'); regmbc(0x124); regmbc(0x126);
625 regmbc(0x21e); regmbc(0x1e22); regmbc(0x1e24);
626 regmbc(0x1e26); regmbc(0x1e28); regmbc(0x1e2a);
627 regmbc(0x2c67);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200628 return;
629 case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200630 case 0x128: case 0x12a: case 0x12c: case 0x12e:
631 case 0x130: case 0x197: case 0x1cf: case 0x208:
632 case 0x20a: case 0x1e2c: case 0x1e2e: case 0x1ec8:
633 case 0x1eca:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200634 regmbc('I'); regmbc(0xcc); regmbc(0xcd);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200635 regmbc(0xce); regmbc(0xcf); regmbc(0x128);
636 regmbc(0x12a); regmbc(0x12c); regmbc(0x12e);
637 regmbc(0x130); regmbc(0x197); regmbc(0x1cf);
638 regmbc(0x208); regmbc(0x20a); regmbc(0x1e2c);
639 regmbc(0x1e2e); regmbc(0x1ec8); regmbc(0x1eca);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200640 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200641 case 'J': case 0x134: case 0x248:
642 regmbc('J'); regmbc(0x134); regmbc(0x248);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200643 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200644 case 'K': case 0x136: case 0x198: case 0x1e8: case 0x1e30:
645 case 0x1e32: case 0x1e34: case 0x2c69: case 0xa740:
646 regmbc('K'); regmbc(0x136); regmbc(0x198);
647 regmbc(0x1e8); regmbc(0x1e30); regmbc(0x1e32);
648 regmbc(0x1e34); regmbc(0x2c69); regmbc(0xa740);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200649 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200650 case 'L': case 0x139: case 0x13b: case 0x13d: case 0x13f:
651 case 0x141: case 0x23d: case 0x1e36: case 0x1e38:
652 case 0x1e3a: case 0x1e3c: case 0x2c60:
653 regmbc('L'); regmbc(0x139); regmbc(0x13b);
654 regmbc(0x13d); regmbc(0x13f); regmbc(0x141);
655 regmbc(0x23d); regmbc(0x1e36); regmbc(0x1e38);
656 regmbc(0x1e3a); regmbc(0x1e3c); regmbc(0x2c60);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200657 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200658 case 'M': case 0x1e3e: case 0x1e40: case 0x1e42:
659 regmbc('M'); regmbc(0x1e3e); regmbc(0x1e40);
660 regmbc(0x1e42);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200661 return;
662 case 'N': case 0xd1:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200663 case 0x143: case 0x145: case 0x147: case 0x1f8:
664 case 0x1e44: case 0x1e46: case 0x1e48: case 0x1e4a:
665 case 0xa7a4:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200666 regmbc('N'); regmbc(0xd1);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200667 regmbc(0x143); regmbc(0x145); regmbc(0x147);
668 regmbc(0x1f8); regmbc(0x1e44); regmbc(0x1e46);
669 regmbc(0x1e48); regmbc(0x1e4a); regmbc(0xa7a4);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200670 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200671 case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd6:
672 case 0xd8: case 0x14c: case 0x14e: case 0x150: case 0x19f:
673 case 0x1a0: case 0x1d1: case 0x1ea: case 0x1ec: case 0x1fe:
674 case 0x20c: case 0x20e: case 0x22a: case 0x22c: case 0x22e:
675 case 0x230: case 0x1e4c: case 0x1e4e: case 0x1e50: case 0x1e52:
676 case 0x1ecc: case 0x1ece: case 0x1ed0: case 0x1ed2: case 0x1ed4:
677 case 0x1ed6: case 0x1ed8: case 0x1eda: case 0x1edc: case 0x1ede:
678 case 0x1ee0: case 0x1ee2:
679 regmbc('O'); regmbc(0xd2); regmbc(0xd3); regmbc(0xd4);
680 regmbc(0xd5); regmbc(0xd6); regmbc(0xd8);
681 regmbc(0x14c); regmbc(0x14e); regmbc(0x150);
682 regmbc(0x19f); regmbc(0x1a0); regmbc(0x1d1);
683 regmbc(0x1ea); regmbc(0x1ec); regmbc(0x1fe);
684 regmbc(0x20c); regmbc(0x20e); regmbc(0x22a);
685 regmbc(0x22c); regmbc(0x22e); regmbc(0x230);
686 regmbc(0x1e4c); regmbc(0x1e4e); regmbc(0x1e50);
687 regmbc(0x1e52); regmbc(0x1ecc); regmbc(0x1ece);
688 regmbc(0x1ed0); regmbc(0x1ed2); regmbc(0x1ed4);
689 regmbc(0x1ed6); regmbc(0x1ed8); regmbc(0x1eda);
690 regmbc(0x1edc); regmbc(0x1ede); regmbc(0x1ee0);
691 regmbc(0x1ee2);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200692 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200693 case 'P': case 0x1a4: case 0x1e54: case 0x1e56: case 0x2c63:
694 regmbc('P'); regmbc(0x1a4); regmbc(0x1e54);
695 regmbc(0x1e56); regmbc(0x2c63);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200696 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200697 case 'Q': case 0x24a:
698 regmbc('Q'); regmbc(0x24a);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200699 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200700 case 'R': case 0x154: case 0x156: case 0x158: case 0x210:
701 case 0x212: case 0x24c: case 0x1e58: case 0x1e5a:
702 case 0x1e5c: case 0x1e5e: case 0x2c64: case 0xa7a6:
703 regmbc('R'); regmbc(0x154); regmbc(0x156);
704 regmbc(0x210); regmbc(0x212); regmbc(0x158);
705 regmbc(0x24c); regmbc(0x1e58); regmbc(0x1e5a);
706 regmbc(0x1e5c); regmbc(0x1e5e); regmbc(0x2c64);
707 regmbc(0xa7a6);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200708 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200709 case 'S': case 0x15a: case 0x15c: case 0x15e: case 0x160:
710 case 0x218: case 0x1e60: case 0x1e62: case 0x1e64:
711 case 0x1e66: case 0x1e68: case 0x2c7e: case 0xa7a8:
712 regmbc('S'); regmbc(0x15a); regmbc(0x15c);
713 regmbc(0x15e); regmbc(0x160); regmbc(0x218);
714 regmbc(0x1e60); regmbc(0x1e62); regmbc(0x1e64);
715 regmbc(0x1e66); regmbc(0x1e68); regmbc(0x2c7e);
716 regmbc(0xa7a8);
717 return;
718 case 'T': case 0x162: case 0x164: case 0x166: case 0x1ac:
719 case 0x1ae: case 0x21a: case 0x23e: case 0x1e6a: case 0x1e6c:
720 case 0x1e6e: case 0x1e70:
721 regmbc('T'); regmbc(0x162); regmbc(0x164);
722 regmbc(0x166); regmbc(0x1ac); regmbc(0x23e);
723 regmbc(0x1ae); regmbc(0x21a); regmbc(0x1e6a);
724 regmbc(0x1e6c); regmbc(0x1e6e); regmbc(0x1e70);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200725 return;
726 case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200727 case 0x168: case 0x16a: case 0x16c: case 0x16e:
728 case 0x170: case 0x172: case 0x1af: case 0x1d3:
729 case 0x1d5: case 0x1d7: case 0x1d9: case 0x1db:
730 case 0x214: case 0x216: case 0x244: case 0x1e72:
731 case 0x1e74: case 0x1e76: case 0x1e78: case 0x1e7a:
732 case 0x1ee4: case 0x1ee6: case 0x1ee8: case 0x1eea:
733 case 0x1eec: case 0x1eee: case 0x1ef0:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200734 regmbc('U'); regmbc(0xd9); regmbc(0xda);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200735 regmbc(0xdb); regmbc(0xdc); regmbc(0x168);
736 regmbc(0x16a); regmbc(0x16c); regmbc(0x16e);
737 regmbc(0x170); regmbc(0x172); regmbc(0x1af);
738 regmbc(0x1d3); regmbc(0x1d5); regmbc(0x1d7);
739 regmbc(0x1d9); regmbc(0x1db); regmbc(0x214);
740 regmbc(0x216); regmbc(0x244); regmbc(0x1e72);
741 regmbc(0x1e74); regmbc(0x1e76); regmbc(0x1e78);
742 regmbc(0x1e7a); regmbc(0x1ee4); regmbc(0x1ee6);
743 regmbc(0x1ee8); regmbc(0x1eea); regmbc(0x1eec);
744 regmbc(0x1eee); regmbc(0x1ef0);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200745 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200746 case 'V': case 0x1b2: case 0x1e7c: case 0x1e7e:
747 regmbc('V'); regmbc(0x1b2); regmbc(0x1e7c);
748 regmbc(0x1e7e);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200749 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200750 case 'W': case 0x174: case 0x1e80: case 0x1e82:
751 case 0x1e84: case 0x1e86: case 0x1e88:
752 regmbc('W'); regmbc(0x174); regmbc(0x1e80);
753 regmbc(0x1e82); regmbc(0x1e84); regmbc(0x1e86);
754 regmbc(0x1e88);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200755 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200756 case 'X': case 0x1e8a: case 0x1e8c:
757 regmbc('X'); regmbc(0x1e8a); regmbc(0x1e8c);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200758 return;
759 case 'Y': case 0xdd:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200760 case 0x176: case 0x178: case 0x1b3: case 0x232: case 0x24e:
761 case 0x1e8e: case 0x1ef2: case 0x1ef6: case 0x1ef4: case 0x1ef8:
762 regmbc('Y'); regmbc(0xdd); regmbc(0x176);
763 regmbc(0x178); regmbc(0x1b3); regmbc(0x232);
764 regmbc(0x24e); regmbc(0x1e8e); regmbc(0x1ef2);
765 regmbc(0x1ef4); regmbc(0x1ef6); regmbc(0x1ef8);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200766 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200767 case 'Z': case 0x179: case 0x17b: case 0x17d: case 0x1b5:
768 case 0x1e90: case 0x1e92: case 0x1e94: case 0x2c6b:
769 regmbc('Z'); regmbc(0x179); regmbc(0x17b);
770 regmbc(0x17d); regmbc(0x1b5); regmbc(0x1e90);
771 regmbc(0x1e92); regmbc(0x1e94); regmbc(0x2c6b);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200772 return;
773 case 'a': case 0xe0: case 0xe1: case 0xe2:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200774 case 0xe3: case 0xe4: case 0xe5: case 0x101: case 0x103:
775 case 0x105: case 0x1ce: case 0x1df: case 0x1e1: case 0x1fb:
776 case 0x201: case 0x203: case 0x227: case 0x1d8f: case 0x1e01:
777 case 0x1e9a: case 0x1ea1: case 0x1ea3: case 0x1ea5:
778 case 0x1ea7: case 0x1ea9: case 0x1eab: case 0x1ead:
779 case 0x1eaf: case 0x1eb1: case 0x1eb3: case 0x1eb5:
780 case 0x1eb7: case 0x2c65:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200781 regmbc('a'); regmbc(0xe0); regmbc(0xe1);
782 regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200783 regmbc(0xe5); regmbc(0x101); regmbc(0x103);
784 regmbc(0x105); regmbc(0x1ce); regmbc(0x1df);
785 regmbc(0x1e1); regmbc(0x1fb); regmbc(0x201);
786 regmbc(0x203); regmbc(0x227); regmbc(0x1d8f);
787 regmbc(0x1e01); regmbc(0x1e9a); regmbc(0x1ea1);
788 regmbc(0x1ea3); regmbc(0x1ea5); regmbc(0x1ea7);
789 regmbc(0x1ea9); regmbc(0x1eab); regmbc(0x1ead);
790 regmbc(0x1eaf); regmbc(0x1eb1); regmbc(0x1eb3);
791 regmbc(0x1eb5); regmbc(0x1eb7); regmbc(0x2c65);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200792 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200793 case 'b': case 0x180: case 0x253: case 0x1d6c: case 0x1d80:
794 case 0x1e03: case 0x1e05: case 0x1e07:
795 regmbc('b');
796 regmbc(0x180); regmbc(0x253); regmbc(0x1d6c);
797 regmbc(0x1d80); regmbc(0x1e03); regmbc(0x1e05);
798 regmbc(0x1e07);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200799 return;
800 case 'c': case 0xe7:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200801 case 0x107: case 0x109: case 0x10b: case 0x10d: case 0x188:
802 case 0x23c: case 0x1e09: case 0xa793: case 0xa794:
803 regmbc('c'); regmbc(0xe7); regmbc(0x107);
804 regmbc(0x109); regmbc(0x10b); regmbc(0x10d);
805 regmbc(0x188); regmbc(0x23c); regmbc(0x1e09);
806 regmbc(0xa793); regmbc(0xa794);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200807 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200808 case 'd': case 0x10f: case 0x111: case 0x257: case 0x1d6d:
809 case 0x1d81: case 0x1d91: case 0x1e0b: case 0x1e0d:
810 case 0x1e0f: case 0x1e11: case 0x1e13:
811 regmbc('d'); regmbc(0x10f); regmbc(0x111);
812 regmbc(0x257); regmbc(0x1d6d); regmbc(0x1d81);
813 regmbc(0x1d91); regmbc(0x1e0b); regmbc(0x1e0d);
814 regmbc(0x1e0f); regmbc(0x1e11); regmbc(0x1e13);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200815 return;
816 case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200817 case 0x113: case 0x115: case 0x117: case 0x119:
818 case 0x11b: case 0x205: case 0x207: case 0x229:
819 case 0x247: case 0x1d92: case 0x1e15: case 0x1e17:
820 case 0x1e19: case 0x1e1b: case 0x1eb9: case 0x1ebb:
821 case 0x1e1d: case 0x1ebd: case 0x1ebf: case 0x1ec1:
822 case 0x1ec3: case 0x1ec5: case 0x1ec7:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200823 regmbc('e'); regmbc(0xe8); regmbc(0xe9);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200824 regmbc(0xea); regmbc(0xeb); regmbc(0x113);
825 regmbc(0x115); regmbc(0x117); regmbc(0x119);
826 regmbc(0x11b); regmbc(0x205); regmbc(0x207);
827 regmbc(0x229); regmbc(0x247); regmbc(0x1d92);
828 regmbc(0x1e15); regmbc(0x1e17); regmbc(0x1e19);
829 regmbc(0x1e1b); regmbc(0x1e1d); regmbc(0x1eb9);
830 regmbc(0x1ebb); regmbc(0x1ebd); regmbc(0x1ebf);
831 regmbc(0x1ec1); regmbc(0x1ec3); regmbc(0x1ec5);
832 regmbc(0x1ec7);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200833 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200834 case 'f': case 0x192: case 0x1d6e: case 0x1d82:
835 case 0x1e1f: case 0xa799:
836 regmbc('f'); regmbc(0x192); regmbc(0x1d6e);
837 regmbc(0x1d82); regmbc(0x1e1f); regmbc(0xa799);
838 return;
839 case 'g': case 0x11d: case 0x11f: case 0x121: case 0x123:
840 case 0x1e5: case 0x1e7: case 0x260: case 0x1f5: case 0x1d83:
841 case 0x1e21: case 0xa7a1:
842 regmbc('g'); regmbc(0x11d); regmbc(0x11f);
843 regmbc(0x121); regmbc(0x123); regmbc(0x1e5);
844 regmbc(0x1e7); regmbc(0x1f5); regmbc(0x260);
845 regmbc(0x1d83); regmbc(0x1e21); regmbc(0xa7a1);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200846 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200847 case 'h': case 0x125: case 0x127: case 0x21f: case 0x1e23:
848 case 0x1e25: case 0x1e27: case 0x1e29: case 0x1e2b:
849 case 0x1e96: case 0x2c68: case 0xa795:
850 regmbc('h'); regmbc(0x125); regmbc(0x127);
851 regmbc(0x21f); regmbc(0x1e23); regmbc(0x1e25);
852 regmbc(0x1e27); regmbc(0x1e29); regmbc(0x1e2b);
853 regmbc(0x1e96); regmbc(0x2c68); regmbc(0xa795);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200854 return;
855 case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200856 case 0x129: case 0x12b: case 0x12d: case 0x12f:
857 case 0x1d0: case 0x209: case 0x20b: case 0x268:
858 case 0x1d96: case 0x1e2d: case 0x1e2f: case 0x1ec9:
859 case 0x1ecb:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200860 regmbc('i'); regmbc(0xec); regmbc(0xed);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200861 regmbc(0xee); regmbc(0xef); regmbc(0x129);
862 regmbc(0x12b); regmbc(0x12d); regmbc(0x12f);
863 regmbc(0x1d0); regmbc(0x209); regmbc(0x20b);
864 regmbc(0x268); regmbc(0x1d96); regmbc(0x1e2d);
865 regmbc(0x1e2f); regmbc(0x1ec9); regmbc(0x1ecb);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200866 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200867 case 'j': case 0x135: case 0x1f0: case 0x249:
868 regmbc('j'); regmbc(0x135); regmbc(0x1f0);
869 regmbc(0x249);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200870 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200871 case 'k': case 0x137: case 0x199: case 0x1e9:
872 case 0x1d84: case 0x1e31: case 0x1e33: case 0x1e35:
873 case 0x2c6a: case 0xa741:
874 regmbc('k'); regmbc(0x137); regmbc(0x199);
875 regmbc(0x1e9); regmbc(0x1d84); regmbc(0x1e31);
876 regmbc(0x1e33); regmbc(0x1e35); regmbc(0x2c6a);
877 regmbc(0xa741);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200878 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200879 case 'l': case 0x13a: case 0x13c: case 0x13e:
880 case 0x140: case 0x142: case 0x19a: case 0x1e37:
881 case 0x1e39: case 0x1e3b: case 0x1e3d: case 0x2c61:
882 regmbc('l'); regmbc(0x13a); regmbc(0x13c);
883 regmbc(0x13e); regmbc(0x140); regmbc(0x142);
884 regmbc(0x19a); regmbc(0x1e37); regmbc(0x1e39);
885 regmbc(0x1e3b); regmbc(0x1e3d); regmbc(0x2c61);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200886 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200887 case 'm': case 0x1d6f: case 0x1e3f: case 0x1e41: case 0x1e43:
888 regmbc('m'); regmbc(0x1d6f); regmbc(0x1e3f);
889 regmbc(0x1e41); regmbc(0x1e43);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200890 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200891 case 'n': case 0xf1: case 0x144: case 0x146: case 0x148:
892 case 0x149: case 0x1f9: case 0x1d70: case 0x1d87:
893 case 0x1e45: case 0x1e47: case 0x1e49: case 0x1e4b:
894 case 0xa7a5:
895 regmbc('n'); regmbc(0xf1); regmbc(0x144);
896 regmbc(0x146); regmbc(0x148); regmbc(0x149);
897 regmbc(0x1f9); regmbc(0x1d70); regmbc(0x1d87);
898 regmbc(0x1e45); regmbc(0x1e47); regmbc(0x1e49);
899 regmbc(0x1e4b); regmbc(0xa7a5);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200900 return;
901 case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200902 case 0xf6: case 0xf8: case 0x14d: case 0x14f: case 0x151:
903 case 0x1a1: case 0x1d2: case 0x1eb: case 0x1ed: case 0x1ff:
904 case 0x20d: case 0x20f: case 0x22b: case 0x22d: case 0x22f:
905 case 0x231: case 0x275: case 0x1e4d: case 0x1e4f:
906 case 0x1e51: case 0x1e53: case 0x1ecd: case 0x1ecf:
907 case 0x1ed1: case 0x1ed3: case 0x1ed5: case 0x1ed7:
908 case 0x1ed9: case 0x1edb: case 0x1edd: case 0x1edf:
909 case 0x1ee1: case 0x1ee3:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200910 regmbc('o'); regmbc(0xf2); regmbc(0xf3);
911 regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200912 regmbc(0xf8); regmbc(0x14d); regmbc(0x14f);
913 regmbc(0x151); regmbc(0x1a1); regmbc(0x1d2);
914 regmbc(0x1eb); regmbc(0x1ed); regmbc(0x1ff);
915 regmbc(0x20d); regmbc(0x20f); regmbc(0x22b);
916 regmbc(0x22d); regmbc(0x22f); regmbc(0x231);
917 regmbc(0x275); regmbc(0x1e4d); regmbc(0x1e4f);
918 regmbc(0x1e51); regmbc(0x1e53); regmbc(0x1ecd);
919 regmbc(0x1ecf); regmbc(0x1ed1); regmbc(0x1ed3);
920 regmbc(0x1ed5); regmbc(0x1ed7); regmbc(0x1ed9);
921 regmbc(0x1edb); regmbc(0x1edd); regmbc(0x1edf);
922 regmbc(0x1ee1); regmbc(0x1ee3);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200923 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200924 case 'p': case 0x1a5: case 0x1d71: case 0x1d88: case 0x1d7d:
925 case 0x1e55: case 0x1e57:
926 regmbc('p'); regmbc(0x1a5); regmbc(0x1d71);
927 regmbc(0x1d7d); regmbc(0x1d88); regmbc(0x1e55);
928 regmbc(0x1e57);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200929 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200930 case 'q': case 0x24b: case 0x2a0:
931 regmbc('q'); regmbc(0x24b); regmbc(0x2a0);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200932 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200933 case 'r': case 0x155: case 0x157: case 0x159: case 0x211:
934 case 0x213: case 0x24d: case 0x27d: case 0x1d72: case 0x1d73:
935 case 0x1d89: case 0x1e59: case 0x1e5b: case 0x1e5d: case 0x1e5f:
936 case 0xa7a7:
937 regmbc('r'); regmbc(0x155); regmbc(0x157);
938 regmbc(0x159); regmbc(0x211); regmbc(0x213);
939 regmbc(0x24d); regmbc(0x1d72); regmbc(0x1d73);
940 regmbc(0x1d89); regmbc(0x1e59); regmbc(0x27d);
941 regmbc(0x1e5b); regmbc(0x1e5d); regmbc(0x1e5f);
942 regmbc(0xa7a7);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200943 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200944 case 's': case 0x15b: case 0x15d: case 0x15f: case 0x161:
945 case 0x1e61: case 0x219: case 0x23f: case 0x1d74: case 0x1d8a:
946 case 0x1e63: case 0x1e65: case 0x1e67: case 0x1e69: case 0xa7a9:
947 regmbc('s'); regmbc(0x15b); regmbc(0x15d);
948 regmbc(0x15f); regmbc(0x161); regmbc(0x23f);
949 regmbc(0x219); regmbc(0x1d74); regmbc(0x1d8a);
950 regmbc(0x1e61); regmbc(0x1e63); regmbc(0x1e65);
951 regmbc(0x1e67); regmbc(0x1e69); regmbc(0xa7a9);
952 return;
953 case 't': case 0x163: case 0x165: case 0x167: case 0x1ab:
954 case 0x1ad: case 0x21b: case 0x288: case 0x1d75: case 0x1e6b:
955 case 0x1e6d: case 0x1e6f: case 0x1e71: case 0x1e97: case 0x2c66:
956 regmbc('t'); regmbc(0x163); regmbc(0x165);
957 regmbc(0x167); regmbc(0x1ab); regmbc(0x21b);
958 regmbc(0x1ad); regmbc(0x288); regmbc(0x1d75);
959 regmbc(0x1e6b); regmbc(0x1e6d); regmbc(0x1e6f);
960 regmbc(0x1e71); regmbc(0x1e97); regmbc(0x2c66);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200961 return;
962 case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200963 case 0x169: case 0x16b: case 0x16d: case 0x16f:
964 case 0x171: case 0x173: case 0x1b0: case 0x1d4:
965 case 0x1d6: case 0x1d8: case 0x1da: case 0x1dc:
966 case 0x215: case 0x217: case 0x289: case 0x1e73:
967 case 0x1d7e: case 0x1d99: case 0x1e75: case 0x1e77:
968 case 0x1e79: case 0x1e7b: case 0x1ee5: case 0x1ee7:
969 case 0x1ee9: case 0x1eeb: case 0x1eed: case 0x1eef:
970 case 0x1ef1:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200971 regmbc('u'); regmbc(0xf9); regmbc(0xfa);
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200972 regmbc(0xfb); regmbc(0xfc); regmbc(0x169);
973 regmbc(0x16b); regmbc(0x16d); regmbc(0x16f);
974 regmbc(0x171); regmbc(0x173); regmbc(0x1d6);
975 regmbc(0x1d8); regmbc(0x1da); regmbc(0x1dc);
976 regmbc(0x215); regmbc(0x217); regmbc(0x1b0);
977 regmbc(0x1d4); regmbc(0x289); regmbc(0x1d7e);
978 regmbc(0x1d99); regmbc(0x1e73); regmbc(0x1e75);
979 regmbc(0x1e77); regmbc(0x1e79); regmbc(0x1e7b);
980 regmbc(0x1ee5); regmbc(0x1ee7); regmbc(0x1ee9);
981 regmbc(0x1eeb); regmbc(0x1eed); regmbc(0x1eef);
982 regmbc(0x1ef1);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200983 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200984 case 'v': case 0x28b: case 0x1d8c: case 0x1e7d: case 0x1e7f:
985 regmbc('v'); regmbc(0x28b); regmbc(0x1d8c);
986 regmbc(0x1e7d); regmbc(0x1e7f);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200987 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200988 case 'w': case 0x175: case 0x1e81: case 0x1e83:
989 case 0x1e85: case 0x1e87: case 0x1e89: case 0x1e98:
990 regmbc('w'); regmbc(0x175); regmbc(0x1e81);
991 regmbc(0x1e83); regmbc(0x1e85); regmbc(0x1e87);
992 regmbc(0x1e89); regmbc(0x1e98);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200993 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200994 case 'x': case 0x1e8b: case 0x1e8d:
995 regmbc('x'); regmbc(0x1e8b); regmbc(0x1e8d);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +0200996 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +0200997 case 'y': case 0xfd: case 0xff: case 0x177: case 0x1b4:
998 case 0x233: case 0x24f: case 0x1e8f: case 0x1e99: case 0x1ef3:
999 case 0x1ef5: case 0x1ef7: case 0x1ef9:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001000 regmbc('y'); regmbc(0xfd); regmbc(0xff);
Bram Moolenaar0b94e292021-04-05 13:59:53 +02001001 regmbc(0x177); regmbc(0x1b4); regmbc(0x233);
1002 regmbc(0x24f); regmbc(0x1e8f); regmbc(0x1e99);
1003 regmbc(0x1ef3); regmbc(0x1ef5); regmbc(0x1ef7);
1004 regmbc(0x1ef9);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001005 return;
Bram Moolenaar0b94e292021-04-05 13:59:53 +02001006 case 'z': case 0x17a: case 0x17c: case 0x17e: case 0x1b6:
1007 case 0x1d76: case 0x1d8e: case 0x1e91: case 0x1e93:
1008 case 0x1e95: case 0x2c6c:
1009 regmbc('z'); regmbc(0x17a); regmbc(0x17c);
1010 regmbc(0x17e); regmbc(0x1b6); regmbc(0x1d76);
1011 regmbc(0x1d8e); regmbc(0x1e91); regmbc(0x1e93);
1012 regmbc(0x1e95); regmbc(0x2c6c);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001013 return;
1014 }
1015#endif
1016 }
1017 regmbc(c);
1018}
1019
1020/*
1021 * Emit a node.
1022 * Return pointer to generated code.
1023 */
1024 static char_u *
1025regnode(int op)
1026{
1027 char_u *ret;
1028
1029 ret = regcode;
1030 if (ret == JUST_CALC_SIZE)
1031 regsize += 3;
1032 else
1033 {
1034 *regcode++ = op;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001035 *regcode++ = NUL; // Null "next" pointer.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001036 *regcode++ = NUL;
1037 }
1038 return ret;
1039}
1040
1041/*
1042 * Write a long as four bytes at "p" and return pointer to the next char.
1043 */
1044 static char_u *
1045re_put_long(char_u *p, long_u val)
1046{
1047 *p++ = (char_u) ((val >> 24) & 0377);
1048 *p++ = (char_u) ((val >> 16) & 0377);
1049 *p++ = (char_u) ((val >> 8) & 0377);
1050 *p++ = (char_u) (val & 0377);
1051 return p;
1052}
1053
1054/*
1055 * regnext - dig the "next" pointer out of a node
1056 * Returns NULL when calculating size, when there is no next item and when
1057 * there is an error.
1058 */
1059 static char_u *
1060regnext(char_u *p)
1061{
1062 int offset;
1063
1064 if (p == JUST_CALC_SIZE || reg_toolong)
1065 return NULL;
1066
1067 offset = NEXT(p);
1068 if (offset == 0)
1069 return NULL;
1070
1071 if (OP(p) == BACK)
1072 return p - offset;
1073 else
1074 return p + offset;
1075}
1076
1077/*
1078 * Set the next-pointer at the end of a node chain.
1079 */
1080 static void
1081regtail(char_u *p, char_u *val)
1082{
1083 char_u *scan;
1084 char_u *temp;
1085 int offset;
1086
1087 if (p == JUST_CALC_SIZE)
1088 return;
1089
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001090 // Find last node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001091 scan = p;
1092 for (;;)
1093 {
1094 temp = regnext(scan);
1095 if (temp == NULL)
1096 break;
1097 scan = temp;
1098 }
1099
1100 if (OP(scan) == BACK)
1101 offset = (int)(scan - val);
1102 else
1103 offset = (int)(val - scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001104 // When the offset uses more than 16 bits it can no longer fit in the two
1105 // bytes available. Use a global flag to avoid having to check return
1106 // values in too many places.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001107 if (offset > 0xffff)
1108 reg_toolong = TRUE;
1109 else
1110 {
1111 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
1112 *(scan + 2) = (char_u) (offset & 0377);
1113 }
1114}
1115
1116/*
1117 * Like regtail, on item after a BRANCH; nop if none.
1118 */
1119 static void
1120regoptail(char_u *p, char_u *val)
1121{
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001122 // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless"
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001123 if (p == NULL || p == JUST_CALC_SIZE
1124 || (OP(p) != BRANCH
1125 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
1126 return;
1127 regtail(OPERAND(p), val);
1128}
1129
1130/*
1131 * Insert an operator in front of already-emitted operand
1132 *
1133 * Means relocating the operand.
1134 */
1135 static void
1136reginsert(int op, char_u *opnd)
1137{
1138 char_u *src;
1139 char_u *dst;
1140 char_u *place;
1141
1142 if (regcode == JUST_CALC_SIZE)
1143 {
1144 regsize += 3;
1145 return;
1146 }
1147 src = regcode;
1148 regcode += 3;
1149 dst = regcode;
1150 while (src > opnd)
1151 *--dst = *--src;
1152
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001153 place = opnd; // Op node, where operand used to be.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001154 *place++ = op;
1155 *place++ = NUL;
1156 *place = NUL;
1157}
1158
1159/*
1160 * Insert an operator in front of already-emitted operand.
1161 * Add a number to the operator.
1162 */
1163 static void
1164reginsert_nr(int op, long val, char_u *opnd)
1165{
1166 char_u *src;
1167 char_u *dst;
1168 char_u *place;
1169
1170 if (regcode == JUST_CALC_SIZE)
1171 {
1172 regsize += 7;
1173 return;
1174 }
1175 src = regcode;
1176 regcode += 7;
1177 dst = regcode;
1178 while (src > opnd)
1179 *--dst = *--src;
1180
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001181 place = opnd; // Op node, where operand used to be.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001182 *place++ = op;
1183 *place++ = NUL;
1184 *place++ = NUL;
1185 re_put_long(place, (long_u)val);
1186}
1187
1188/*
1189 * Insert an operator in front of already-emitted operand.
1190 * The operator has the given limit values as operands. Also set next pointer.
1191 *
1192 * Means relocating the operand.
1193 */
1194 static void
1195reginsert_limits(
1196 int op,
1197 long minval,
1198 long maxval,
1199 char_u *opnd)
1200{
1201 char_u *src;
1202 char_u *dst;
1203 char_u *place;
1204
1205 if (regcode == JUST_CALC_SIZE)
1206 {
1207 regsize += 11;
1208 return;
1209 }
1210 src = regcode;
1211 regcode += 11;
1212 dst = regcode;
1213 while (src > opnd)
1214 *--dst = *--src;
1215
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001216 place = opnd; // Op node, where operand used to be.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001217 *place++ = op;
1218 *place++ = NUL;
1219 *place++ = NUL;
1220 place = re_put_long(place, (long_u)minval);
1221 place = re_put_long(place, (long_u)maxval);
1222 regtail(opnd, place);
1223}
1224
1225/*
1226 * Return TRUE if the back reference is legal. We must have seen the close
1227 * brace.
1228 * TODO: Should also check that we don't refer to something that is repeated
1229 * (+*=): what instance of the repetition should we match?
1230 */
1231 static int
1232seen_endbrace(int refnum)
1233{
1234 if (!had_endbrace[refnum])
1235 {
1236 char_u *p;
1237
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001238 // Trick: check if "@<=" or "@<!" follows, in which case
1239 // the \1 can appear before the referenced match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001240 for (p = regparse; *p != NUL; ++p)
1241 if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1242 break;
1243 if (*p == NUL)
1244 {
1245 emsg(_("E65: Illegal back reference"));
1246 rc_did_emsg = TRUE;
1247 return FALSE;
1248 }
1249 }
1250 return TRUE;
1251}
1252
1253/*
1254 * Parse the lowest level.
1255 *
1256 * Optimization: gobbles an entire sequence of ordinary characters so that
1257 * it can turn them into a single node, which is smaller to store and
1258 * faster to run. Don't do this when one_exactly is set.
1259 */
1260 static char_u *
1261regatom(int *flagp)
1262{
1263 char_u *ret;
1264 int flags;
1265 int c;
1266 char_u *p;
1267 int extra = 0;
1268 int save_prev_at_start = prev_at_start;
1269
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001270 *flagp = WORST; // Tentatively.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001271
1272 c = getchr();
1273 switch (c)
1274 {
1275 case Magic('^'):
1276 ret = regnode(BOL);
1277 break;
1278
1279 case Magic('$'):
1280 ret = regnode(EOL);
1281#if defined(FEAT_SYN_HL) || defined(PROTO)
1282 had_eol = TRUE;
1283#endif
1284 break;
1285
1286 case Magic('<'):
1287 ret = regnode(BOW);
1288 break;
1289
1290 case Magic('>'):
1291 ret = regnode(EOW);
1292 break;
1293
1294 case Magic('_'):
1295 c = no_Magic(getchr());
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001296 if (c == '^') // "\_^" is start-of-line
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001297 {
1298 ret = regnode(BOL);
1299 break;
1300 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001301 if (c == '$') // "\_$" is end-of-line
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001302 {
1303 ret = regnode(EOL);
1304#if defined(FEAT_SYN_HL) || defined(PROTO)
1305 had_eol = TRUE;
1306#endif
1307 break;
1308 }
1309
1310 extra = ADD_NL;
1311 *flagp |= HASNL;
1312
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001313 // "\_[" is character range plus newline
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001314 if (c == '[')
1315 goto collection;
1316
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001317 // "\_x" is character class plus newline
1318 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001319
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001320 // Character classes.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001321 case Magic('.'):
1322 case Magic('i'):
1323 case Magic('I'):
1324 case Magic('k'):
1325 case Magic('K'):
1326 case Magic('f'):
1327 case Magic('F'):
1328 case Magic('p'):
1329 case Magic('P'):
1330 case Magic('s'):
1331 case Magic('S'):
1332 case Magic('d'):
1333 case Magic('D'):
1334 case Magic('x'):
1335 case Magic('X'):
1336 case Magic('o'):
1337 case Magic('O'):
1338 case Magic('w'):
1339 case Magic('W'):
1340 case Magic('h'):
1341 case Magic('H'):
1342 case Magic('a'):
1343 case Magic('A'):
1344 case Magic('l'):
1345 case Magic('L'):
1346 case Magic('u'):
1347 case Magic('U'):
1348 p = vim_strchr(classchars, no_Magic(c));
1349 if (p == NULL)
1350 EMSG_RET_NULL(_("E63: invalid use of \\_"));
1351
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001352 // When '.' is followed by a composing char ignore the dot, so that
1353 // the composing char is matched here.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001354 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1355 {
1356 c = getchr();
1357 goto do_multibyte;
1358 }
1359 ret = regnode(classcodes[p - classchars] + extra);
1360 *flagp |= HASWIDTH | SIMPLE;
1361 break;
1362
1363 case Magic('n'):
1364 if (reg_string)
1365 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001366 // In a string "\n" matches a newline character.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001367 ret = regnode(EXACTLY);
1368 regc(NL);
1369 regc(NUL);
1370 *flagp |= HASWIDTH | SIMPLE;
1371 }
1372 else
1373 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001374 // In buffer text "\n" matches the end of a line.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001375 ret = regnode(NEWL);
1376 *flagp |= HASWIDTH | HASNL;
1377 }
1378 break;
1379
1380 case Magic('('):
1381 if (one_exactly)
1382 EMSG_ONE_RET_NULL;
1383 ret = reg(REG_PAREN, &flags);
1384 if (ret == NULL)
1385 return NULL;
1386 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1387 break;
1388
1389 case NUL:
1390 case Magic('|'):
1391 case Magic('&'):
1392 case Magic(')'):
1393 if (one_exactly)
1394 EMSG_ONE_RET_NULL;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001395 IEMSG_RET_NULL(_(e_internal)); // Supposed to be caught earlier.
1396 // NOTREACHED
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001397
1398 case Magic('='):
1399 case Magic('?'):
1400 case Magic('+'):
1401 case Magic('@'):
1402 case Magic('{'):
1403 case Magic('*'):
1404 c = no_Magic(c);
1405 EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
1406 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001407 // NOTREACHED
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001408
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001409 case Magic('~'): // previous substitute pattern
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001410 if (reg_prev_sub != NULL)
1411 {
1412 char_u *lp;
1413
1414 ret = regnode(EXACTLY);
1415 lp = reg_prev_sub;
1416 while (*lp != NUL)
1417 regc(*lp++);
1418 regc(NUL);
1419 if (*reg_prev_sub != NUL)
1420 {
1421 *flagp |= HASWIDTH;
1422 if ((lp - reg_prev_sub) == 1)
1423 *flagp |= SIMPLE;
1424 }
1425 }
1426 else
1427 EMSG_RET_NULL(_(e_nopresub));
1428 break;
1429
1430 case Magic('1'):
1431 case Magic('2'):
1432 case Magic('3'):
1433 case Magic('4'):
1434 case Magic('5'):
1435 case Magic('6'):
1436 case Magic('7'):
1437 case Magic('8'):
1438 case Magic('9'):
1439 {
1440 int refnum;
1441
1442 refnum = c - Magic('0');
1443 if (!seen_endbrace(refnum))
1444 return NULL;
1445 ret = regnode(BACKREF + refnum);
1446 }
1447 break;
1448
1449 case Magic('z'):
1450 {
1451 c = no_Magic(getchr());
1452 switch (c)
1453 {
1454#ifdef FEAT_SYN_HL
1455 case '(': if ((reg_do_extmatch & REX_SET) == 0)
1456 EMSG_RET_NULL(_(e_z_not_allowed));
1457 if (one_exactly)
1458 EMSG_ONE_RET_NULL;
1459 ret = reg(REG_ZPAREN, &flags);
1460 if (ret == NULL)
1461 return NULL;
1462 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1463 re_has_z = REX_SET;
1464 break;
1465
1466 case '1':
1467 case '2':
1468 case '3':
1469 case '4':
1470 case '5':
1471 case '6':
1472 case '7':
1473 case '8':
1474 case '9': if ((reg_do_extmatch & REX_USE) == 0)
1475 EMSG_RET_NULL(_(e_z1_not_allowed));
1476 ret = regnode(ZREF + c - '0');
1477 re_has_z = REX_USE;
1478 break;
1479#endif
1480
1481 case 's': ret = regnode(MOPEN + 0);
1482 if (re_mult_next("\\zs") == FAIL)
1483 return NULL;
1484 break;
1485
1486 case 'e': ret = regnode(MCLOSE + 0);
1487 if (re_mult_next("\\ze") == FAIL)
1488 return NULL;
1489 break;
1490
1491 default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1492 }
1493 }
1494 break;
1495
1496 case Magic('%'):
1497 {
1498 c = no_Magic(getchr());
1499 switch (c)
1500 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001501 // () without a back reference
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001502 case '(':
1503 if (one_exactly)
1504 EMSG_ONE_RET_NULL;
1505 ret = reg(REG_NPAREN, &flags);
1506 if (ret == NULL)
1507 return NULL;
1508 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1509 break;
1510
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001511 // Catch \%^ and \%$ regardless of where they appear in the
1512 // pattern -- regardless of whether or not it makes sense.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001513 case '^':
1514 ret = regnode(RE_BOF);
1515 break;
1516
1517 case '$':
1518 ret = regnode(RE_EOF);
1519 break;
1520
1521 case '#':
1522 ret = regnode(CURSOR);
1523 break;
1524
1525 case 'V':
1526 ret = regnode(RE_VISUAL);
1527 break;
1528
1529 case 'C':
1530 ret = regnode(RE_COMPOSING);
1531 break;
1532
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001533 // \%[abc]: Emit as a list of branches, all ending at the last
1534 // branch which matches nothing.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001535 case '[':
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001536 if (one_exactly) // doesn't nest
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001537 EMSG_ONE_RET_NULL;
1538 {
1539 char_u *lastbranch;
1540 char_u *lastnode = NULL;
1541 char_u *br;
1542
1543 ret = NULL;
1544 while ((c = getchr()) != ']')
1545 {
1546 if (c == NUL)
1547 EMSG2_RET_NULL(_(e_missing_sb),
1548 reg_magic == MAGIC_ALL);
1549 br = regnode(BRANCH);
1550 if (ret == NULL)
1551 ret = br;
1552 else
1553 {
1554 regtail(lastnode, br);
1555 if (reg_toolong)
1556 return NULL;
1557 }
1558
1559 ungetchr();
1560 one_exactly = TRUE;
1561 lastnode = regatom(flagp);
1562 one_exactly = FALSE;
1563 if (lastnode == NULL)
1564 return NULL;
1565 }
1566 if (ret == NULL)
1567 EMSG2_RET_NULL(_(e_empty_sb),
1568 reg_magic == MAGIC_ALL);
1569 lastbranch = regnode(BRANCH);
1570 br = regnode(NOTHING);
1571 if (ret != JUST_CALC_SIZE)
1572 {
1573 regtail(lastnode, br);
1574 regtail(lastbranch, br);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001575 // connect all branches to the NOTHING
1576 // branch at the end
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001577 for (br = ret; br != lastnode; )
1578 {
1579 if (OP(br) == BRANCH)
1580 {
1581 regtail(br, lastbranch);
1582 if (reg_toolong)
1583 return NULL;
1584 br = OPERAND(br);
1585 }
1586 else
1587 br = regnext(br);
1588 }
1589 }
1590 *flagp &= ~(HASWIDTH | SIMPLE);
1591 break;
1592 }
1593
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001594 case 'd': // %d123 decimal
1595 case 'o': // %o123 octal
1596 case 'x': // %xab hex 2
1597 case 'u': // %uabcd hex 4
1598 case 'U': // %U1234abcd hex 8
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001599 {
1600 long i;
1601
1602 switch (c)
1603 {
1604 case 'd': i = getdecchrs(); break;
1605 case 'o': i = getoctchrs(); break;
1606 case 'x': i = gethexchrs(2); break;
1607 case 'u': i = gethexchrs(4); break;
1608 case 'U': i = gethexchrs(8); break;
1609 default: i = -1; break;
1610 }
1611
1612 if (i < 0 || i > INT_MAX)
1613 EMSG2_RET_NULL(
1614 _("E678: Invalid character after %s%%[dxouU]"),
1615 reg_magic == MAGIC_ALL);
1616 if (use_multibytecode(i))
1617 ret = regnode(MULTIBYTECODE);
1618 else
1619 ret = regnode(EXACTLY);
1620 if (i == 0)
1621 regc(0x0a);
1622 else
1623 regmbc(i);
1624 regc(NUL);
1625 *flagp |= HASWIDTH;
1626 break;
1627 }
1628
1629 default:
1630 if (VIM_ISDIGIT(c) || c == '<' || c == '>'
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001631 || c == '\'' || c == '.')
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001632 {
1633 long_u n = 0;
1634 int cmp;
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001635 int cur = FALSE;
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001636
1637 cmp = c;
1638 if (cmp == '<' || cmp == '>')
1639 c = getchr();
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001640 if (no_Magic(c) == '.')
1641 {
1642 cur = TRUE;
1643 c = getchr();
1644 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001645 while (VIM_ISDIGIT(c))
1646 {
1647 n = n * 10 + (c - '0');
1648 c = getchr();
1649 }
1650 if (c == '\'' && n == 0)
1651 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001652 // "\%'m", "\%<'m" and "\%>'m": Mark
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001653 c = getchr();
1654 ret = regnode(RE_MARK);
1655 if (ret == JUST_CALC_SIZE)
1656 regsize += 2;
1657 else
1658 {
1659 *regcode++ = c;
1660 *regcode++ = cmp;
1661 }
1662 break;
1663 }
1664 else if (c == 'l' || c == 'c' || c == 'v')
1665 {
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001666 if (cur && n)
1667 {
1668 semsg(_(e_regexp_number_after_dot_pos_search), no_Magic(c));
1669 rc_did_emsg = TRUE;
1670 return NULL;
1671 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001672 if (c == 'l')
1673 {
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001674 if (cur)
1675 n = curwin->w_cursor.lnum;
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001676 ret = regnode(RE_LNUM);
1677 if (save_prev_at_start)
1678 at_start = TRUE;
1679 }
1680 else if (c == 'c')
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001681 {
1682 if (cur)
1683 {
1684 n = curwin->w_cursor.col;
1685 n++;
1686 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001687 ret = regnode(RE_COL);
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001688 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001689 else
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001690 {
1691 if (cur)
1692 {
1693 colnr_T vcol = 0;
1694
1695 getvvcol(curwin, &curwin->w_cursor,
1696 NULL, NULL, &vcol);
1697 ++vcol;
1698 n = vcol;
1699 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001700 ret = regnode(RE_VCOL);
Bram Moolenaar04db26b2021-07-05 20:15:23 +02001701 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001702 if (ret == JUST_CALC_SIZE)
1703 regsize += 5;
1704 else
1705 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001706 // put the number and the optional
1707 // comparator after the opcode
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001708 regcode = re_put_long(regcode, n);
1709 *regcode++ = cmp;
1710 }
1711 break;
1712 }
1713 }
1714
1715 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
1716 reg_magic == MAGIC_ALL);
1717 }
1718 }
1719 break;
1720
1721 case Magic('['):
1722collection:
1723 {
1724 char_u *lp;
1725
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001726 // If there is no matching ']', we assume the '[' is a normal
1727 // character. This makes 'incsearch' and ":help [" work.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001728 lp = skip_anyof(regparse);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001729 if (*lp == ']') // there is a matching ']'
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001730 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001731 int startc = -1; // > 0 when next '-' is a range
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001732 int endc;
1733
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001734 // In a character class, different parsing rules apply.
1735 // Not even \ is special anymore, nothing is.
1736 if (*regparse == '^') // Complement of range.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001737 {
1738 ret = regnode(ANYBUT + extra);
1739 regparse++;
1740 }
1741 else
1742 ret = regnode(ANYOF + extra);
1743
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001744 // At the start ']' and '-' mean the literal character.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001745 if (*regparse == ']' || *regparse == '-')
1746 {
1747 startc = *regparse;
1748 regc(*regparse++);
1749 }
1750
1751 while (*regparse != NUL && *regparse != ']')
1752 {
1753 if (*regparse == '-')
1754 {
1755 ++regparse;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001756 // The '-' is not used for a range at the end and
1757 // after or before a '\n'.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001758 if (*regparse == ']' || *regparse == NUL
1759 || startc == -1
1760 || (regparse[0] == '\\' && regparse[1] == 'n'))
1761 {
1762 regc('-');
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001763 startc = '-'; // [--x] is a range
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001764 }
1765 else
1766 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001767 // Also accept "a-[.z.]"
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001768 endc = 0;
1769 if (*regparse == '[')
1770 endc = get_coll_element(&regparse);
1771 if (endc == 0)
1772 {
1773 if (has_mbyte)
1774 endc = mb_ptr2char_adv(&regparse);
1775 else
1776 endc = *regparse++;
1777 }
1778
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001779 // Handle \o40, \x20 and \u20AC style sequences
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001780 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
1781 endc = coll_get_char();
1782
1783 if (startc > endc)
1784 EMSG_RET_NULL(_(e_reverse_range));
1785 if (has_mbyte && ((*mb_char2len)(startc) > 1
1786 || (*mb_char2len)(endc) > 1))
1787 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001788 // Limit to a range of 256 chars.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001789 if (endc > startc + 256)
1790 EMSG_RET_NULL(_(e_large_class));
1791 while (++startc <= endc)
1792 regmbc(startc);
1793 }
1794 else
1795 {
1796#ifdef EBCDIC
1797 int alpha_only = FALSE;
1798
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001799 // for alphabetical range skip the gaps
1800 // 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001801 if (isalpha(startc) && isalpha(endc))
1802 alpha_only = TRUE;
1803#endif
1804 while (++startc <= endc)
1805#ifdef EBCDIC
1806 if (!alpha_only || isalpha(startc))
1807#endif
1808 regc(startc);
1809 }
1810 startc = -1;
1811 }
1812 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001813 // Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
1814 // accepts "\t", "\e", etc., but only when the 'l' flag in
1815 // 'cpoptions' is not included.
1816 // Posix doesn't recognize backslash at all.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001817 else if (*regparse == '\\'
1818 && !reg_cpo_bsl
1819 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
1820 || (!reg_cpo_lit
1821 && vim_strchr(REGEXP_ABBR,
1822 regparse[1]) != NULL)))
1823 {
1824 regparse++;
1825 if (*regparse == 'n')
1826 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001827 // '\n' in range: also match NL
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001828 if (ret != JUST_CALC_SIZE)
1829 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001830 // Using \n inside [^] does not change what
1831 // matches. "[^\n]" is the same as ".".
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001832 if (*ret == ANYOF)
1833 {
1834 *ret = ANYOF + ADD_NL;
1835 *flagp |= HASNL;
1836 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001837 // else: must have had a \n already
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001838 }
1839 regparse++;
1840 startc = -1;
1841 }
1842 else if (*regparse == 'd'
1843 || *regparse == 'o'
1844 || *regparse == 'x'
1845 || *regparse == 'u'
1846 || *regparse == 'U')
1847 {
1848 startc = coll_get_char();
1849 if (startc == 0)
1850 regc(0x0a);
1851 else
1852 regmbc(startc);
1853 }
1854 else
1855 {
1856 startc = backslash_trans(*regparse++);
1857 regc(startc);
1858 }
1859 }
1860 else if (*regparse == '[')
1861 {
1862 int c_class;
1863 int cu;
1864
1865 c_class = get_char_class(&regparse);
1866 startc = -1;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001867 // Characters assumed to be 8 bits!
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001868 switch (c_class)
1869 {
1870 case CLASS_NONE:
1871 c_class = get_equi_class(&regparse);
1872 if (c_class != 0)
1873 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001874 // produce equivalence class
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001875 reg_equi_class(c_class);
1876 }
1877 else if ((c_class =
1878 get_coll_element(&regparse)) != 0)
1879 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001880 // produce a collating element
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001881 regmbc(c_class);
1882 }
1883 else
1884 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001885 // literal '[', allow [[-x] as a range
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001886 startc = *regparse++;
1887 regc(startc);
1888 }
1889 break;
1890 case CLASS_ALNUM:
1891 for (cu = 1; cu < 128; cu++)
1892 if (isalnum(cu))
1893 regmbc(cu);
1894 break;
1895 case CLASS_ALPHA:
1896 for (cu = 1; cu < 128; cu++)
1897 if (isalpha(cu))
1898 regmbc(cu);
1899 break;
1900 case CLASS_BLANK:
1901 regc(' ');
1902 regc('\t');
1903 break;
1904 case CLASS_CNTRL:
1905 for (cu = 1; cu <= 127; cu++)
1906 if (iscntrl(cu))
1907 regmbc(cu);
1908 break;
1909 case CLASS_DIGIT:
1910 for (cu = 1; cu <= 127; cu++)
1911 if (VIM_ISDIGIT(cu))
1912 regmbc(cu);
1913 break;
1914 case CLASS_GRAPH:
1915 for (cu = 1; cu <= 127; cu++)
1916 if (isgraph(cu))
1917 regmbc(cu);
1918 break;
1919 case CLASS_LOWER:
1920 for (cu = 1; cu <= 255; cu++)
1921 if (MB_ISLOWER(cu) && cu != 170
1922 && cu != 186)
1923 regmbc(cu);
1924 break;
1925 case CLASS_PRINT:
1926 for (cu = 1; cu <= 255; cu++)
1927 if (vim_isprintc(cu))
1928 regmbc(cu);
1929 break;
1930 case CLASS_PUNCT:
1931 for (cu = 1; cu < 128; cu++)
1932 if (ispunct(cu))
1933 regmbc(cu);
1934 break;
1935 case CLASS_SPACE:
1936 for (cu = 9; cu <= 13; cu++)
1937 regc(cu);
1938 regc(' ');
1939 break;
1940 case CLASS_UPPER:
1941 for (cu = 1; cu <= 255; cu++)
1942 if (MB_ISUPPER(cu))
1943 regmbc(cu);
1944 break;
1945 case CLASS_XDIGIT:
1946 for (cu = 1; cu <= 255; cu++)
1947 if (vim_isxdigit(cu))
1948 regmbc(cu);
1949 break;
1950 case CLASS_TAB:
1951 regc('\t');
1952 break;
1953 case CLASS_RETURN:
1954 regc('\r');
1955 break;
1956 case CLASS_BACKSPACE:
1957 regc('\b');
1958 break;
1959 case CLASS_ESCAPE:
1960 regc('\033');
1961 break;
1962 case CLASS_IDENT:
1963 for (cu = 1; cu <= 255; cu++)
1964 if (vim_isIDc(cu))
1965 regmbc(cu);
1966 break;
1967 case CLASS_KEYWORD:
1968 for (cu = 1; cu <= 255; cu++)
1969 if (reg_iswordc(cu))
1970 regmbc(cu);
1971 break;
1972 case CLASS_FNAME:
1973 for (cu = 1; cu <= 255; cu++)
1974 if (vim_isfilec(cu))
1975 regmbc(cu);
1976 break;
1977 }
1978 }
1979 else
1980 {
1981 if (has_mbyte)
1982 {
1983 int len;
1984
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001985 // produce a multibyte character, including any
1986 // following composing characters
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001987 startc = mb_ptr2char(regparse);
1988 len = (*mb_ptr2len)(regparse);
1989 if (enc_utf8 && utf_char2len(startc) != len)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02001990 startc = -1; // composing chars
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02001991 while (--len >= 0)
1992 regc(*regparse++);
1993 }
1994 else
1995 {
1996 startc = *regparse++;
1997 regc(startc);
1998 }
1999 }
2000 }
2001 regc(NUL);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002002 prevchr_len = 1; // last char was the ']'
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002003 if (*regparse != ']')
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002004 EMSG_RET_NULL(_(e_toomsbra)); // Cannot happen?
2005 skipchr(); // let's be friends with the lexer again
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002006 *flagp |= HASWIDTH | SIMPLE;
2007 break;
2008 }
2009 else if (reg_strict)
2010 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
2011 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002012 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002013
2014 default:
2015 {
2016 int len;
2017
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002018 // A multi-byte character is handled as a separate atom if it's
2019 // before a multi and when it's a composing char.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002020 if (use_multibytecode(c))
2021 {
2022do_multibyte:
2023 ret = regnode(MULTIBYTECODE);
2024 regmbc(c);
2025 *flagp |= HASWIDTH | SIMPLE;
2026 break;
2027 }
2028
2029 ret = regnode(EXACTLY);
2030
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002031 // Append characters as long as:
2032 // - there is no following multi, we then need the character in
2033 // front of it as a single character operand
2034 // - not running into a Magic character
2035 // - "one_exactly" is not set
2036 // But always emit at least one character. Might be a Multi,
2037 // e.g., a "[" without matching "]".
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002038 for (len = 0; c != NUL && (len == 0
2039 || (re_multi_type(peekchr()) == NOT_MULTI
2040 && !one_exactly
2041 && !is_Magic(c))); ++len)
2042 {
2043 c = no_Magic(c);
2044 if (has_mbyte)
2045 {
2046 regmbc(c);
2047 if (enc_utf8)
2048 {
2049 int l;
2050
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002051 // Need to get composing character too.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002052 for (;;)
2053 {
2054 l = utf_ptr2len(regparse);
2055 if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2056 break;
2057 regmbc(utf_ptr2char(regparse));
2058 skipchr();
2059 }
2060 }
2061 }
2062 else
2063 regc(c);
2064 c = getchr();
2065 }
2066 ungetchr();
2067
2068 regc(NUL);
2069 *flagp |= HASWIDTH;
2070 if (len == 1)
2071 *flagp |= SIMPLE;
2072 }
2073 break;
2074 }
2075
2076 return ret;
2077}
2078
2079/*
2080 * Parse something followed by possible [*+=].
2081 *
2082 * Note that the branching code sequences used for = and the general cases
2083 * of * and + are somewhat optimized: they use the same NOTHING node as
2084 * both the endmarker for their branch list and the body of the last branch.
2085 * It might seem that this node could be dispensed with entirely, but the
2086 * endmarker role is not redundant.
2087 */
2088 static char_u *
2089regpiece(int *flagp)
2090{
2091 char_u *ret;
2092 int op;
2093 char_u *next;
2094 int flags;
2095 long minval;
2096 long maxval;
2097
2098 ret = regatom(&flags);
2099 if (ret == NULL)
2100 return NULL;
2101
2102 op = peekchr();
2103 if (re_multi_type(op) == NOT_MULTI)
2104 {
2105 *flagp = flags;
2106 return ret;
2107 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002108 // default flags
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002109 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
2110
2111 skipchr();
2112 switch (op)
2113 {
2114 case Magic('*'):
2115 if (flags & SIMPLE)
2116 reginsert(STAR, ret);
2117 else
2118 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002119 // Emit x* as (x&|), where & means "self".
2120 reginsert(BRANCH, ret); // Either x
2121 regoptail(ret, regnode(BACK)); // and loop
2122 regoptail(ret, ret); // back
2123 regtail(ret, regnode(BRANCH)); // or
2124 regtail(ret, regnode(NOTHING)); // null.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002125 }
2126 break;
2127
2128 case Magic('+'):
2129 if (flags & SIMPLE)
2130 reginsert(PLUS, ret);
2131 else
2132 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002133 // Emit x+ as x(&|), where & means "self".
2134 next = regnode(BRANCH); // Either
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002135 regtail(ret, next);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002136 regtail(regnode(BACK), ret); // loop back
2137 regtail(next, regnode(BRANCH)); // or
2138 regtail(ret, regnode(NOTHING)); // null.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002139 }
2140 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
2141 break;
2142
2143 case Magic('@'):
2144 {
2145 int lop = END;
2146 long nr;
2147
2148 nr = getdecchrs();
2149 switch (no_Magic(getchr()))
2150 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002151 case '=': lop = MATCH; break; // \@=
2152 case '!': lop = NOMATCH; break; // \@!
2153 case '>': lop = SUBPAT; break; // \@>
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002154 case '<': switch (no_Magic(getchr()))
2155 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002156 case '=': lop = BEHIND; break; // \@<=
2157 case '!': lop = NOBEHIND; break; // \@<!
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002158 }
2159 }
2160 if (lop == END)
2161 EMSG2_RET_NULL(_("E59: invalid character after %s@"),
2162 reg_magic == MAGIC_ALL);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002163 // Look behind must match with behind_pos.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002164 if (lop == BEHIND || lop == NOBEHIND)
2165 {
2166 regtail(ret, regnode(BHPOS));
2167 *flagp |= HASLOOKBH;
2168 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002169 regtail(ret, regnode(END)); // operand ends
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002170 if (lop == BEHIND || lop == NOBEHIND)
2171 {
2172 if (nr < 0)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002173 nr = 0; // no limit is same as zero limit
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002174 reginsert_nr(lop, nr, ret);
2175 }
2176 else
2177 reginsert(lop, ret);
2178 break;
2179 }
2180
2181 case Magic('?'):
2182 case Magic('='):
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002183 // Emit x= as (x|)
2184 reginsert(BRANCH, ret); // Either x
2185 regtail(ret, regnode(BRANCH)); // or
2186 next = regnode(NOTHING); // null.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002187 regtail(ret, next);
2188 regoptail(ret, next);
2189 break;
2190
2191 case Magic('{'):
2192 if (!read_limits(&minval, &maxval))
2193 return NULL;
2194 if (flags & SIMPLE)
2195 {
2196 reginsert(BRACE_SIMPLE, ret);
2197 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2198 }
2199 else
2200 {
2201 if (num_complex_braces >= 10)
2202 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
2203 reg_magic == MAGIC_ALL);
2204 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
2205 regoptail(ret, regnode(BACK));
2206 regoptail(ret, ret);
2207 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2208 ++num_complex_braces;
2209 }
2210 if (minval > 0 && maxval > 0)
2211 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
2212 break;
2213 }
2214 if (re_multi_type(peekchr()) != NOT_MULTI)
2215 {
2216 // Can't have a multi follow a multi.
2217 if (peekchr() == Magic('*'))
2218 EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
2219 EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
2220 no_Magic(peekchr()));
2221 }
2222
2223 return ret;
2224}
2225
2226/*
2227 * Parse one alternative of an | or & operator.
2228 * Implements the concatenation operator.
2229 */
2230 static char_u *
2231regconcat(int *flagp)
2232{
2233 char_u *first = NULL;
2234 char_u *chain = NULL;
2235 char_u *latest;
2236 int flags;
2237 int cont = TRUE;
2238
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002239 *flagp = WORST; // Tentatively.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002240
2241 while (cont)
2242 {
2243 switch (peekchr())
2244 {
2245 case NUL:
2246 case Magic('|'):
2247 case Magic('&'):
2248 case Magic(')'):
2249 cont = FALSE;
2250 break;
2251 case Magic('Z'):
2252 regflags |= RF_ICOMBINE;
2253 skipchr_keepstart();
2254 break;
2255 case Magic('c'):
2256 regflags |= RF_ICASE;
2257 skipchr_keepstart();
2258 break;
2259 case Magic('C'):
2260 regflags |= RF_NOICASE;
2261 skipchr_keepstart();
2262 break;
2263 case Magic('v'):
2264 reg_magic = MAGIC_ALL;
2265 skipchr_keepstart();
2266 curchr = -1;
2267 break;
2268 case Magic('m'):
2269 reg_magic = MAGIC_ON;
2270 skipchr_keepstart();
2271 curchr = -1;
2272 break;
2273 case Magic('M'):
2274 reg_magic = MAGIC_OFF;
2275 skipchr_keepstart();
2276 curchr = -1;
2277 break;
2278 case Magic('V'):
2279 reg_magic = MAGIC_NONE;
2280 skipchr_keepstart();
2281 curchr = -1;
2282 break;
2283 default:
2284 latest = regpiece(&flags);
2285 if (latest == NULL || reg_toolong)
2286 return NULL;
2287 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002288 if (chain == NULL) // First piece.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002289 *flagp |= flags & SPSTART;
2290 else
2291 regtail(chain, latest);
2292 chain = latest;
2293 if (first == NULL)
2294 first = latest;
2295 break;
2296 }
2297 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002298 if (first == NULL) // Loop ran zero times.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002299 first = regnode(NOTHING);
2300 return first;
2301}
2302
2303/*
2304 * Parse one alternative of an | operator.
2305 * Implements the & operator.
2306 */
2307 static char_u *
2308regbranch(int *flagp)
2309{
2310 char_u *ret;
2311 char_u *chain = NULL;
2312 char_u *latest;
2313 int flags;
2314
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002315 *flagp = WORST | HASNL; // Tentatively.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002316
2317 ret = regnode(BRANCH);
2318 for (;;)
2319 {
2320 latest = regconcat(&flags);
2321 if (latest == NULL)
2322 return NULL;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002323 // If one of the branches has width, the whole thing has. If one of
2324 // the branches anchors at start-of-line, the whole thing does.
2325 // If one of the branches uses look-behind, the whole thing does.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002326 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002327 // If one of the branches doesn't match a line-break, the whole thing
2328 // doesn't.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002329 *flagp &= ~HASNL | (flags & HASNL);
2330 if (chain != NULL)
2331 regtail(chain, latest);
2332 if (peekchr() != Magic('&'))
2333 break;
2334 skipchr();
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002335 regtail(latest, regnode(END)); // operand ends
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002336 if (reg_toolong)
2337 break;
2338 reginsert(MATCH, latest);
2339 chain = latest;
2340 }
2341
2342 return ret;
2343}
2344
2345/*
2346 * Parse regular expression, i.e. main body or parenthesized thing.
2347 *
2348 * Caller must absorb opening parenthesis.
2349 *
2350 * Combining parenthesis handling with the base level of regular expression
2351 * is a trifle forced, but the need to tie the tails of the branches to what
2352 * follows makes it hard to avoid.
2353 */
2354 static char_u *
2355reg(
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002356 int paren, // REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002357 int *flagp)
2358{
2359 char_u *ret;
2360 char_u *br;
2361 char_u *ender;
2362 int parno = 0;
2363 int flags;
2364
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002365 *flagp = HASWIDTH; // Tentatively.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002366
2367#ifdef FEAT_SYN_HL
2368 if (paren == REG_ZPAREN)
2369 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002370 // Make a ZOPEN node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002371 if (regnzpar >= NSUBEXP)
2372 EMSG_RET_NULL(_("E50: Too many \\z("));
2373 parno = regnzpar;
2374 regnzpar++;
2375 ret = regnode(ZOPEN + parno);
2376 }
2377 else
2378#endif
2379 if (paren == REG_PAREN)
2380 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002381 // Make a MOPEN node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002382 if (regnpar >= NSUBEXP)
2383 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
2384 parno = regnpar;
2385 ++regnpar;
2386 ret = regnode(MOPEN + parno);
2387 }
2388 else if (paren == REG_NPAREN)
2389 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002390 // Make a NOPEN node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002391 ret = regnode(NOPEN);
2392 }
2393 else
2394 ret = NULL;
2395
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002396 // Pick up the branches, linking them together.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002397 br = regbranch(&flags);
2398 if (br == NULL)
2399 return NULL;
2400 if (ret != NULL)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002401 regtail(ret, br); // [MZ]OPEN -> first.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002402 else
2403 ret = br;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002404 // If one of the branches can be zero-width, the whole thing can.
2405 // If one of the branches has * at start or matches a line-break, the
2406 // whole thing can.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002407 if (!(flags & HASWIDTH))
2408 *flagp &= ~HASWIDTH;
2409 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2410 while (peekchr() == Magic('|'))
2411 {
2412 skipchr();
2413 br = regbranch(&flags);
2414 if (br == NULL || reg_toolong)
2415 return NULL;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002416 regtail(ret, br); // BRANCH -> BRANCH.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002417 if (!(flags & HASWIDTH))
2418 *flagp &= ~HASWIDTH;
2419 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2420 }
2421
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002422 // Make a closing node, and hook it on the end.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002423 ender = regnode(
2424#ifdef FEAT_SYN_HL
2425 paren == REG_ZPAREN ? ZCLOSE + parno :
2426#endif
2427 paren == REG_PAREN ? MCLOSE + parno :
2428 paren == REG_NPAREN ? NCLOSE : END);
2429 regtail(ret, ender);
2430
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002431 // Hook the tails of the branches to the closing node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002432 for (br = ret; br != NULL; br = regnext(br))
2433 regoptail(br, ender);
2434
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002435 // Check for proper termination.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002436 if (paren != REG_NOPAREN && getchr() != Magic(')'))
2437 {
2438#ifdef FEAT_SYN_HL
2439 if (paren == REG_ZPAREN)
2440 EMSG_RET_NULL(_("E52: Unmatched \\z("));
2441 else
2442#endif
2443 if (paren == REG_NPAREN)
2444 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
2445 else
2446 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
2447 }
2448 else if (paren == REG_NOPAREN && peekchr() != NUL)
2449 {
2450 if (curchr == Magic(')'))
2451 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
2452 else
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002453 EMSG_RET_NULL(_(e_trailing)); // "Can't happen".
2454 // NOTREACHED
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002455 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002456 // Here we set the flag allowing back references to this set of
2457 // parentheses.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002458 if (paren == REG_PAREN)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002459 had_endbrace[parno] = TRUE; // have seen the close paren
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002460 return ret;
2461}
2462
2463/*
2464 * bt_regcomp() - compile a regular expression into internal code for the
2465 * traditional back track matcher.
2466 * Returns the program in allocated space. Returns NULL for an error.
2467 *
2468 * We can't allocate space until we know how big the compiled form will be,
2469 * but we can't compile it (and thus know how big it is) until we've got a
2470 * place to put the code. So we cheat: we compile it twice, once with code
2471 * generation turned off and size counting turned on, and once "for real".
2472 * This also means that we don't allocate space until we are sure that the
2473 * thing really will compile successfully, and we never have to move the
2474 * code and thus invalidate pointers into it. (Note that it has to be in
2475 * one piece because vim_free() must be able to free it all.)
2476 *
2477 * Whether upper/lower case is to be ignored is decided when executing the
2478 * program, it does not matter here.
2479 *
2480 * Beware that the optimization-preparation code in here knows about some
2481 * of the structure of the compiled regexp.
2482 * "re_flags": RE_MAGIC and/or RE_STRING.
2483 */
2484 static regprog_T *
2485bt_regcomp(char_u *expr, int re_flags)
2486{
2487 bt_regprog_T *r;
2488 char_u *scan;
2489 char_u *longest;
2490 int len;
2491 int flags;
2492
2493 if (expr == NULL)
Bram Moolenaare83cca22020-09-07 18:53:21 +02002494 IEMSG_RET_NULL(_(e_null));
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002495
2496 init_class_tab();
2497
2498 // First pass: determine size, legality.
2499 regcomp_start(expr, re_flags);
2500 regcode = JUST_CALC_SIZE;
2501 regc(REGMAGIC);
2502 if (reg(REG_NOPAREN, &flags) == NULL)
2503 return NULL;
2504
2505 // Allocate space.
2506 r = alloc(offsetof(bt_regprog_T, program) + regsize);
2507 if (r == NULL)
2508 return NULL;
2509 r->re_in_use = FALSE;
2510
2511 // Second pass: emit code.
2512 regcomp_start(expr, re_flags);
2513 regcode = r->program;
2514 regc(REGMAGIC);
2515 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
2516 {
2517 vim_free(r);
2518 if (reg_toolong)
2519 EMSG_RET_NULL(_("E339: Pattern too long"));
2520 return NULL;
2521 }
2522
2523 // Dig out information for optimizations.
2524 r->regstart = NUL; // Worst-case defaults.
2525 r->reganch = 0;
2526 r->regmust = NULL;
2527 r->regmlen = 0;
2528 r->regflags = regflags;
2529 if (flags & HASNL)
2530 r->regflags |= RF_HASNL;
2531 if (flags & HASLOOKBH)
2532 r->regflags |= RF_LOOKBH;
2533#ifdef FEAT_SYN_HL
2534 // Remember whether this pattern has any \z specials in it.
2535 r->reghasz = re_has_z;
2536#endif
2537 scan = r->program + 1; // First BRANCH.
2538 if (OP(regnext(scan)) == END) // Only one top-level choice.
2539 {
2540 scan = OPERAND(scan);
2541
2542 // Starting-point info.
2543 if (OP(scan) == BOL || OP(scan) == RE_BOF)
2544 {
2545 r->reganch++;
2546 scan = regnext(scan);
2547 }
2548
2549 if (OP(scan) == EXACTLY)
2550 {
2551 if (has_mbyte)
2552 r->regstart = (*mb_ptr2char)(OPERAND(scan));
2553 else
2554 r->regstart = *OPERAND(scan);
2555 }
2556 else if ((OP(scan) == BOW
2557 || OP(scan) == EOW
2558 || OP(scan) == NOTHING
2559 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
2560 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
2561 && OP(regnext(scan)) == EXACTLY)
2562 {
2563 if (has_mbyte)
2564 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
2565 else
2566 r->regstart = *OPERAND(regnext(scan));
2567 }
2568
2569 // If there's something expensive in the r.e., find the longest
2570 // literal string that must appear and make it the regmust. Resolve
2571 // ties in favor of later strings, since the regstart check works
2572 // with the beginning of the r.e. and avoiding duplication
2573 // strengthens checking. Not a strong reason, but sufficient in the
2574 // absence of others.
2575
2576 // When the r.e. starts with BOW, it is faster to look for a regmust
2577 // first. Used a lot for "#" and "*" commands. (Added by mool).
2578 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
2579 && !(flags & HASNL))
2580 {
2581 longest = NULL;
2582 len = 0;
2583 for (; scan != NULL; scan = regnext(scan))
2584 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
2585 {
2586 longest = OPERAND(scan);
2587 len = (int)STRLEN(OPERAND(scan));
2588 }
2589 r->regmust = longest;
2590 r->regmlen = len;
2591 }
2592 }
2593#ifdef BT_REGEXP_DUMP
2594 regdump(expr, r);
2595#endif
2596 r->engine = &bt_regengine;
2597 return (regprog_T *)r;
2598}
2599
2600#if defined(FEAT_SYN_HL) || defined(PROTO)
2601/*
2602 * Check if during the previous call to vim_regcomp the EOL item "$" has been
2603 * found. This is messy, but it works fine.
2604 */
2605 int
2606vim_regcomp_had_eol(void)
2607{
2608 return had_eol;
2609}
2610#endif
2611
2612/*
2613 * Get a number after a backslash that is inside [].
2614 * When nothing is recognized return a backslash.
2615 */
2616 static int
2617coll_get_char(void)
2618{
2619 long nr = -1;
2620
2621 switch (*regparse++)
2622 {
2623 case 'd': nr = getdecchrs(); break;
2624 case 'o': nr = getoctchrs(); break;
2625 case 'x': nr = gethexchrs(2); break;
2626 case 'u': nr = gethexchrs(4); break;
2627 case 'U': nr = gethexchrs(8); break;
2628 }
2629 if (nr < 0 || nr > INT_MAX)
2630 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002631 // If getting the number fails be backwards compatible: the character
2632 // is a backslash.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002633 --regparse;
2634 nr = '\\';
2635 }
2636 return nr;
2637}
2638
2639/*
2640 * Free a compiled regexp program, returned by bt_regcomp().
2641 */
2642 static void
2643bt_regfree(regprog_T *prog)
2644{
2645 vim_free(prog);
2646}
2647
2648#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
2649
2650/*
2651 * The arguments from BRACE_LIMITS are stored here. They are actually local
2652 * to regmatch(), but they are here to reduce the amount of stack space used
2653 * (it can be called recursively many times).
2654 */
2655static long bl_minval;
2656static long bl_maxval;
2657
2658/*
2659 * Save the input line and position in a regsave_T.
2660 */
2661 static void
2662reg_save(regsave_T *save, garray_T *gap)
2663{
2664 if (REG_MULTI)
2665 {
2666 save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
2667 save->rs_u.pos.lnum = rex.lnum;
2668 }
2669 else
2670 save->rs_u.ptr = rex.input;
2671 save->rs_len = gap->ga_len;
2672}
2673
2674/*
2675 * Restore the input line and position from a regsave_T.
2676 */
2677 static void
2678reg_restore(regsave_T *save, garray_T *gap)
2679{
2680 if (REG_MULTI)
2681 {
2682 if (rex.lnum != save->rs_u.pos.lnum)
2683 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002684 // only call reg_getline() when the line number changed to save
2685 // a bit of time
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002686 rex.lnum = save->rs_u.pos.lnum;
2687 rex.line = reg_getline(rex.lnum);
2688 }
2689 rex.input = rex.line + save->rs_u.pos.col;
2690 }
2691 else
2692 rex.input = save->rs_u.ptr;
2693 gap->ga_len = save->rs_len;
2694}
2695
2696/*
2697 * Return TRUE if current position is equal to saved position.
2698 */
2699 static int
2700reg_save_equal(regsave_T *save)
2701{
2702 if (REG_MULTI)
2703 return rex.lnum == save->rs_u.pos.lnum
2704 && rex.input == rex.line + save->rs_u.pos.col;
2705 return rex.input == save->rs_u.ptr;
2706}
2707
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002708// Save the sub-expressions before attempting a match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002709#define save_se(savep, posp, pp) \
2710 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
2711
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002712// After a failed match restore the sub-expressions.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002713#define restore_se(savep, posp, pp) { \
2714 if (REG_MULTI) \
2715 *(posp) = (savep)->se_u.pos; \
2716 else \
2717 *(pp) = (savep)->se_u.ptr; }
2718
2719/*
2720 * Tentatively set the sub-expression start to the current position (after
2721 * calling regmatch() they will have changed). Need to save the existing
2722 * values for when there is no match.
2723 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
2724 * depending on REG_MULTI.
2725 */
2726 static void
2727save_se_multi(save_se_T *savep, lpos_T *posp)
2728{
2729 savep->se_u.pos = *posp;
2730 posp->lnum = rex.lnum;
2731 posp->col = (colnr_T)(rex.input - rex.line);
2732}
2733
2734 static void
2735save_se_one(save_se_T *savep, char_u **pp)
2736{
2737 savep->se_u.ptr = *pp;
2738 *pp = rex.input;
2739}
2740
2741/*
2742 * regrepeat - repeatedly match something simple, return how many.
2743 * Advances rex.input (and rex.lnum) to just after the matched chars.
2744 */
2745 static int
2746regrepeat(
2747 char_u *p,
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002748 long maxcount) // maximum number of matches allowed
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002749{
2750 long count = 0;
2751 char_u *scan;
2752 char_u *opnd;
2753 int mask;
2754 int testval = 0;
2755
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002756 scan = rex.input; // Make local copy of rex.input for speed.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002757 opnd = OPERAND(p);
2758 switch (OP(p))
2759 {
2760 case ANY:
2761 case ANY + ADD_NL:
2762 while (count < maxcount)
2763 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002764 // Matching anything means we continue until end-of-line (or
2765 // end-of-file for ANY + ADD_NL), only limited by maxcount.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002766 while (*scan != NUL && count < maxcount)
2767 {
2768 ++count;
2769 MB_PTR_ADV(scan);
2770 }
2771 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2772 || rex.reg_line_lbr || count == maxcount)
2773 break;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002774 ++count; // count the line-break
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002775 reg_nextline();
2776 scan = rex.input;
2777 if (got_int)
2778 break;
2779 }
2780 break;
2781
2782 case IDENT:
2783 case IDENT + ADD_NL:
2784 testval = TRUE;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002785 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002786 case SIDENT:
2787 case SIDENT + ADD_NL:
2788 while (count < maxcount)
2789 {
2790 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2791 {
2792 MB_PTR_ADV(scan);
2793 }
2794 else if (*scan == NUL)
2795 {
2796 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2797 || rex.reg_line_lbr)
2798 break;
2799 reg_nextline();
2800 scan = rex.input;
2801 if (got_int)
2802 break;
2803 }
2804 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2805 ++scan;
2806 else
2807 break;
2808 ++count;
2809 }
2810 break;
2811
2812 case KWORD:
2813 case KWORD + ADD_NL:
2814 testval = TRUE;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002815 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002816 case SKWORD:
2817 case SKWORD + ADD_NL:
2818 while (count < maxcount)
2819 {
2820 if (vim_iswordp_buf(scan, rex.reg_buf)
2821 && (testval || !VIM_ISDIGIT(*scan)))
2822 {
2823 MB_PTR_ADV(scan);
2824 }
2825 else if (*scan == NUL)
2826 {
2827 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2828 || rex.reg_line_lbr)
2829 break;
2830 reg_nextline();
2831 scan = rex.input;
2832 if (got_int)
2833 break;
2834 }
2835 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2836 ++scan;
2837 else
2838 break;
2839 ++count;
2840 }
2841 break;
2842
2843 case FNAME:
2844 case FNAME + ADD_NL:
2845 testval = TRUE;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002846 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002847 case SFNAME:
2848 case SFNAME + ADD_NL:
2849 while (count < maxcount)
2850 {
2851 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2852 {
2853 MB_PTR_ADV(scan);
2854 }
2855 else if (*scan == NUL)
2856 {
2857 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2858 || rex.reg_line_lbr)
2859 break;
2860 reg_nextline();
2861 scan = rex.input;
2862 if (got_int)
2863 break;
2864 }
2865 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2866 ++scan;
2867 else
2868 break;
2869 ++count;
2870 }
2871 break;
2872
2873 case PRINT:
2874 case PRINT + ADD_NL:
2875 testval = TRUE;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02002876 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02002877 case SPRINT:
2878 case SPRINT + ADD_NL:
2879 while (count < maxcount)
2880 {
2881 if (*scan == NUL)
2882 {
2883 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2884 || rex.reg_line_lbr)
2885 break;
2886 reg_nextline();
2887 scan = rex.input;
2888 if (got_int)
2889 break;
2890 }
2891 else if (vim_isprintc(PTR2CHAR(scan)) == 1
2892 && (testval || !VIM_ISDIGIT(*scan)))
2893 {
2894 MB_PTR_ADV(scan);
2895 }
2896 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2897 ++scan;
2898 else
2899 break;
2900 ++count;
2901 }
2902 break;
2903
2904 case WHITE:
2905 case WHITE + ADD_NL:
2906 testval = mask = RI_WHITE;
2907do_class:
2908 while (count < maxcount)
2909 {
2910 int l;
2911
2912 if (*scan == NUL)
2913 {
2914 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2915 || rex.reg_line_lbr)
2916 break;
2917 reg_nextline();
2918 scan = rex.input;
2919 if (got_int)
2920 break;
2921 }
2922 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
2923 {
2924 if (testval != 0)
2925 break;
2926 scan += l;
2927 }
2928 else if ((class_tab[*scan] & mask) == testval)
2929 ++scan;
2930 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2931 ++scan;
2932 else
2933 break;
2934 ++count;
2935 }
2936 break;
2937
2938 case NWHITE:
2939 case NWHITE + ADD_NL:
2940 mask = RI_WHITE;
2941 goto do_class;
2942 case DIGIT:
2943 case DIGIT + ADD_NL:
2944 testval = mask = RI_DIGIT;
2945 goto do_class;
2946 case NDIGIT:
2947 case NDIGIT + ADD_NL:
2948 mask = RI_DIGIT;
2949 goto do_class;
2950 case HEX:
2951 case HEX + ADD_NL:
2952 testval = mask = RI_HEX;
2953 goto do_class;
2954 case NHEX:
2955 case NHEX + ADD_NL:
2956 mask = RI_HEX;
2957 goto do_class;
2958 case OCTAL:
2959 case OCTAL + ADD_NL:
2960 testval = mask = RI_OCTAL;
2961 goto do_class;
2962 case NOCTAL:
2963 case NOCTAL + ADD_NL:
2964 mask = RI_OCTAL;
2965 goto do_class;
2966 case WORD:
2967 case WORD + ADD_NL:
2968 testval = mask = RI_WORD;
2969 goto do_class;
2970 case NWORD:
2971 case NWORD + ADD_NL:
2972 mask = RI_WORD;
2973 goto do_class;
2974 case HEAD:
2975 case HEAD + ADD_NL:
2976 testval = mask = RI_HEAD;
2977 goto do_class;
2978 case NHEAD:
2979 case NHEAD + ADD_NL:
2980 mask = RI_HEAD;
2981 goto do_class;
2982 case ALPHA:
2983 case ALPHA + ADD_NL:
2984 testval = mask = RI_ALPHA;
2985 goto do_class;
2986 case NALPHA:
2987 case NALPHA + ADD_NL:
2988 mask = RI_ALPHA;
2989 goto do_class;
2990 case LOWER:
2991 case LOWER + ADD_NL:
2992 testval = mask = RI_LOWER;
2993 goto do_class;
2994 case NLOWER:
2995 case NLOWER + ADD_NL:
2996 mask = RI_LOWER;
2997 goto do_class;
2998 case UPPER:
2999 case UPPER + ADD_NL:
3000 testval = mask = RI_UPPER;
3001 goto do_class;
3002 case NUPPER:
3003 case NUPPER + ADD_NL:
3004 mask = RI_UPPER;
3005 goto do_class;
3006
3007 case EXACTLY:
3008 {
3009 int cu, cl;
3010
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003011 // This doesn't do a multi-byte character, because a MULTIBYTECODE
3012 // would have been used for it. It does handle single-byte
3013 // characters, such as latin1.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003014 if (rex.reg_ic)
3015 {
3016 cu = MB_TOUPPER(*opnd);
3017 cl = MB_TOLOWER(*opnd);
3018 while (count < maxcount && (*scan == cu || *scan == cl))
3019 {
3020 count++;
3021 scan++;
3022 }
3023 }
3024 else
3025 {
3026 cu = *opnd;
3027 while (count < maxcount && *scan == cu)
3028 {
3029 count++;
3030 scan++;
3031 }
3032 }
3033 break;
3034 }
3035
3036 case MULTIBYTECODE:
3037 {
3038 int i, len, cf = 0;
3039
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003040 // Safety check (just in case 'encoding' was changed since
3041 // compiling the program).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003042 if ((len = (*mb_ptr2len)(opnd)) > 1)
3043 {
3044 if (rex.reg_ic && enc_utf8)
3045 cf = utf_fold(utf_ptr2char(opnd));
3046 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
3047 {
3048 for (i = 0; i < len; ++i)
3049 if (opnd[i] != scan[i])
3050 break;
3051 if (i < len && (!rex.reg_ic || !enc_utf8
3052 || utf_fold(utf_ptr2char(scan)) != cf))
3053 break;
3054 scan += len;
3055 ++count;
3056 }
3057 }
3058 }
3059 break;
3060
3061 case ANYOF:
3062 case ANYOF + ADD_NL:
3063 testval = TRUE;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003064 // FALLTHROUGH
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003065
3066 case ANYBUT:
3067 case ANYBUT + ADD_NL:
3068 while (count < maxcount)
3069 {
3070 int len;
3071
3072 if (*scan == NUL)
3073 {
3074 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
3075 || rex.reg_line_lbr)
3076 break;
3077 reg_nextline();
3078 scan = rex.input;
3079 if (got_int)
3080 break;
3081 }
3082 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
3083 ++scan;
3084 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
3085 {
3086 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
3087 break;
3088 scan += len;
3089 }
3090 else
3091 {
3092 if ((cstrchr(opnd, *scan) == NULL) == testval)
3093 break;
3094 ++scan;
3095 }
3096 ++count;
3097 }
3098 break;
3099
3100 case NEWL:
3101 while (count < maxcount
3102 && ((*scan == NUL && rex.lnum <= rex.reg_maxline
3103 && !rex.reg_line_lbr && REG_MULTI)
3104 || (*scan == '\n' && rex.reg_line_lbr)))
3105 {
3106 count++;
3107 if (rex.reg_line_lbr)
3108 ADVANCE_REGINPUT();
3109 else
3110 reg_nextline();
3111 scan = rex.input;
3112 if (got_int)
3113 break;
3114 }
3115 break;
3116
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003117 default: // Oh dear. Called inappropriately.
Bram Moolenaare83cca22020-09-07 18:53:21 +02003118 iemsg(_(e_re_corr));
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003119#ifdef DEBUG
3120 printf("Called regrepeat with op code %d\n", OP(p));
3121#endif
3122 break;
3123 }
3124
3125 rex.input = scan;
3126
3127 return (int)count;
3128}
3129
3130/*
3131 * Push an item onto the regstack.
3132 * Returns pointer to new item. Returns NULL when out of memory.
3133 */
3134 static regitem_T *
3135regstack_push(regstate_T state, char_u *scan)
3136{
3137 regitem_T *rp;
3138
3139 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
3140 {
3141 emsg(_(e_maxmempat));
3142 return NULL;
3143 }
3144 if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
3145 return NULL;
3146
3147 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
3148 rp->rs_state = state;
3149 rp->rs_scan = scan;
3150
3151 regstack.ga_len += sizeof(regitem_T);
3152 return rp;
3153}
3154
3155/*
3156 * Pop an item from the regstack.
3157 */
3158 static void
3159regstack_pop(char_u **scan)
3160{
3161 regitem_T *rp;
3162
3163 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
3164 *scan = rp->rs_scan;
3165
3166 regstack.ga_len -= sizeof(regitem_T);
3167}
3168
3169/*
3170 * Save the current subexpr to "bp", so that they can be restored
3171 * later by restore_subexpr().
3172 */
3173 static void
3174save_subexpr(regbehind_T *bp)
3175{
3176 int i;
3177
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003178 // When "rex.need_clear_subexpr" is set we don't need to save the values,
3179 // only remember that this flag needs to be set again when restoring.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003180 bp->save_need_clear_subexpr = rex.need_clear_subexpr;
3181 if (!rex.need_clear_subexpr)
3182 {
3183 for (i = 0; i < NSUBEXP; ++i)
3184 {
3185 if (REG_MULTI)
3186 {
3187 bp->save_start[i].se_u.pos = rex.reg_startpos[i];
3188 bp->save_end[i].se_u.pos = rex.reg_endpos[i];
3189 }
3190 else
3191 {
3192 bp->save_start[i].se_u.ptr = rex.reg_startp[i];
3193 bp->save_end[i].se_u.ptr = rex.reg_endp[i];
3194 }
3195 }
3196 }
3197}
3198
3199/*
3200 * Restore the subexpr from "bp".
3201 */
3202 static void
3203restore_subexpr(regbehind_T *bp)
3204{
3205 int i;
3206
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003207 // Only need to restore saved values when they are not to be cleared.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003208 rex.need_clear_subexpr = bp->save_need_clear_subexpr;
3209 if (!rex.need_clear_subexpr)
3210 {
3211 for (i = 0; i < NSUBEXP; ++i)
3212 {
3213 if (REG_MULTI)
3214 {
3215 rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
3216 rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
3217 }
3218 else
3219 {
3220 rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
3221 rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
3222 }
3223 }
3224 }
3225}
3226
3227/*
3228 * regmatch - main matching routine
3229 *
3230 * Conceptually the strategy is simple: Check to see whether the current node
3231 * matches, push an item onto the regstack and loop to see whether the rest
3232 * matches, and then act accordingly. In practice we make some effort to
3233 * avoid using the regstack, in particular by going through "ordinary" nodes
3234 * (that don't need to know whether the rest of the match failed) by a nested
3235 * loop.
3236 *
3237 * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after
3238 * the last matched character.
3239 * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an
3240 * undefined state!
3241 */
3242 static int
3243regmatch(
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003244 char_u *scan, // Current node.
3245 proftime_T *tm UNUSED, // timeout limit or NULL
3246 int *timed_out UNUSED) // flag set on timeout or NULL
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003247{
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003248 char_u *next; // Next node.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003249 int op;
3250 int c;
3251 regitem_T *rp;
3252 int no;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003253 int status; // one of the RA_ values:
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003254#ifdef FEAT_RELTIME
3255 int tm_count = 0;
3256#endif
3257
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003258 // Make "regstack" and "backpos" empty. They are allocated and freed in
3259 // bt_regexec_both() to reduce malloc()/free() calls.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003260 regstack.ga_len = 0;
3261 backpos.ga_len = 0;
3262
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003263 // Repeat until "regstack" is empty.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003264 for (;;)
3265 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003266 // Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
3267 // Allow interrupting them with CTRL-C.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003268 fast_breakcheck();
3269
3270#ifdef DEBUG
3271 if (scan != NULL && regnarrate)
3272 {
3273 mch_errmsg((char *)regprop(scan));
3274 mch_errmsg("(\n");
3275 }
3276#endif
3277
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003278 // Repeat for items that can be matched sequentially, without using the
3279 // regstack.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003280 for (;;)
3281 {
3282 if (got_int || scan == NULL)
3283 {
3284 status = RA_FAIL;
3285 break;
3286 }
3287#ifdef FEAT_RELTIME
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003288 // Check for timeout once in a 100 times to avoid overhead.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003289 if (tm != NULL && ++tm_count == 100)
3290 {
3291 tm_count = 0;
3292 if (profile_passed_limit(tm))
3293 {
3294 if (timed_out != NULL)
3295 *timed_out = TRUE;
3296 status = RA_FAIL;
3297 break;
3298 }
3299 }
3300#endif
3301 status = RA_CONT;
3302
3303#ifdef DEBUG
3304 if (regnarrate)
3305 {
3306 mch_errmsg((char *)regprop(scan));
3307 mch_errmsg("...\n");
3308# ifdef FEAT_SYN_HL
3309 if (re_extmatch_in != NULL)
3310 {
3311 int i;
3312
3313 mch_errmsg(_("External submatches:\n"));
3314 for (i = 0; i < NSUBEXP; i++)
3315 {
3316 mch_errmsg(" \"");
3317 if (re_extmatch_in->matches[i] != NULL)
3318 mch_errmsg((char *)re_extmatch_in->matches[i]);
3319 mch_errmsg("\"\n");
3320 }
3321 }
3322# endif
3323 }
3324#endif
3325 next = regnext(scan);
3326
3327 op = OP(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003328 // Check for character class with NL added.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003329 if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
3330 && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
3331 {
3332 reg_nextline();
3333 }
3334 else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
3335 {
3336 ADVANCE_REGINPUT();
3337 }
3338 else
3339 {
3340 if (WITH_NL(op))
3341 op -= ADD_NL;
3342 if (has_mbyte)
3343 c = (*mb_ptr2char)(rex.input);
3344 else
3345 c = *rex.input;
3346 switch (op)
3347 {
3348 case BOL:
3349 if (rex.input != rex.line)
3350 status = RA_NOMATCH;
3351 break;
3352
3353 case EOL:
3354 if (c != NUL)
3355 status = RA_NOMATCH;
3356 break;
3357
3358 case RE_BOF:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003359 // We're not at the beginning of the file when below the first
3360 // line where we started, not at the start of the line or we
3361 // didn't start at the first line of the buffer.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003362 if (rex.lnum != 0 || rex.input != rex.line
3363 || (REG_MULTI && rex.reg_firstlnum > 1))
3364 status = RA_NOMATCH;
3365 break;
3366
3367 case RE_EOF:
3368 if (rex.lnum != rex.reg_maxline || c != NUL)
3369 status = RA_NOMATCH;
3370 break;
3371
3372 case CURSOR:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003373 // Check if the buffer is in a window and compare the
3374 // rex.reg_win->w_cursor position to the match position.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003375 if (rex.reg_win == NULL
3376 || (rex.lnum + rex.reg_firstlnum
3377 != rex.reg_win->w_cursor.lnum)
3378 || ((colnr_T)(rex.input - rex.line)
3379 != rex.reg_win->w_cursor.col))
3380 status = RA_NOMATCH;
3381 break;
3382
3383 case RE_MARK:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003384 // Compare the mark position to the match position.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003385 {
3386 int mark = OPERAND(scan)[0];
3387 int cmp = OPERAND(scan)[1];
3388 pos_T *pos;
3389
3390 pos = getmark_buf(rex.reg_buf, mark, FALSE);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003391 if (pos == NULL // mark doesn't exist
Bram Moolenaar872bee52021-05-24 22:56:15 +02003392 || pos->lnum <= 0) // mark isn't set in reg_buf
3393 {
3394 status = RA_NOMATCH;
3395 }
3396 else
3397 {
3398 colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum
3399 && pos->col == MAXCOL
3400 ? (colnr_T)STRLEN(reg_getline(
3401 pos->lnum - rex.reg_firstlnum))
3402 : pos->col;
3403
3404 if ((pos->lnum == rex.lnum + rex.reg_firstlnum
3405 ? (pos_col == (colnr_T)(rex.input - rex.line)
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003406 ? (cmp == '<' || cmp == '>')
Bram Moolenaar872bee52021-05-24 22:56:15 +02003407 : (pos_col < (colnr_T)(rex.input - rex.line)
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003408 ? cmp != '>'
3409 : cmp != '<'))
3410 : (pos->lnum < rex.lnum + rex.reg_firstlnum
3411 ? cmp != '>'
3412 : cmp != '<')))
3413 status = RA_NOMATCH;
Bram Moolenaar872bee52021-05-24 22:56:15 +02003414 }
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003415 }
3416 break;
3417
3418 case RE_VISUAL:
3419 if (!reg_match_visual())
3420 status = RA_NOMATCH;
3421 break;
3422
3423 case RE_LNUM:
3424 if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
3425 scan))
3426 status = RA_NOMATCH;
3427 break;
3428
3429 case RE_COL:
3430 if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
3431 status = RA_NOMATCH;
3432 break;
3433
3434 case RE_VCOL:
3435 if (!re_num_cmp((long_u)win_linetabsize(
3436 rex.reg_win == NULL ? curwin : rex.reg_win,
3437 rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
3438 status = RA_NOMATCH;
3439 break;
3440
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003441 case BOW: // \<word; rex.input points to w
3442 if (c == NUL) // Can't match at end of line
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003443 status = RA_NOMATCH;
3444 else if (has_mbyte)
3445 {
3446 int this_class;
3447
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003448 // Get class of current and previous char (if it exists).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003449 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3450 if (this_class <= 1)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003451 status = RA_NOMATCH; // not on a word at all
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003452 else if (reg_prev_class() == this_class)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003453 status = RA_NOMATCH; // previous char is in same word
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003454 }
3455 else
3456 {
3457 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
3458 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
3459 status = RA_NOMATCH;
3460 }
3461 break;
3462
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003463 case EOW: // word\>; rex.input points after d
3464 if (rex.input == rex.line) // Can't match at start of line
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003465 status = RA_NOMATCH;
3466 else if (has_mbyte)
3467 {
3468 int this_class, prev_class;
3469
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003470 // Get class of current and previous char (if it exists).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003471 this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3472 prev_class = reg_prev_class();
3473 if (this_class == prev_class
3474 || prev_class == 0 || prev_class == 1)
3475 status = RA_NOMATCH;
3476 }
3477 else
3478 {
3479 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
3480 || (rex.input[0] != NUL
3481 && vim_iswordc_buf(c, rex.reg_buf)))
3482 status = RA_NOMATCH;
3483 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003484 break; // Matched with EOW
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003485
3486 case ANY:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003487 // ANY does not match new lines.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003488 if (c == NUL)
3489 status = RA_NOMATCH;
3490 else
3491 ADVANCE_REGINPUT();
3492 break;
3493
3494 case IDENT:
3495 if (!vim_isIDc(c))
3496 status = RA_NOMATCH;
3497 else
3498 ADVANCE_REGINPUT();
3499 break;
3500
3501 case SIDENT:
3502 if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
3503 status = RA_NOMATCH;
3504 else
3505 ADVANCE_REGINPUT();
3506 break;
3507
3508 case KWORD:
3509 if (!vim_iswordp_buf(rex.input, rex.reg_buf))
3510 status = RA_NOMATCH;
3511 else
3512 ADVANCE_REGINPUT();
3513 break;
3514
3515 case SKWORD:
3516 if (VIM_ISDIGIT(*rex.input)
3517 || !vim_iswordp_buf(rex.input, rex.reg_buf))
3518 status = RA_NOMATCH;
3519 else
3520 ADVANCE_REGINPUT();
3521 break;
3522
3523 case FNAME:
3524 if (!vim_isfilec(c))
3525 status = RA_NOMATCH;
3526 else
3527 ADVANCE_REGINPUT();
3528 break;
3529
3530 case SFNAME:
3531 if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
3532 status = RA_NOMATCH;
3533 else
3534 ADVANCE_REGINPUT();
3535 break;
3536
3537 case PRINT:
3538 if (!vim_isprintc(PTR2CHAR(rex.input)))
3539 status = RA_NOMATCH;
3540 else
3541 ADVANCE_REGINPUT();
3542 break;
3543
3544 case SPRINT:
3545 if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
3546 status = RA_NOMATCH;
3547 else
3548 ADVANCE_REGINPUT();
3549 break;
3550
3551 case WHITE:
3552 if (!VIM_ISWHITE(c))
3553 status = RA_NOMATCH;
3554 else
3555 ADVANCE_REGINPUT();
3556 break;
3557
3558 case NWHITE:
3559 if (c == NUL || VIM_ISWHITE(c))
3560 status = RA_NOMATCH;
3561 else
3562 ADVANCE_REGINPUT();
3563 break;
3564
3565 case DIGIT:
3566 if (!ri_digit(c))
3567 status = RA_NOMATCH;
3568 else
3569 ADVANCE_REGINPUT();
3570 break;
3571
3572 case NDIGIT:
3573 if (c == NUL || ri_digit(c))
3574 status = RA_NOMATCH;
3575 else
3576 ADVANCE_REGINPUT();
3577 break;
3578
3579 case HEX:
3580 if (!ri_hex(c))
3581 status = RA_NOMATCH;
3582 else
3583 ADVANCE_REGINPUT();
3584 break;
3585
3586 case NHEX:
3587 if (c == NUL || ri_hex(c))
3588 status = RA_NOMATCH;
3589 else
3590 ADVANCE_REGINPUT();
3591 break;
3592
3593 case OCTAL:
3594 if (!ri_octal(c))
3595 status = RA_NOMATCH;
3596 else
3597 ADVANCE_REGINPUT();
3598 break;
3599
3600 case NOCTAL:
3601 if (c == NUL || ri_octal(c))
3602 status = RA_NOMATCH;
3603 else
3604 ADVANCE_REGINPUT();
3605 break;
3606
3607 case WORD:
3608 if (!ri_word(c))
3609 status = RA_NOMATCH;
3610 else
3611 ADVANCE_REGINPUT();
3612 break;
3613
3614 case NWORD:
3615 if (c == NUL || ri_word(c))
3616 status = RA_NOMATCH;
3617 else
3618 ADVANCE_REGINPUT();
3619 break;
3620
3621 case HEAD:
3622 if (!ri_head(c))
3623 status = RA_NOMATCH;
3624 else
3625 ADVANCE_REGINPUT();
3626 break;
3627
3628 case NHEAD:
3629 if (c == NUL || ri_head(c))
3630 status = RA_NOMATCH;
3631 else
3632 ADVANCE_REGINPUT();
3633 break;
3634
3635 case ALPHA:
3636 if (!ri_alpha(c))
3637 status = RA_NOMATCH;
3638 else
3639 ADVANCE_REGINPUT();
3640 break;
3641
3642 case NALPHA:
3643 if (c == NUL || ri_alpha(c))
3644 status = RA_NOMATCH;
3645 else
3646 ADVANCE_REGINPUT();
3647 break;
3648
3649 case LOWER:
3650 if (!ri_lower(c))
3651 status = RA_NOMATCH;
3652 else
3653 ADVANCE_REGINPUT();
3654 break;
3655
3656 case NLOWER:
3657 if (c == NUL || ri_lower(c))
3658 status = RA_NOMATCH;
3659 else
3660 ADVANCE_REGINPUT();
3661 break;
3662
3663 case UPPER:
3664 if (!ri_upper(c))
3665 status = RA_NOMATCH;
3666 else
3667 ADVANCE_REGINPUT();
3668 break;
3669
3670 case NUPPER:
3671 if (c == NUL || ri_upper(c))
3672 status = RA_NOMATCH;
3673 else
3674 ADVANCE_REGINPUT();
3675 break;
3676
3677 case EXACTLY:
3678 {
3679 int len;
3680 char_u *opnd;
3681
3682 opnd = OPERAND(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003683 // Inline the first byte, for speed.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003684 if (*opnd != *rex.input
3685 && (!rex.reg_ic
3686 || (!enc_utf8
3687 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
3688 status = RA_NOMATCH;
3689 else if (*opnd == NUL)
3690 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003691 // match empty string always works; happens when "~" is
3692 // empty.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003693 }
3694 else
3695 {
3696 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
3697 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003698 len = 1; // matched a single byte above
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003699 }
3700 else
3701 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003702 // Need to match first byte again for multi-byte.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003703 len = (int)STRLEN(opnd);
3704 if (cstrncmp(opnd, rex.input, &len) != 0)
3705 status = RA_NOMATCH;
3706 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003707 // Check for following composing character, unless %C
3708 // follows (skips over all composing chars).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003709 if (status != RA_NOMATCH
3710 && enc_utf8
3711 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
3712 && !rex.reg_icombine
3713 && OP(next) != RE_COMPOSING)
3714 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003715 // raaron: This code makes a composing character get
3716 // ignored, which is the correct behavior (sometimes)
3717 // for voweled Hebrew texts.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003718 status = RA_NOMATCH;
3719 }
3720 if (status != RA_NOMATCH)
3721 rex.input += len;
3722 }
3723 }
3724 break;
3725
3726 case ANYOF:
3727 case ANYBUT:
3728 if (c == NUL)
3729 status = RA_NOMATCH;
3730 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
3731 status = RA_NOMATCH;
3732 else
3733 ADVANCE_REGINPUT();
3734 break;
3735
3736 case MULTIBYTECODE:
3737 if (has_mbyte)
3738 {
3739 int i, len;
3740 char_u *opnd;
3741 int opndc = 0, inpc;
3742
3743 opnd = OPERAND(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003744 // Safety check (just in case 'encoding' was changed since
3745 // compiling the program).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003746 if ((len = (*mb_ptr2len)(opnd)) < 2)
3747 {
3748 status = RA_NOMATCH;
3749 break;
3750 }
3751 if (enc_utf8)
3752 opndc = utf_ptr2char(opnd);
3753 if (enc_utf8 && utf_iscomposing(opndc))
3754 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003755 // When only a composing char is given match at any
3756 // position where that composing char appears.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003757 status = RA_NOMATCH;
3758 for (i = 0; rex.input[i] != NUL;
3759 i += utf_ptr2len(rex.input + i))
3760 {
3761 inpc = utf_ptr2char(rex.input + i);
3762 if (!utf_iscomposing(inpc))
3763 {
3764 if (i > 0)
3765 break;
3766 }
3767 else if (opndc == inpc)
3768 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003769 // Include all following composing chars.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003770 len = i + utfc_ptr2len(rex.input + i);
3771 status = RA_MATCH;
3772 break;
3773 }
3774 }
3775 }
3776 else
3777 for (i = 0; i < len; ++i)
3778 if (opnd[i] != rex.input[i])
3779 {
3780 status = RA_NOMATCH;
3781 break;
3782 }
3783 rex.input += len;
3784 }
3785 else
3786 status = RA_NOMATCH;
3787 break;
3788 case RE_COMPOSING:
3789 if (enc_utf8)
3790 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003791 // Skip composing characters.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003792 while (utf_iscomposing(utf_ptr2char(rex.input)))
3793 MB_CPTR_ADV(rex.input);
3794 }
3795 break;
3796
3797 case NOTHING:
3798 break;
3799
3800 case BACK:
3801 {
3802 int i;
3803 backpos_T *bp;
3804
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003805 // When we run into BACK we need to check if we don't keep
3806 // looping without matching any input. The second and later
3807 // times a BACK is encountered it fails if the input is still
3808 // at the same position as the previous time.
3809 // The positions are stored in "backpos" and found by the
3810 // current value of "scan", the position in the RE program.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003811 bp = (backpos_T *)backpos.ga_data;
3812 for (i = 0; i < backpos.ga_len; ++i)
3813 if (bp[i].bp_scan == scan)
3814 break;
3815 if (i == backpos.ga_len)
3816 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003817 // First time at this BACK, make room to store the pos.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003818 if (ga_grow(&backpos, 1) == FAIL)
3819 status = RA_FAIL;
3820 else
3821 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003822 // get "ga_data" again, it may have changed
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003823 bp = (backpos_T *)backpos.ga_data;
3824 bp[i].bp_scan = scan;
3825 ++backpos.ga_len;
3826 }
3827 }
3828 else if (reg_save_equal(&bp[i].bp_pos))
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003829 // Still at same position as last time, fail.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003830 status = RA_NOMATCH;
3831
3832 if (status != RA_FAIL && status != RA_NOMATCH)
3833 reg_save(&bp[i].bp_pos, &backpos);
3834 }
3835 break;
3836
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003837 case MOPEN + 0: // Match start: \zs
3838 case MOPEN + 1: // \(
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003839 case MOPEN + 2:
3840 case MOPEN + 3:
3841 case MOPEN + 4:
3842 case MOPEN + 5:
3843 case MOPEN + 6:
3844 case MOPEN + 7:
3845 case MOPEN + 8:
3846 case MOPEN + 9:
3847 {
3848 no = op - MOPEN;
3849 cleanup_subexpr();
3850 rp = regstack_push(RS_MOPEN, scan);
3851 if (rp == NULL)
3852 status = RA_FAIL;
3853 else
3854 {
3855 rp->rs_no = no;
3856 save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
3857 &rex.reg_startp[no]);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003858 // We simply continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003859 }
3860 }
3861 break;
3862
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003863 case NOPEN: // \%(
3864 case NCLOSE: // \) after \%(
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003865 if (regstack_push(RS_NOPEN, scan) == NULL)
3866 status = RA_FAIL;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003867 // We simply continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003868 break;
3869
3870#ifdef FEAT_SYN_HL
3871 case ZOPEN + 1:
3872 case ZOPEN + 2:
3873 case ZOPEN + 3:
3874 case ZOPEN + 4:
3875 case ZOPEN + 5:
3876 case ZOPEN + 6:
3877 case ZOPEN + 7:
3878 case ZOPEN + 8:
3879 case ZOPEN + 9:
3880 {
3881 no = op - ZOPEN;
3882 cleanup_zsubexpr();
3883 rp = regstack_push(RS_ZOPEN, scan);
3884 if (rp == NULL)
3885 status = RA_FAIL;
3886 else
3887 {
3888 rp->rs_no = no;
3889 save_se(&rp->rs_un.sesave, &reg_startzpos[no],
3890 &reg_startzp[no]);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003891 // We simply continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003892 }
3893 }
3894 break;
3895#endif
3896
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003897 case MCLOSE + 0: // Match end: \ze
3898 case MCLOSE + 1: // \)
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003899 case MCLOSE + 2:
3900 case MCLOSE + 3:
3901 case MCLOSE + 4:
3902 case MCLOSE + 5:
3903 case MCLOSE + 6:
3904 case MCLOSE + 7:
3905 case MCLOSE + 8:
3906 case MCLOSE + 9:
3907 {
3908 no = op - MCLOSE;
3909 cleanup_subexpr();
3910 rp = regstack_push(RS_MCLOSE, scan);
3911 if (rp == NULL)
3912 status = RA_FAIL;
3913 else
3914 {
3915 rp->rs_no = no;
3916 save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
3917 &rex.reg_endp[no]);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003918 // We simply continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003919 }
3920 }
3921 break;
3922
3923#ifdef FEAT_SYN_HL
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003924 case ZCLOSE + 1: // \) after \z(
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003925 case ZCLOSE + 2:
3926 case ZCLOSE + 3:
3927 case ZCLOSE + 4:
3928 case ZCLOSE + 5:
3929 case ZCLOSE + 6:
3930 case ZCLOSE + 7:
3931 case ZCLOSE + 8:
3932 case ZCLOSE + 9:
3933 {
3934 no = op - ZCLOSE;
3935 cleanup_zsubexpr();
3936 rp = regstack_push(RS_ZCLOSE, scan);
3937 if (rp == NULL)
3938 status = RA_FAIL;
3939 else
3940 {
3941 rp->rs_no = no;
3942 save_se(&rp->rs_un.sesave, &reg_endzpos[no],
3943 &reg_endzp[no]);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003944 // We simply continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003945 }
3946 }
3947 break;
3948#endif
3949
3950 case BACKREF + 1:
3951 case BACKREF + 2:
3952 case BACKREF + 3:
3953 case BACKREF + 4:
3954 case BACKREF + 5:
3955 case BACKREF + 6:
3956 case BACKREF + 7:
3957 case BACKREF + 8:
3958 case BACKREF + 9:
3959 {
3960 int len;
3961
3962 no = op - BACKREF;
3963 cleanup_subexpr();
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003964 if (!REG_MULTI) // Single-line regexp
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003965 {
3966 if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
3967 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003968 // Backref was not set: Match an empty string.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003969 len = 0;
3970 }
3971 else
3972 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003973 // Compare current input with back-ref in the same
3974 // line.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003975 len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
3976 if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
3977 status = RA_NOMATCH;
3978 }
3979 }
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003980 else // Multi-line regexp
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003981 {
3982 if (rex.reg_startpos[no].lnum < 0
3983 || rex.reg_endpos[no].lnum < 0)
3984 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003985 // Backref was not set: Match an empty string.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003986 len = 0;
3987 }
3988 else
3989 {
3990 if (rex.reg_startpos[no].lnum == rex.lnum
3991 && rex.reg_endpos[no].lnum == rex.lnum)
3992 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02003993 // Compare back-ref within the current line.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02003994 len = rex.reg_endpos[no].col
3995 - rex.reg_startpos[no].col;
3996 if (cstrncmp(rex.line + rex.reg_startpos[no].col,
3997 rex.input, &len) != 0)
3998 status = RA_NOMATCH;
3999 }
4000 else
4001 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004002 // Messy situation: Need to compare between two
4003 // lines.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004004 int r = match_with_backref(
4005 rex.reg_startpos[no].lnum,
4006 rex.reg_startpos[no].col,
4007 rex.reg_endpos[no].lnum,
4008 rex.reg_endpos[no].col,
4009 &len);
4010
4011 if (r != RA_MATCH)
4012 status = r;
4013 }
4014 }
4015 }
4016
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004017 // Matched the backref, skip over it.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004018 rex.input += len;
4019 }
4020 break;
4021
4022#ifdef FEAT_SYN_HL
4023 case ZREF + 1:
4024 case ZREF + 2:
4025 case ZREF + 3:
4026 case ZREF + 4:
4027 case ZREF + 5:
4028 case ZREF + 6:
4029 case ZREF + 7:
4030 case ZREF + 8:
4031 case ZREF + 9:
4032 {
4033 int len;
4034
4035 cleanup_zsubexpr();
4036 no = op - ZREF;
4037 if (re_extmatch_in != NULL
4038 && re_extmatch_in->matches[no] != NULL)
4039 {
4040 len = (int)STRLEN(re_extmatch_in->matches[no]);
4041 if (cstrncmp(re_extmatch_in->matches[no],
4042 rex.input, &len) != 0)
4043 status = RA_NOMATCH;
4044 else
4045 rex.input += len;
4046 }
4047 else
4048 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004049 // Backref was not set: Match an empty string.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004050 }
4051 }
4052 break;
4053#endif
4054
4055 case BRANCH:
4056 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004057 if (OP(next) != BRANCH) // No choice.
4058 next = OPERAND(scan); // Avoid recursion.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004059 else
4060 {
4061 rp = regstack_push(RS_BRANCH, scan);
4062 if (rp == NULL)
4063 status = RA_FAIL;
4064 else
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004065 status = RA_BREAK; // rest is below
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004066 }
4067 }
4068 break;
4069
4070 case BRACE_LIMITS:
4071 {
4072 if (OP(next) == BRACE_SIMPLE)
4073 {
4074 bl_minval = OPERAND_MIN(scan);
4075 bl_maxval = OPERAND_MAX(scan);
4076 }
4077 else if (OP(next) >= BRACE_COMPLEX
4078 && OP(next) < BRACE_COMPLEX + 10)
4079 {
4080 no = OP(next) - BRACE_COMPLEX;
4081 brace_min[no] = OPERAND_MIN(scan);
4082 brace_max[no] = OPERAND_MAX(scan);
4083 brace_count[no] = 0;
4084 }
4085 else
4086 {
4087 internal_error("BRACE_LIMITS");
4088 status = RA_FAIL;
4089 }
4090 }
4091 break;
4092
4093 case BRACE_COMPLEX + 0:
4094 case BRACE_COMPLEX + 1:
4095 case BRACE_COMPLEX + 2:
4096 case BRACE_COMPLEX + 3:
4097 case BRACE_COMPLEX + 4:
4098 case BRACE_COMPLEX + 5:
4099 case BRACE_COMPLEX + 6:
4100 case BRACE_COMPLEX + 7:
4101 case BRACE_COMPLEX + 8:
4102 case BRACE_COMPLEX + 9:
4103 {
4104 no = op - BRACE_COMPLEX;
4105 ++brace_count[no];
4106
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004107 // If not matched enough times yet, try one more
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004108 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4109 ? brace_min[no] : brace_max[no]))
4110 {
4111 rp = regstack_push(RS_BRCPLX_MORE, scan);
4112 if (rp == NULL)
4113 status = RA_FAIL;
4114 else
4115 {
4116 rp->rs_no = no;
4117 reg_save(&rp->rs_un.regsave, &backpos);
4118 next = OPERAND(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004119 // We continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004120 }
4121 break;
4122 }
4123
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004124 // If matched enough times, may try matching some more
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004125 if (brace_min[no] <= brace_max[no])
4126 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004127 // Range is the normal way around, use longest match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004128 if (brace_count[no] <= brace_max[no])
4129 {
4130 rp = regstack_push(RS_BRCPLX_LONG, scan);
4131 if (rp == NULL)
4132 status = RA_FAIL;
4133 else
4134 {
4135 rp->rs_no = no;
4136 reg_save(&rp->rs_un.regsave, &backpos);
4137 next = OPERAND(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004138 // We continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004139 }
4140 }
4141 }
4142 else
4143 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004144 // Range is backwards, use shortest match first
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004145 if (brace_count[no] <= brace_min[no])
4146 {
4147 rp = regstack_push(RS_BRCPLX_SHORT, scan);
4148 if (rp == NULL)
4149 status = RA_FAIL;
4150 else
4151 {
4152 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004153 // We continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004154 }
4155 }
4156 }
4157 }
4158 break;
4159
4160 case BRACE_SIMPLE:
4161 case STAR:
4162 case PLUS:
4163 {
4164 regstar_T rst;
4165
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004166 // Lookahead to avoid useless match attempts when we know
4167 // what character comes next.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004168 if (OP(next) == EXACTLY)
4169 {
4170 rst.nextb = *OPERAND(next);
4171 if (rex.reg_ic)
4172 {
4173 if (MB_ISUPPER(rst.nextb))
4174 rst.nextb_ic = MB_TOLOWER(rst.nextb);
4175 else
4176 rst.nextb_ic = MB_TOUPPER(rst.nextb);
4177 }
4178 else
4179 rst.nextb_ic = rst.nextb;
4180 }
4181 else
4182 {
4183 rst.nextb = NUL;
4184 rst.nextb_ic = NUL;
4185 }
4186 if (op != BRACE_SIMPLE)
4187 {
4188 rst.minval = (op == STAR) ? 0 : 1;
4189 rst.maxval = MAX_LIMIT;
4190 }
4191 else
4192 {
4193 rst.minval = bl_minval;
4194 rst.maxval = bl_maxval;
4195 }
4196
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004197 // When maxval > minval, try matching as much as possible, up
4198 // to maxval. When maxval < minval, try matching at least the
4199 // minimal number (since the range is backwards, that's also
4200 // maxval!).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004201 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4202 if (got_int)
4203 {
4204 status = RA_FAIL;
4205 break;
4206 }
4207 if (rst.minval <= rst.maxval
4208 ? rst.count >= rst.minval : rst.count >= rst.maxval)
4209 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004210 // It could match. Prepare for trying to match what
4211 // follows. The code is below. Parameters are stored in
4212 // a regstar_T on the regstack.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004213 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4214 {
4215 emsg(_(e_maxmempat));
4216 status = RA_FAIL;
4217 }
4218 else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4219 status = RA_FAIL;
4220 else
4221 {
4222 regstack.ga_len += sizeof(regstar_T);
4223 rp = regstack_push(rst.minval <= rst.maxval
4224 ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4225 if (rp == NULL)
4226 status = RA_FAIL;
4227 else
4228 {
4229 *(((regstar_T *)rp) - 1) = rst;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004230 status = RA_BREAK; // skip the restore bits
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004231 }
4232 }
4233 }
4234 else
4235 status = RA_NOMATCH;
4236
4237 }
4238 break;
4239
4240 case NOMATCH:
4241 case MATCH:
4242 case SUBPAT:
4243 rp = regstack_push(RS_NOMATCH, scan);
4244 if (rp == NULL)
4245 status = RA_FAIL;
4246 else
4247 {
4248 rp->rs_no = op;
4249 reg_save(&rp->rs_un.regsave, &backpos);
4250 next = OPERAND(scan);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004251 // We continue and handle the result when done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004252 }
4253 break;
4254
4255 case BEHIND:
4256 case NOBEHIND:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004257 // Need a bit of room to store extra positions.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004258 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4259 {
4260 emsg(_(e_maxmempat));
4261 status = RA_FAIL;
4262 }
4263 else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4264 status = RA_FAIL;
4265 else
4266 {
4267 regstack.ga_len += sizeof(regbehind_T);
4268 rp = regstack_push(RS_BEHIND1, scan);
4269 if (rp == NULL)
4270 status = RA_FAIL;
4271 else
4272 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004273 // Need to save the subexpr to be able to restore them
4274 // when there is a match but we don't use it.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004275 save_subexpr(((regbehind_T *)rp) - 1);
4276
4277 rp->rs_no = op;
4278 reg_save(&rp->rs_un.regsave, &backpos);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004279 // First try if what follows matches. If it does then we
4280 // check the behind match by looping.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004281 }
4282 }
4283 break;
4284
4285 case BHPOS:
4286 if (REG_MULTI)
4287 {
4288 if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
4289 || behind_pos.rs_u.pos.lnum != rex.lnum)
4290 status = RA_NOMATCH;
4291 }
4292 else if (behind_pos.rs_u.ptr != rex.input)
4293 status = RA_NOMATCH;
4294 break;
4295
4296 case NEWL:
4297 if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
4298 || rex.reg_line_lbr)
4299 && (c != '\n' || !rex.reg_line_lbr))
4300 status = RA_NOMATCH;
4301 else if (rex.reg_line_lbr)
4302 ADVANCE_REGINPUT();
4303 else
4304 reg_nextline();
4305 break;
4306
4307 case END:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004308 status = RA_MATCH; // Success!
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004309 break;
4310
4311 default:
Bram Moolenaare83cca22020-09-07 18:53:21 +02004312 iemsg(_(e_re_corr));
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004313#ifdef DEBUG
4314 printf("Illegal op code %d\n", op);
4315#endif
4316 status = RA_FAIL;
4317 break;
4318 }
4319 }
4320
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004321 // If we can't continue sequentially, break the inner loop.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004322 if (status != RA_CONT)
4323 break;
4324
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004325 // Continue in inner loop, advance to next item.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004326 scan = next;
4327
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004328 } // end of inner loop
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004329
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004330 // If there is something on the regstack execute the code for the state.
4331 // If the state is popped then loop and use the older state.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004332 while (regstack.ga_len > 0 && status != RA_FAIL)
4333 {
4334 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4335 switch (rp->rs_state)
4336 {
4337 case RS_NOPEN:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004338 // Result is passed on as-is, simply pop the state.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004339 regstack_pop(&scan);
4340 break;
4341
4342 case RS_MOPEN:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004343 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004344 if (status == RA_NOMATCH)
4345 restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
4346 &rex.reg_startp[rp->rs_no]);
4347 regstack_pop(&scan);
4348 break;
4349
4350#ifdef FEAT_SYN_HL
4351 case RS_ZOPEN:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004352 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004353 if (status == RA_NOMATCH)
4354 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4355 &reg_startzp[rp->rs_no]);
4356 regstack_pop(&scan);
4357 break;
4358#endif
4359
4360 case RS_MCLOSE:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004361 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004362 if (status == RA_NOMATCH)
4363 restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
4364 &rex.reg_endp[rp->rs_no]);
4365 regstack_pop(&scan);
4366 break;
4367
4368#ifdef FEAT_SYN_HL
4369 case RS_ZCLOSE:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004370 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004371 if (status == RA_NOMATCH)
4372 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4373 &reg_endzp[rp->rs_no]);
4374 regstack_pop(&scan);
4375 break;
4376#endif
4377
4378 case RS_BRANCH:
4379 if (status == RA_MATCH)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004380 // this branch matched, use it
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004381 regstack_pop(&scan);
4382 else
4383 {
4384 if (status != RA_BREAK)
4385 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004386 // After a non-matching branch: try next one.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004387 reg_restore(&rp->rs_un.regsave, &backpos);
4388 scan = rp->rs_scan;
4389 }
4390 if (scan == NULL || OP(scan) != BRANCH)
4391 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004392 // no more branches, didn't find a match
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004393 status = RA_NOMATCH;
4394 regstack_pop(&scan);
4395 }
4396 else
4397 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004398 // Prepare to try a branch.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004399 rp->rs_scan = regnext(scan);
4400 reg_save(&rp->rs_un.regsave, &backpos);
4401 scan = OPERAND(scan);
4402 }
4403 }
4404 break;
4405
4406 case RS_BRCPLX_MORE:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004407 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004408 if (status == RA_NOMATCH)
4409 {
4410 reg_restore(&rp->rs_un.regsave, &backpos);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004411 --brace_count[rp->rs_no]; // decrement match count
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004412 }
4413 regstack_pop(&scan);
4414 break;
4415
4416 case RS_BRCPLX_LONG:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004417 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004418 if (status == RA_NOMATCH)
4419 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004420 // There was no match, but we did find enough matches.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004421 reg_restore(&rp->rs_un.regsave, &backpos);
4422 --brace_count[rp->rs_no];
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004423 // continue with the items after "\{}"
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004424 status = RA_CONT;
4425 }
4426 regstack_pop(&scan);
4427 if (status == RA_CONT)
4428 scan = regnext(scan);
4429 break;
4430
4431 case RS_BRCPLX_SHORT:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004432 // Pop the state. Restore pointers when there is no match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004433 if (status == RA_NOMATCH)
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004434 // There was no match, try to match one more item.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004435 reg_restore(&rp->rs_un.regsave, &backpos);
4436 regstack_pop(&scan);
4437 if (status == RA_NOMATCH)
4438 {
4439 scan = OPERAND(scan);
4440 status = RA_CONT;
4441 }
4442 break;
4443
4444 case RS_NOMATCH:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004445 // Pop the state. If the operand matches for NOMATCH or
4446 // doesn't match for MATCH/SUBPAT, we fail. Otherwise backup,
4447 // except for SUBPAT, and continue with the next item.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004448 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4449 status = RA_NOMATCH;
4450 else
4451 {
4452 status = RA_CONT;
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004453 if (rp->rs_no != SUBPAT) // zero-width
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004454 reg_restore(&rp->rs_un.regsave, &backpos);
4455 }
4456 regstack_pop(&scan);
4457 if (status == RA_CONT)
4458 scan = regnext(scan);
4459 break;
4460
4461 case RS_BEHIND1:
4462 if (status == RA_NOMATCH)
4463 {
4464 regstack_pop(&scan);
4465 regstack.ga_len -= sizeof(regbehind_T);
4466 }
4467 else
4468 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004469 // The stuff after BEHIND/NOBEHIND matches. Now try if
4470 // the behind part does (not) match before the current
4471 // position in the input. This must be done at every
4472 // position in the input and checking if the match ends at
4473 // the current position.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004474
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004475 // save the position after the found match for next
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004476 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
4477
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004478 // Start looking for a match with operand at the current
4479 // position. Go back one character until we find the
4480 // result, hitting the start of the line or the previous
4481 // line (for multi-line matching).
4482 // Set behind_pos to where the match should end, BHPOS
4483 // will match it. Save the current value.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004484 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4485 behind_pos = rp->rs_un.regsave;
4486
4487 rp->rs_state = RS_BEHIND2;
4488
4489 reg_restore(&rp->rs_un.regsave, &backpos);
4490 scan = OPERAND(rp->rs_scan) + 4;
4491 }
4492 break;
4493
4494 case RS_BEHIND2:
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004495 // Looping for BEHIND / NOBEHIND match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004496 if (status == RA_MATCH && reg_save_equal(&behind_pos))
4497 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004498 // found a match that ends where "next" started
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004499 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4500 if (rp->rs_no == BEHIND)
4501 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4502 &backpos);
4503 else
4504 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004505 // But we didn't want a match. Need to restore the
4506 // subexpr, because what follows matched, so they have
4507 // been set.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004508 status = RA_NOMATCH;
4509 restore_subexpr(((regbehind_T *)rp) - 1);
4510 }
4511 regstack_pop(&scan);
4512 regstack.ga_len -= sizeof(regbehind_T);
4513 }
4514 else
4515 {
4516 long limit;
4517
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004518 // No match or a match that doesn't end where we want it: Go
4519 // back one character. May go to previous line once.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004520 no = OK;
4521 limit = OPERAND_MIN(rp->rs_scan);
4522 if (REG_MULTI)
4523 {
4524 if (limit > 0
4525 && ((rp->rs_un.regsave.rs_u.pos.lnum
4526 < behind_pos.rs_u.pos.lnum
4527 ? (colnr_T)STRLEN(rex.line)
4528 : behind_pos.rs_u.pos.col)
4529 - rp->rs_un.regsave.rs_u.pos.col >= limit))
4530 no = FAIL;
4531 else if (rp->rs_un.regsave.rs_u.pos.col == 0)
4532 {
4533 if (rp->rs_un.regsave.rs_u.pos.lnum
4534 < behind_pos.rs_u.pos.lnum
4535 || reg_getline(
4536 --rp->rs_un.regsave.rs_u.pos.lnum)
4537 == NULL)
4538 no = FAIL;
4539 else
4540 {
4541 reg_restore(&rp->rs_un.regsave, &backpos);
4542 rp->rs_un.regsave.rs_u.pos.col =
4543 (colnr_T)STRLEN(rex.line);
4544 }
4545 }
4546 else
4547 {
4548 if (has_mbyte)
4549 {
4550 char_u *line =
4551 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
4552
4553 rp->rs_un.regsave.rs_u.pos.col -=
4554 (*mb_head_off)(line, line
4555 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
4556 }
4557 else
4558 --rp->rs_un.regsave.rs_u.pos.col;
4559 }
4560 }
4561 else
4562 {
4563 if (rp->rs_un.regsave.rs_u.ptr == rex.line)
4564 no = FAIL;
4565 else
4566 {
4567 MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
4568 if (limit > 0 && (long)(behind_pos.rs_u.ptr
4569 - rp->rs_un.regsave.rs_u.ptr) > limit)
4570 no = FAIL;
4571 }
4572 }
4573 if (no == OK)
4574 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004575 // Advanced, prepare for finding match again.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004576 reg_restore(&rp->rs_un.regsave, &backpos);
4577 scan = OPERAND(rp->rs_scan) + 4;
4578 if (status == RA_MATCH)
4579 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004580 // We did match, so subexpr may have been changed,
4581 // need to restore them for the next try.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004582 status = RA_NOMATCH;
4583 restore_subexpr(((regbehind_T *)rp) - 1);
4584 }
4585 }
4586 else
4587 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004588 // Can't advance. For NOBEHIND that's a match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004589 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4590 if (rp->rs_no == NOBEHIND)
4591 {
4592 reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4593 &backpos);
4594 status = RA_MATCH;
4595 }
4596 else
4597 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004598 // We do want a proper match. Need to restore the
4599 // subexpr if we had a match, because they may have
4600 // been set.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004601 if (status == RA_MATCH)
4602 {
4603 status = RA_NOMATCH;
4604 restore_subexpr(((regbehind_T *)rp) - 1);
4605 }
4606 }
4607 regstack_pop(&scan);
4608 regstack.ga_len -= sizeof(regbehind_T);
4609 }
4610 }
4611 break;
4612
4613 case RS_STAR_LONG:
4614 case RS_STAR_SHORT:
4615 {
4616 regstar_T *rst = ((regstar_T *)rp) - 1;
4617
4618 if (status == RA_MATCH)
4619 {
4620 regstack_pop(&scan);
4621 regstack.ga_len -= sizeof(regstar_T);
4622 break;
4623 }
4624
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004625 // Tried once already, restore input pointers.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004626 if (status != RA_BREAK)
4627 reg_restore(&rp->rs_un.regsave, &backpos);
4628
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004629 // Repeat until we found a position where it could match.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004630 for (;;)
4631 {
4632 if (status != RA_BREAK)
4633 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004634 // Tried first position already, advance.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004635 if (rp->rs_state == RS_STAR_LONG)
4636 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004637 // Trying for longest match, but couldn't or
4638 // didn't match -- back up one char.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004639 if (--rst->count < rst->minval)
4640 break;
4641 if (rex.input == rex.line)
4642 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004643 // backup to last char of previous line
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004644 --rex.lnum;
4645 rex.line = reg_getline(rex.lnum);
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004646 // Just in case regrepeat() didn't count
4647 // right.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004648 if (rex.line == NULL)
4649 break;
4650 rex.input = rex.line + STRLEN(rex.line);
4651 fast_breakcheck();
4652 }
4653 else
4654 MB_PTR_BACK(rex.line, rex.input);
4655 }
4656 else
4657 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004658 // Range is backwards, use shortest match first.
4659 // Careful: maxval and minval are exchanged!
4660 // Couldn't or didn't match: try advancing one
4661 // char.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004662 if (rst->count == rst->minval
4663 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
4664 break;
4665 ++rst->count;
4666 }
4667 if (got_int)
4668 break;
4669 }
4670 else
4671 status = RA_NOMATCH;
4672
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004673 // If it could match, try it.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004674 if (rst->nextb == NUL || *rex.input == rst->nextb
4675 || *rex.input == rst->nextb_ic)
4676 {
4677 reg_save(&rp->rs_un.regsave, &backpos);
4678 scan = regnext(rp->rs_scan);
4679 status = RA_CONT;
4680 break;
4681 }
4682 }
4683 if (status != RA_CONT)
4684 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004685 // Failed.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004686 regstack_pop(&scan);
4687 regstack.ga_len -= sizeof(regstar_T);
4688 status = RA_NOMATCH;
4689 }
4690 }
4691 break;
4692 }
4693
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004694 // If we want to continue the inner loop or didn't pop a state
4695 // continue matching loop
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004696 if (status == RA_CONT || rp == (regitem_T *)
4697 ((char *)regstack.ga_data + regstack.ga_len) - 1)
4698 break;
4699 }
4700
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004701 // May need to continue with the inner loop, starting at "scan".
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004702 if (status == RA_CONT)
4703 continue;
4704
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004705 // If the regstack is empty or something failed we are done.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004706 if (regstack.ga_len == 0 || status == RA_FAIL)
4707 {
4708 if (scan == NULL)
4709 {
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004710 // We get here only if there's trouble -- normally "case END" is
4711 // the terminating point.
Bram Moolenaare83cca22020-09-07 18:53:21 +02004712 iemsg(_(e_re_corr));
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004713#ifdef DEBUG
4714 printf("Premature EOL\n");
4715#endif
4716 }
4717 return (status == RA_MATCH);
4718 }
4719
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004720 } // End of loop until the regstack is empty.
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004721
Bram Moolenaar9490b9a2019-09-08 17:20:12 +02004722 // NOTREACHED
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004723}
4724
4725/*
4726 * regtry - try match of "prog" with at rex.line["col"].
4727 * Returns 0 for failure, number of lines contained in the match otherwise.
4728 */
4729 static long
4730regtry(
4731 bt_regprog_T *prog,
4732 colnr_T col,
4733 proftime_T *tm, // timeout limit or NULL
4734 int *timed_out) // flag set on timeout or NULL
4735{
4736 rex.input = rex.line + col;
4737 rex.need_clear_subexpr = TRUE;
4738#ifdef FEAT_SYN_HL
4739 // Clear the external match subpointers if necessary.
4740 rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
4741#endif
4742
4743 if (regmatch(prog->program + 1, tm, timed_out) == 0)
4744 return 0;
4745
4746 cleanup_subexpr();
4747 if (REG_MULTI)
4748 {
4749 if (rex.reg_startpos[0].lnum < 0)
4750 {
4751 rex.reg_startpos[0].lnum = 0;
4752 rex.reg_startpos[0].col = col;
4753 }
4754 if (rex.reg_endpos[0].lnum < 0)
4755 {
4756 rex.reg_endpos[0].lnum = rex.lnum;
4757 rex.reg_endpos[0].col = (int)(rex.input - rex.line);
4758 }
4759 else
4760 // Use line number of "\ze".
4761 rex.lnum = rex.reg_endpos[0].lnum;
4762 }
4763 else
4764 {
4765 if (rex.reg_startp[0] == NULL)
4766 rex.reg_startp[0] = rex.line + col;
4767 if (rex.reg_endp[0] == NULL)
4768 rex.reg_endp[0] = rex.input;
4769 }
4770#ifdef FEAT_SYN_HL
4771 // Package any found \z(...\) matches for export. Default is none.
4772 unref_extmatch(re_extmatch_out);
4773 re_extmatch_out = NULL;
4774
4775 if (prog->reghasz == REX_SET)
4776 {
4777 int i;
4778
4779 cleanup_zsubexpr();
4780 re_extmatch_out = make_extmatch();
Bram Moolenaar7c77b342019-12-22 19:40:40 +01004781 if (re_extmatch_out == NULL)
4782 return 0;
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004783 for (i = 0; i < NSUBEXP; i++)
4784 {
4785 if (REG_MULTI)
4786 {
4787 // Only accept single line matches.
4788 if (reg_startzpos[i].lnum >= 0
4789 && reg_endzpos[i].lnum == reg_startzpos[i].lnum
4790 && reg_endzpos[i].col >= reg_startzpos[i].col)
4791 re_extmatch_out->matches[i] =
4792 vim_strnsave(reg_getline(reg_startzpos[i].lnum)
4793 + reg_startzpos[i].col,
4794 reg_endzpos[i].col - reg_startzpos[i].col);
4795 }
4796 else
4797 {
4798 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4799 re_extmatch_out->matches[i] =
4800 vim_strnsave(reg_startzp[i],
Bram Moolenaar71ccd032020-06-12 22:59:11 +02004801 reg_endzp[i] - reg_startzp[i]);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004802 }
4803 }
4804 }
4805#endif
4806 return 1 + rex.lnum;
4807}
4808
4809/*
4810 * Match a regexp against a string ("line" points to the string) or multiple
Bram Moolenaardf365142021-05-03 20:01:45 +02004811 * lines (if "line" is NULL, use reg_getline()).
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004812 * Returns 0 for failure, number of lines contained in the match otherwise.
4813 */
4814 static long
4815bt_regexec_both(
4816 char_u *line,
4817 colnr_T col, // column to start looking for match
4818 proftime_T *tm, // timeout limit or NULL
4819 int *timed_out) // flag set on timeout or NULL
4820{
4821 bt_regprog_T *prog;
4822 char_u *s;
4823 long retval = 0L;
4824
4825 // Create "regstack" and "backpos" if they are not allocated yet.
4826 // We allocate *_INITIAL amount of bytes first and then set the grow size
4827 // to much bigger value to avoid many malloc calls in case of deep regular
4828 // expressions.
4829 if (regstack.ga_data == NULL)
4830 {
4831 // Use an item size of 1 byte, since we push different things
4832 // onto the regstack.
4833 ga_init2(&regstack, 1, REGSTACK_INITIAL);
4834 (void)ga_grow(&regstack, REGSTACK_INITIAL);
4835 regstack.ga_growsize = REGSTACK_INITIAL * 8;
4836 }
4837
4838 if (backpos.ga_data == NULL)
4839 {
4840 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
4841 (void)ga_grow(&backpos, BACKPOS_INITIAL);
4842 backpos.ga_growsize = BACKPOS_INITIAL * 8;
4843 }
4844
4845 if (REG_MULTI)
4846 {
4847 prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
4848 line = reg_getline((linenr_T)0);
4849 rex.reg_startpos = rex.reg_mmatch->startpos;
4850 rex.reg_endpos = rex.reg_mmatch->endpos;
4851 }
4852 else
4853 {
4854 prog = (bt_regprog_T *)rex.reg_match->regprog;
4855 rex.reg_startp = rex.reg_match->startp;
4856 rex.reg_endp = rex.reg_match->endp;
4857 }
4858
4859 // Be paranoid...
4860 if (prog == NULL || line == NULL)
4861 {
Bram Moolenaare83cca22020-09-07 18:53:21 +02004862 iemsg(_(e_null));
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02004863 goto theend;
4864 }
4865
4866 // Check validity of program.
4867 if (prog_magic_wrong())
4868 goto theend;
4869
4870 // If the start column is past the maximum column: no need to try.
4871 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4872 goto theend;
4873
4874 // If pattern contains "\c" or "\C": overrule value of rex.reg_ic
4875 if (prog->regflags & RF_ICASE)
4876 rex.reg_ic = TRUE;
4877 else if (prog->regflags & RF_NOICASE)
4878 rex.reg_ic = FALSE;
4879
4880 // If pattern contains "\Z" overrule value of rex.reg_icombine
4881 if (prog->regflags & RF_ICOMBINE)
4882 rex.reg_icombine = TRUE;
4883
4884 // If there is a "must appear" string, look for it.
4885 if (prog->regmust != NULL)
4886 {
4887 int c;
4888
4889 if (has_mbyte)
4890 c = (*mb_ptr2char)(prog->regmust);
4891 else
4892 c = *prog->regmust;
4893 s = line + col;
4894
4895 // This is used very often, esp. for ":global". Use three versions of
4896 // the loop to avoid overhead of conditions.
4897 if (!rex.reg_ic && !has_mbyte)
4898 while ((s = vim_strbyte(s, c)) != NULL)
4899 {
4900 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4901 break; // Found it.
4902 ++s;
4903 }
4904 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
4905 while ((s = vim_strchr(s, c)) != NULL)
4906 {
4907 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4908 break; // Found it.
4909 MB_PTR_ADV(s);
4910 }
4911 else
4912 while ((s = cstrchr(s, c)) != NULL)
4913 {
4914 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4915 break; // Found it.
4916 MB_PTR_ADV(s);
4917 }
4918 if (s == NULL) // Not present.
4919 goto theend;
4920 }
4921
4922 rex.line = line;
4923 rex.lnum = 0;
4924 reg_toolong = FALSE;
4925
4926 // Simplest case: Anchored match need be tried only once.
4927 if (prog->reganch)
4928 {
4929 int c;
4930
4931 if (has_mbyte)
4932 c = (*mb_ptr2char)(rex.line + col);
4933 else
4934 c = rex.line[col];
4935 if (prog->regstart == NUL
4936 || prog->regstart == c
4937 || (rex.reg_ic
4938 && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
4939 || (c < 255 && prog->regstart < 255 &&
4940 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
4941 retval = regtry(prog, col, tm, timed_out);
4942 else
4943 retval = 0;
4944 }
4945 else
4946 {
4947#ifdef FEAT_RELTIME
4948 int tm_count = 0;
4949#endif
4950 // Messy cases: unanchored match.
4951 while (!got_int)
4952 {
4953 if (prog->regstart != NUL)
4954 {
4955 // Skip until the char we know it must start with.
4956 // Used often, do some work to avoid call overhead.
4957 if (!rex.reg_ic && !has_mbyte)
4958 s = vim_strbyte(rex.line + col, prog->regstart);
4959 else
4960 s = cstrchr(rex.line + col, prog->regstart);
4961 if (s == NULL)
4962 {
4963 retval = 0;
4964 break;
4965 }
4966 col = (int)(s - rex.line);
4967 }
4968
4969 // Check for maximum column to try.
4970 if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4971 {
4972 retval = 0;
4973 break;
4974 }
4975
4976 retval = regtry(prog, col, tm, timed_out);
4977 if (retval > 0)
4978 break;
4979
4980 // if not currently on the first line, get it again
4981 if (rex.lnum != 0)
4982 {
4983 rex.lnum = 0;
4984 rex.line = reg_getline((linenr_T)0);
4985 }
4986 if (rex.line[col] == NUL)
4987 break;
4988 if (has_mbyte)
4989 col += (*mb_ptr2len)(rex.line + col);
4990 else
4991 ++col;
4992#ifdef FEAT_RELTIME
4993 // Check for timeout once in a twenty times to avoid overhead.
4994 if (tm != NULL && ++tm_count == 20)
4995 {
4996 tm_count = 0;
4997 if (profile_passed_limit(tm))
4998 {
4999 if (timed_out != NULL)
5000 *timed_out = TRUE;
5001 break;
5002 }
5003 }
5004#endif
5005 }
5006 }
5007
5008theend:
5009 // Free "reg_tofree" when it's a bit big.
5010 // Free regstack and backpos if they are bigger than their initial size.
5011 if (reg_tofreelen > 400)
5012 VIM_CLEAR(reg_tofree);
5013 if (regstack.ga_maxlen > REGSTACK_INITIAL)
5014 ga_clear(&regstack);
5015 if (backpos.ga_maxlen > BACKPOS_INITIAL)
5016 ga_clear(&backpos);
5017
Bram Moolenaara3d10a52020-12-21 18:24:00 +01005018 if (retval > 0)
Bram Moolenaara7a691c2020-12-09 16:36:04 +01005019 {
Bram Moolenaara3d10a52020-12-21 18:24:00 +01005020 // Make sure the end is never before the start. Can happen when \zs
5021 // and \ze are used.
5022 if (REG_MULTI)
5023 {
5024 lpos_T *start = &rex.reg_mmatch->startpos[0];
5025 lpos_T *end = &rex.reg_mmatch->endpos[0];
Bram Moolenaara7a691c2020-12-09 16:36:04 +01005026
Bram Moolenaara3d10a52020-12-21 18:24:00 +01005027 if (end->lnum < start->lnum
Bram Moolenaara7a691c2020-12-09 16:36:04 +01005028 || (end->lnum == start->lnum && end->col < start->col))
Bram Moolenaara3d10a52020-12-21 18:24:00 +01005029 rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
5030 }
5031 else
5032 {
5033 if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
5034 rex.reg_match->endp[0] = rex.reg_match->startp[0];
5035 }
Bram Moolenaara7a691c2020-12-09 16:36:04 +01005036 }
5037
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02005038 return retval;
5039}
5040
5041/*
5042 * Match a regexp against a string.
5043 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
5044 * Uses curbuf for line count and 'iskeyword'.
5045 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
5046 *
5047 * Returns 0 for failure, number of lines contained in the match otherwise.
5048 */
5049 static int
5050bt_regexec_nl(
5051 regmatch_T *rmp,
5052 char_u *line, // string to match against
5053 colnr_T col, // column to start looking for match
5054 int line_lbr)
5055{
5056 rex.reg_match = rmp;
5057 rex.reg_mmatch = NULL;
5058 rex.reg_maxline = 0;
5059 rex.reg_line_lbr = line_lbr;
5060 rex.reg_buf = curbuf;
5061 rex.reg_win = NULL;
5062 rex.reg_ic = rmp->rm_ic;
5063 rex.reg_icombine = FALSE;
5064 rex.reg_maxcol = 0;
5065
5066 return bt_regexec_both(line, col, NULL, NULL);
5067}
5068
5069/*
5070 * Match a regexp against multiple lines.
5071 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
5072 * Uses curbuf for line count and 'iskeyword'.
5073 *
5074 * Return zero if there is no match. Return number of lines contained in the
5075 * match otherwise.
5076 */
5077 static long
5078bt_regexec_multi(
5079 regmmatch_T *rmp,
5080 win_T *win, // window in which to search or NULL
5081 buf_T *buf, // buffer in which to search
5082 linenr_T lnum, // nr of line to start looking for match
5083 colnr_T col, // column to start looking for match
5084 proftime_T *tm, // timeout limit or NULL
5085 int *timed_out) // flag set on timeout or NULL
5086{
Bram Moolenaarf4140482020-02-15 23:06:45 +01005087 init_regexec_multi(rmp, win, buf, lnum);
Bram Moolenaar6d7d7cf2019-09-07 23:16:33 +02005088 return bt_regexec_both(NULL, col, tm, timed_out);
5089}
5090
5091/*
5092 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5093 */
5094 static int
5095re_num_cmp(long_u val, char_u *scan)
5096{
5097 long_u n = OPERAND_MIN(scan);
5098
5099 if (OPERAND_CMP(scan) == '>')
5100 return val > n;
5101 if (OPERAND_CMP(scan) == '<')
5102 return val < n;
5103 return val == n;
5104}
5105
5106#ifdef BT_REGEXP_DUMP
5107
5108/*
5109 * regdump - dump a regexp onto stdout in vaguely comprehensible form
5110 */
5111 static void
5112regdump(char_u *pattern, bt_regprog_T *r)
5113{
5114 char_u *s;
5115 int op = EXACTLY; // Arbitrary non-END op.
5116 char_u *next;
5117 char_u *end = NULL;
5118 FILE *f;
5119
5120#ifdef BT_REGEXP_LOG
5121 f = fopen("bt_regexp_log.log", "a");
5122#else
5123 f = stdout;
5124#endif
5125 if (f == NULL)
5126 return;
5127 fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
5128
5129 s = r->program + 1;
5130 // Loop until we find the END that isn't before a referred next (an END
5131 // can also appear in a NOMATCH operand).
5132 while (op != END || s <= end)
5133 {
5134 op = OP(s);
5135 fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); // Where, what.
5136 next = regnext(s);
5137 if (next == NULL) // Next ptr.
5138 fprintf(f, "(0)");
5139 else
5140 fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
5141 if (end < next)
5142 end = next;
5143 if (op == BRACE_LIMITS)
5144 {
5145 // Two ints
5146 fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5147 s += 8;
5148 }
5149 else if (op == BEHIND || op == NOBEHIND)
5150 {
5151 // one int
5152 fprintf(f, " count %ld", OPERAND_MIN(s));
5153 s += 4;
5154 }
5155 else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
5156 {
5157 // one int plus comparator
5158 fprintf(f, " count %ld", OPERAND_MIN(s));
5159 s += 5;
5160 }
5161 s += 3;
5162 if (op == ANYOF || op == ANYOF + ADD_NL
5163 || op == ANYBUT || op == ANYBUT + ADD_NL
5164 || op == EXACTLY)
5165 {
5166 // Literal string, where present.
5167 fprintf(f, "\nxxxxxxxxx\n");
5168 while (*s != NUL)
5169 fprintf(f, "%c", *s++);
5170 fprintf(f, "\nxxxxxxxxx\n");
5171 s++;
5172 }
5173 fprintf(f, "\r\n");
5174 }
5175
5176 // Header fields of interest.
5177 if (r->regstart != NUL)
5178 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
5179 ? (char *)transchar(r->regstart)
5180 : "multibyte", r->regstart);
5181 if (r->reganch)
5182 fprintf(f, "anchored; ");
5183 if (r->regmust != NULL)
5184 fprintf(f, "must have \"%s\"", r->regmust);
5185 fprintf(f, "\r\n");
5186
5187#ifdef BT_REGEXP_LOG
5188 fclose(f);
5189#endif
5190}
5191#endif // BT_REGEXP_DUMP
5192
5193#ifdef DEBUG
5194/*
5195 * regprop - printable representation of opcode
5196 */
5197 static char_u *
5198regprop(char_u *op)
5199{
5200 char *p;
5201 static char buf[50];
5202
5203 STRCPY(buf, ":");
5204
5205 switch ((int) OP(op))
5206 {
5207 case BOL:
5208 p = "BOL";
5209 break;
5210 case EOL:
5211 p = "EOL";
5212 break;
5213 case RE_BOF:
5214 p = "BOF";
5215 break;
5216 case RE_EOF:
5217 p = "EOF";
5218 break;
5219 case CURSOR:
5220 p = "CURSOR";
5221 break;
5222 case RE_VISUAL:
5223 p = "RE_VISUAL";
5224 break;
5225 case RE_LNUM:
5226 p = "RE_LNUM";
5227 break;
5228 case RE_MARK:
5229 p = "RE_MARK";
5230 break;
5231 case RE_COL:
5232 p = "RE_COL";
5233 break;
5234 case RE_VCOL:
5235 p = "RE_VCOL";
5236 break;
5237 case BOW:
5238 p = "BOW";
5239 break;
5240 case EOW:
5241 p = "EOW";
5242 break;
5243 case ANY:
5244 p = "ANY";
5245 break;
5246 case ANY + ADD_NL:
5247 p = "ANY+NL";
5248 break;
5249 case ANYOF:
5250 p = "ANYOF";
5251 break;
5252 case ANYOF + ADD_NL:
5253 p = "ANYOF+NL";
5254 break;
5255 case ANYBUT:
5256 p = "ANYBUT";
5257 break;
5258 case ANYBUT + ADD_NL:
5259 p = "ANYBUT+NL";
5260 break;
5261 case IDENT:
5262 p = "IDENT";
5263 break;
5264 case IDENT + ADD_NL:
5265 p = "IDENT+NL";
5266 break;
5267 case SIDENT:
5268 p = "SIDENT";
5269 break;
5270 case SIDENT + ADD_NL:
5271 p = "SIDENT+NL";
5272 break;
5273 case KWORD:
5274 p = "KWORD";
5275 break;
5276 case KWORD + ADD_NL:
5277 p = "KWORD+NL";
5278 break;
5279 case SKWORD:
5280 p = "SKWORD";
5281 break;
5282 case SKWORD + ADD_NL:
5283 p = "SKWORD+NL";
5284 break;
5285 case FNAME:
5286 p = "FNAME";
5287 break;
5288 case FNAME + ADD_NL:
5289 p = "FNAME+NL";
5290 break;
5291 case SFNAME:
5292 p = "SFNAME";
5293 break;
5294 case SFNAME + ADD_NL:
5295 p = "SFNAME+NL";
5296 break;
5297 case PRINT:
5298 p = "PRINT";
5299 break;
5300 case PRINT + ADD_NL:
5301 p = "PRINT+NL";
5302 break;
5303 case SPRINT:
5304 p = "SPRINT";
5305 break;
5306 case SPRINT + ADD_NL:
5307 p = "SPRINT+NL";
5308 break;
5309 case WHITE:
5310 p = "WHITE";
5311 break;
5312 case WHITE + ADD_NL:
5313 p = "WHITE+NL";
5314 break;
5315 case NWHITE:
5316 p = "NWHITE";
5317 break;
5318 case NWHITE + ADD_NL:
5319 p = "NWHITE+NL";
5320 break;
5321 case DIGIT:
5322 p = "DIGIT";
5323 break;
5324 case DIGIT + ADD_NL:
5325 p = "DIGIT+NL";
5326 break;
5327 case NDIGIT:
5328 p = "NDIGIT";
5329 break;
5330 case NDIGIT + ADD_NL:
5331 p = "NDIGIT+NL";
5332 break;
5333 case HEX:
5334 p = "HEX";
5335 break;
5336 case HEX + ADD_NL:
5337 p = "HEX+NL";
5338 break;
5339 case NHEX:
5340 p = "NHEX";
5341 break;
5342 case NHEX + ADD_NL:
5343 p = "NHEX+NL";
5344 break;
5345 case OCTAL:
5346 p = "OCTAL";
5347 break;
5348 case OCTAL + ADD_NL:
5349 p = "OCTAL+NL";
5350 break;
5351 case NOCTAL:
5352 p = "NOCTAL";
5353 break;
5354 case NOCTAL + ADD_NL:
5355 p = "NOCTAL+NL";
5356 break;
5357 case WORD:
5358 p = "WORD";
5359 break;
5360 case WORD + ADD_NL:
5361 p = "WORD+NL";
5362 break;
5363 case NWORD:
5364 p = "NWORD";
5365 break;
5366 case NWORD + ADD_NL:
5367 p = "NWORD+NL";
5368 break;
5369 case HEAD:
5370 p = "HEAD";
5371 break;
5372 case HEAD + ADD_NL:
5373 p = "HEAD+NL";
5374 break;
5375 case NHEAD:
5376 p = "NHEAD";
5377 break;
5378 case NHEAD + ADD_NL:
5379 p = "NHEAD+NL";
5380 break;
5381 case ALPHA:
5382 p = "ALPHA";
5383 break;
5384 case ALPHA + ADD_NL:
5385 p = "ALPHA+NL";
5386 break;
5387 case NALPHA:
5388 p = "NALPHA";
5389 break;
5390 case NALPHA + ADD_NL:
5391 p = "NALPHA+NL";
5392 break;
5393 case LOWER:
5394 p = "LOWER";
5395 break;
5396 case LOWER + ADD_NL:
5397 p = "LOWER+NL";
5398 break;
5399 case NLOWER:
5400 p = "NLOWER";
5401 break;
5402 case NLOWER + ADD_NL:
5403 p = "NLOWER+NL";
5404 break;
5405 case UPPER:
5406 p = "UPPER";
5407 break;
5408 case UPPER + ADD_NL:
5409 p = "UPPER+NL";
5410 break;
5411 case NUPPER:
5412 p = "NUPPER";
5413 break;
5414 case NUPPER + ADD_NL:
5415 p = "NUPPER+NL";
5416 break;
5417 case BRANCH:
5418 p = "BRANCH";
5419 break;
5420 case EXACTLY:
5421 p = "EXACTLY";
5422 break;
5423 case NOTHING:
5424 p = "NOTHING";
5425 break;
5426 case BACK:
5427 p = "BACK";
5428 break;
5429 case END:
5430 p = "END";
5431 break;
5432 case MOPEN + 0:
5433 p = "MATCH START";
5434 break;
5435 case MOPEN + 1:
5436 case MOPEN + 2:
5437 case MOPEN + 3:
5438 case MOPEN + 4:
5439 case MOPEN + 5:
5440 case MOPEN + 6:
5441 case MOPEN + 7:
5442 case MOPEN + 8:
5443 case MOPEN + 9:
5444 sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
5445 p = NULL;
5446 break;
5447 case MCLOSE + 0:
5448 p = "MATCH END";
5449 break;
5450 case MCLOSE + 1:
5451 case MCLOSE + 2:
5452 case MCLOSE + 3:
5453 case MCLOSE + 4:
5454 case MCLOSE + 5:
5455 case MCLOSE + 6:
5456 case MCLOSE + 7:
5457 case MCLOSE + 8:
5458 case MCLOSE + 9:
5459 sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
5460 p = NULL;
5461 break;
5462 case BACKREF + 1:
5463 case BACKREF + 2:
5464 case BACKREF + 3:
5465 case BACKREF + 4:
5466 case BACKREF + 5:
5467 case BACKREF + 6:
5468 case BACKREF + 7:
5469 case BACKREF + 8:
5470 case BACKREF + 9:
5471 sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
5472 p = NULL;
5473 break;
5474 case NOPEN:
5475 p = "NOPEN";
5476 break;
5477 case NCLOSE:
5478 p = "NCLOSE";
5479 break;
5480#ifdef FEAT_SYN_HL
5481 case ZOPEN + 1:
5482 case ZOPEN + 2:
5483 case ZOPEN + 3:
5484 case ZOPEN + 4:
5485 case ZOPEN + 5:
5486 case ZOPEN + 6:
5487 case ZOPEN + 7:
5488 case ZOPEN + 8:
5489 case ZOPEN + 9:
5490 sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
5491 p = NULL;
5492 break;
5493 case ZCLOSE + 1:
5494 case ZCLOSE + 2:
5495 case ZCLOSE + 3:
5496 case ZCLOSE + 4:
5497 case ZCLOSE + 5:
5498 case ZCLOSE + 6:
5499 case ZCLOSE + 7:
5500 case ZCLOSE + 8:
5501 case ZCLOSE + 9:
5502 sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
5503 p = NULL;
5504 break;
5505 case ZREF + 1:
5506 case ZREF + 2:
5507 case ZREF + 3:
5508 case ZREF + 4:
5509 case ZREF + 5:
5510 case ZREF + 6:
5511 case ZREF + 7:
5512 case ZREF + 8:
5513 case ZREF + 9:
5514 sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
5515 p = NULL;
5516 break;
5517#endif
5518 case STAR:
5519 p = "STAR";
5520 break;
5521 case PLUS:
5522 p = "PLUS";
5523 break;
5524 case NOMATCH:
5525 p = "NOMATCH";
5526 break;
5527 case MATCH:
5528 p = "MATCH";
5529 break;
5530 case BEHIND:
5531 p = "BEHIND";
5532 break;
5533 case NOBEHIND:
5534 p = "NOBEHIND";
5535 break;
5536 case SUBPAT:
5537 p = "SUBPAT";
5538 break;
5539 case BRACE_LIMITS:
5540 p = "BRACE_LIMITS";
5541 break;
5542 case BRACE_SIMPLE:
5543 p = "BRACE_SIMPLE";
5544 break;
5545 case BRACE_COMPLEX + 0:
5546 case BRACE_COMPLEX + 1:
5547 case BRACE_COMPLEX + 2:
5548 case BRACE_COMPLEX + 3:
5549 case BRACE_COMPLEX + 4:
5550 case BRACE_COMPLEX + 5:
5551 case BRACE_COMPLEX + 6:
5552 case BRACE_COMPLEX + 7:
5553 case BRACE_COMPLEX + 8:
5554 case BRACE_COMPLEX + 9:
5555 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
5556 p = NULL;
5557 break;
5558 case MULTIBYTECODE:
5559 p = "MULTIBYTECODE";
5560 break;
5561 case NEWL:
5562 p = "NEWL";
5563 break;
5564 default:
5565 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
5566 p = NULL;
5567 break;
5568 }
5569 if (p != NULL)
5570 STRCAT(buf, p);
5571 return (char_u *)buf;
5572}
5573#endif // DEBUG