blob: a8bc82248d28961fa6231cbe8c5d73a11c37cd56 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar071d4272004-06-13 20:20:40 +00002 *
3 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
4 *
5 * This is NOT the original regular expression code as written by Henry
6 * Spencer. This code has been modified specifically for use with Vim, and
7 * should not be used apart from compiling Vim. If you want a good regular
8 * expression library, get the original code.
9 *
10 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
11 */
12
13#ifndef _REGEXP_H
14#define _REGEXP_H
15
16/*
17 * The number of sub-matches is limited to 10.
18 * The first one (index 0) is the whole match, referenced with "\0".
19 * The second one (index 1) is the first sub-match, referenced with "\1".
20 * This goes up to the tenth (index 9), referenced with "\9".
21 */
22#define NSUBEXP 10
23
24/*
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020025 * In the NFA engine: how many braces are allowed.
26 * TODO(RE): Use dynamic memory allocation instead of static, like here
27 */
28#define NFA_MAX_BRACES 20
29
Bram Moolenaarfda37292014-11-05 14:27:36 +010030/*
31 * In the NFA engine: how many states are allowed
32 */
33#define NFA_MAX_STATES 100000
kylo2529dac9b12022-03-27 20:05:17 +010034#define NFA_TOO_EXPENSIVE (-1)
Bram Moolenaarfda37292014-11-05 14:27:36 +010035
Bram Moolenaar9bf703d2019-11-30 19:44:38 +010036// Which regexp engine to use? Needed for vim_regcomp().
37// Must match with 'regexpengine'.
Bram Moolenaarfda37292014-11-05 14:27:36 +010038#define AUTOMATIC_ENGINE 0
39#define BACKTRACKING_ENGINE 1
40#define NFA_ENGINE 2
41
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020042typedef struct regengine regengine_T;
43
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020044/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000045 * Structure returned by vim_regcomp() to pass on to vim_regexec().
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020046 * This is the general structure. For the actual matcher, two specific
47 * structures are used. See code below.
48 */
49typedef struct regprog
50{
51 regengine_T *engine;
52 unsigned regflags;
Bram Moolenaar0270f382018-07-17 05:43:58 +020053 unsigned re_engine; // automatic, backtracking or nfa engine
54 unsigned re_flags; // second argument for vim_regcomp()
55 int re_in_use; // prog is being executed
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020056} regprog_T;
57
58/*
59 * Structure used by the back track matcher.
Bram Moolenaar071d4272004-06-13 20:20:40 +000060 * These fields are only to be used in regexp.c!
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020061 * See regexp.c for an explanation.
Bram Moolenaar071d4272004-06-13 20:20:40 +000062 */
63typedef struct
64{
Bram Moolenaar9bf703d2019-11-30 19:44:38 +010065 // These four members implement regprog_T
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020066 regengine_T *engine;
67 unsigned regflags;
Bram Moolenaarfda37292014-11-05 14:27:36 +010068 unsigned re_engine;
Bram Moolenaar0270f382018-07-17 05:43:58 +020069 unsigned re_flags;
70 int re_in_use;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020071
Bram Moolenaar071d4272004-06-13 20:20:40 +000072 int regstart;
73 char_u reganch;
74 char_u *regmust;
75 int regmlen;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020076#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +000077 char_u reghasz;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020078#endif
Bram Moolenaar9bf703d2019-11-30 19:44:38 +010079 char_u program[1]; // actually longer..
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020080} bt_regprog_T;
81
82/*
83 * Structure representing a NFA state.
Bram Moolenaarad3ec762019-04-21 00:00:13 +020084 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020085 */
86typedef struct nfa_state nfa_state_T;
87struct nfa_state
88{
89 int c;
90 nfa_state_T *out;
91 nfa_state_T *out1;
92 int id;
Bram Moolenaar9bf703d2019-11-30 19:44:38 +010093 int lastlist[2]; // 0: normal, 1: recursive
Bram Moolenaar423532e2013-05-29 21:14:42 +020094 int val;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020095};
96
97/*
98 * Structure used by the NFA matcher.
99 */
100typedef struct
101{
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100102 // These three members implement regprog_T
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200103 regengine_T *engine;
104 unsigned regflags;
Bram Moolenaarfda37292014-11-05 14:27:36 +0100105 unsigned re_engine;
Bram Moolenaar0270f382018-07-17 05:43:58 +0200106 unsigned re_flags;
107 int re_in_use;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200108
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100109 nfa_state_T *start; // points into state[]
Bram Moolenaard89616e2013-06-06 18:46:06 +0200110
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100111 int reganch; // pattern starts with ^
112 int regstart; // char at start of pattern
113 char_u *match_text; // plain text to match with
Bram Moolenaard89616e2013-06-06 18:46:06 +0200114
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100115 int has_zend; // pattern contains \ze
116 int has_backref; // pattern contains \1 .. \9
Bram Moolenaarefb23f22013-06-01 23:02:54 +0200117#ifdef FEAT_SYN_HL
118 int reghasz;
119#endif
Bram Moolenaar69afb7b2013-06-02 15:55:55 +0200120 char_u *pattern;
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100121 int nsubexp; // number of ()
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200122 int nstate;
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100123 nfa_state_T state[1]; // actually longer..
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200124} nfa_regprog_T;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000125
126/*
127 * Structure to be used for single-line matching.
128 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
129 * When there is no match, the pointer is NULL.
130 */
131typedef struct
132{
133 regprog_T *regprog;
134 char_u *startp[NSUBEXP];
135 char_u *endp[NSUBEXP];
136 int rm_ic;
137} regmatch_T;
138
139/*
140 * Structure to be used for multi-line matching.
141 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
142 * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
143 * The line numbers are relative to the first line, thus startpos[0].lnum is
144 * always 0.
145 * When there is no match, the line number is -1.
146 */
147typedef struct
148{
149 regprog_T *regprog;
150 lpos_T startpos[NSUBEXP];
151 lpos_T endpos[NSUBEXP];
152 int rmm_ic;
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100153 colnr_T rmm_maxcol; // when not zero: maximum column
Bram Moolenaar071d4272004-06-13 20:20:40 +0000154} regmmatch_T;
155
156/*
157 * Structure used to store external references: "\z\(\)" to "\z\1".
158 * Use a reference count to avoid the need to copy this around. When it goes
159 * from 1 to zero the matches need to be freed.
160 */
161typedef struct
162{
163 short refcnt;
164 char_u *matches[NSUBEXP];
165} reg_extmatch_T;
166
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200167struct regengine
168{
169 regprog_T *(*regcomp)(char_u*, int);
Bram Moolenaar473de612013-06-08 18:19:48 +0200170 void (*regfree)(regprog_T *);
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +0200171 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int);
172 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200173 char_u *expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200174};
175
Bram Moolenaar9bf703d2019-11-30 19:44:38 +0100176#endif // _REGEXP_H