/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer.  This code has been modified specifically for use with Vim, and
 * should not be used apart from compiling Vim.  If you want a good regular
 * expression library, get the original code.
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 */

13#ifndef _REGEXP_H
14#define _REGEXP_H
15
/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 */
#define NSUBEXP  10

/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of static, like here
 */
#define NFA_MAX_BRACES 20

/*
 * In the NFA engine: how many states are allowed
 */
#define NFA_MAX_STATES 100000
/* NOTE(review): presumably a result code meaning the NFA engine gave up
 * because matching would cost too much -- confirm against its users. */
#define NFA_TOO_EXPENSIVE -1

/* Which regexp engine to use? Needed for vim_regcomp().
 * Must match with 'regexpengine'. */
#define AUTOMATIC_ENGINE    0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE          2

/* Forward declaration; struct regengine is defined at the end of this file. */
typedef struct regengine regengine_T;

Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020044/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000045 * Structure returned by vim_regcomp() to pass on to vim_regexec().
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020046 * This is the general structure. For the actual matcher, two specific
47 * structures are used. See code below.
48 */
49typedef struct regprog
50{
51 regengine_T *engine;
52 unsigned regflags;
Bram Moolenaarfda37292014-11-05 14:27:36 +010053 unsigned re_engine; /* automatic, backtracking or nfa engine */
54 unsigned re_flags; /* second argument for vim_regcomp() */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020055} regprog_T;
56
57/*
58 * Structure used by the back track matcher.
Bram Moolenaar071d4272004-06-13 20:20:40 +000059 * These fields are only to be used in regexp.c!
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020060 * See regexp.c for an explanation.
Bram Moolenaar071d4272004-06-13 20:20:40 +000061 */
62typedef struct
63{
Bram Moolenaarfda37292014-11-05 14:27:36 +010064 /* These four members implement regprog_T */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020065 regengine_T *engine;
66 unsigned regflags;
Bram Moolenaarfda37292014-11-05 14:27:36 +010067 unsigned re_engine;
68 unsigned re_flags; /* second argument for vim_regcomp() */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020069
Bram Moolenaar071d4272004-06-13 20:20:40 +000070 int regstart;
71 char_u reganch;
72 char_u *regmust;
73 int regmlen;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020074#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +000075 char_u reghasz;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020076#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020077 char_u program[1]; /* actually longer.. */
78} bt_regprog_T;
79
/*
 * Structure representing an NFA state.
 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
 */
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
    int             c;
    nfa_state_T     *out;
    nfa_state_T     *out1;
    int             id;
    int             lastlist[2]; /* 0: normal, 1: recursive */
    int             val;
};

95/*
96 * Structure used by the NFA matcher.
97 */
98typedef struct
99{
Bram Moolenaarfda37292014-11-05 14:27:36 +0100100 /* These three members implement regprog_T */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200101 regengine_T *engine;
102 unsigned regflags;
Bram Moolenaarfda37292014-11-05 14:27:36 +0100103 unsigned re_engine;
104 unsigned re_flags; /* second argument for vim_regcomp() */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200105
Bram Moolenaar69afb7b2013-06-02 15:55:55 +0200106 nfa_state_T *start; /* points into state[] */
Bram Moolenaard89616e2013-06-06 18:46:06 +0200107
108 int reganch; /* pattern starts with ^ */
109 int regstart; /* char at start of pattern */
Bram Moolenaar473de612013-06-08 18:19:48 +0200110 char_u *match_text; /* plain text to match with */
Bram Moolenaard89616e2013-06-06 18:46:06 +0200111
Bram Moolenaar57a285b2013-05-26 16:57:28 +0200112 int has_zend; /* pattern contains \ze */
Bram Moolenaar428e9872013-05-30 17:05:39 +0200113 int has_backref; /* pattern contains \1 .. \9 */
Bram Moolenaarefb23f22013-06-01 23:02:54 +0200114#ifdef FEAT_SYN_HL
115 int reghasz;
116#endif
Bram Moolenaar69afb7b2013-06-02 15:55:55 +0200117 char_u *pattern;
Bram Moolenaar963fee22013-05-26 21:47:28 +0200118 int nsubexp; /* number of () */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200119 int nstate;
Bram Moolenaar16619a22013-06-11 18:42:36 +0200120 nfa_state_T state[1]; /* actually longer.. */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200121} nfa_regprog_T;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000122
123/*
124 * Structure to be used for single-line matching.
125 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
126 * When there is no match, the pointer is NULL.
127 */
128typedef struct
129{
130 regprog_T *regprog;
131 char_u *startp[NSUBEXP];
132 char_u *endp[NSUBEXP];
133 int rm_ic;
134} regmatch_T;
135
136/*
137 * Structure to be used for multi-line matching.
138 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
139 * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
140 * The line numbers are relative to the first line, thus startpos[0].lnum is
141 * always 0.
142 * When there is no match, the line number is -1.
143 */
144typedef struct
145{
146 regprog_T *regprog;
147 lpos_T startpos[NSUBEXP];
148 lpos_T endpos[NSUBEXP];
149 int rmm_ic;
Bram Moolenaarbbebc852005-07-18 21:47:53 +0000150 colnr_T rmm_maxcol; /* when not zero: maximum column */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000151} regmmatch_T;
152
153/*
154 * Structure used to store external references: "\z\(\)" to "\z\1".
155 * Use a reference count to avoid the need to copy this around. When it goes
156 * from 1 to zero the matches need to be freed.
157 */
158typedef struct
159{
160 short refcnt;
161 char_u *matches[NSUBEXP];
162} reg_extmatch_T;
163
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200164struct regengine
165{
166 regprog_T *(*regcomp)(char_u*, int);
Bram Moolenaar473de612013-06-08 18:19:48 +0200167 void (*regfree)(regprog_T *);
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +0200168 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int);
169 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *);
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200170 char_u *expr;
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200171};
172
Bram Moolenaar071d4272004-06-13 20:20:40 +0000173#endif /* _REGEXP_H */