/* vi:set ts=8 sts=4 sw=4:
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer.  This code has been modified specifically for use with Vim, and
 * should not be used apart from compiling Vim.  If you want a good regular
 * expression library, get the original code.
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 */

#ifndef _REGEXP_H
#define _REGEXP_H

/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 */
#define NSUBEXP  10

/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of a static limit.
 */
#define NFA_MAX_BRACES 20

/*
 * Forward declaration: the program structures below hold a pointer to their
 * engine, while "struct regengine" itself is only defined at the end of this
 * file.
 */
typedef struct regengine regengine_T;

Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020032/*
Bram Moolenaar071d4272004-06-13 20:20:40 +000033 * Structure returned by vim_regcomp() to pass on to vim_regexec().
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020034 * This is the general structure. For the actual matcher, two specific
35 * structures are used. See code below.
36 */
37typedef struct regprog
38{
39 regengine_T *engine;
40 unsigned regflags;
41} regprog_T;
42
43/*
44 * Structure used by the back track matcher.
Bram Moolenaar071d4272004-06-13 20:20:40 +000045 * These fields are only to be used in regexp.c!
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020046 * See regexp.c for an explanation.
Bram Moolenaar071d4272004-06-13 20:20:40 +000047 */
48typedef struct
49{
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020050 /* These two members implement regprog_T */
51 regengine_T *engine;
52 unsigned regflags;
53
Bram Moolenaar071d4272004-06-13 20:20:40 +000054 int regstart;
55 char_u reganch;
56 char_u *regmust;
57 int regmlen;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020058#ifdef FEAT_SYN_HL
Bram Moolenaar071d4272004-06-13 20:20:40 +000059 char_u reghasz;
Bram Moolenaarefb23f22013-06-01 23:02:54 +020060#endif
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020061 char_u program[1]; /* actually longer.. */
62} bt_regprog_T;
63
/*
 * Structure representing a NFA state.
 * A NFA state may have no outgoing edge, when it is a NFA_MATCH state.
 * "out" and "out1" point to other nfa_state_T items.
 */
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
    int                 c;
    nfa_state_T         *out;
    nfa_state_T         *out1;
    int                 id;
    int                 lastlist;
    int                 negated;
    int                 val;
};

80/*
81 * Structure used by the NFA matcher.
82 */
83typedef struct
84{
85 /* These two members implement regprog_T */
86 regengine_T *engine;
87 unsigned regflags;
88
89 regprog_T regprog;
90 nfa_state_T *start;
Bram Moolenaar57a285b2013-05-26 16:57:28 +020091 int has_zend; /* pattern contains \ze */
Bram Moolenaar428e9872013-05-30 17:05:39 +020092 int has_backref; /* pattern contains \1 .. \9 */
Bram Moolenaarefb23f22013-06-01 23:02:54 +020093#ifdef FEAT_SYN_HL
94 int reghasz;
95#endif
Bram Moolenaar963fee22013-05-26 21:47:28 +020096 int nsubexp; /* number of () */
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +020097 int nstate;
98 nfa_state_T state[0]; /* actually longer.. */
99} nfa_regprog_T;
Bram Moolenaar071d4272004-06-13 20:20:40 +0000100
101/*
102 * Structure to be used for single-line matching.
103 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
104 * When there is no match, the pointer is NULL.
105 */
106typedef struct
107{
108 regprog_T *regprog;
109 char_u *startp[NSUBEXP];
110 char_u *endp[NSUBEXP];
111 int rm_ic;
112} regmatch_T;
113
114/*
115 * Structure to be used for multi-line matching.
116 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
117 * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
118 * The line numbers are relative to the first line, thus startpos[0].lnum is
119 * always 0.
120 * When there is no match, the line number is -1.
121 */
122typedef struct
123{
124 regprog_T *regprog;
125 lpos_T startpos[NSUBEXP];
126 lpos_T endpos[NSUBEXP];
127 int rmm_ic;
Bram Moolenaarbbebc852005-07-18 21:47:53 +0000128 colnr_T rmm_maxcol; /* when not zero: maximum column */
Bram Moolenaar071d4272004-06-13 20:20:40 +0000129} regmmatch_T;
130
131/*
132 * Structure used to store external references: "\z\(\)" to "\z\1".
133 * Use a reference count to avoid the need to copy this around. When it goes
134 * from 1 to zero the matches need to be freed.
135 */
136typedef struct
137{
138 short refcnt;
139 char_u *matches[NSUBEXP];
140} reg_extmatch_T;
141
Bram Moolenaarfbc0d2e2013-05-19 19:40:29 +0200142struct regengine
143{
144 regprog_T *(*regcomp)(char_u*, int);
145 int (*regexec)(regmatch_T*, char_u*, colnr_T);
146#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
147 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
148 int (*regexec_nl)(regmatch_T*, char_u*, colnr_T);
149#endif
150 long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
151#ifdef DEBUG
152 char_u *expr;
153#endif
154};
155
#endif  /* _REGEXP_H */