Bram Moolenaar | edf3f97 | 2016-08-29 22:49:24 +0200 | [diff] [blame] | 1 | /* vi:set ts=8 sts=4 sw=4 noet: |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 2 | * |
| 3 | * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
| 4 | * |
| 5 | * This is NOT the original regular expression code as written by Henry |
| 6 | * Spencer. This code has been modified specifically for use with Vim, and |
| 7 | * should not be used apart from compiling Vim. If you want a good regular |
| 8 | * expression library, get the original code. |
| 9 | * |
| 10 | * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
| 11 | */ |
| 12 | |
| 13 | #ifndef _REGEXP_H |
| 14 | #define _REGEXP_H |
| 15 | |
/*
 * Upper bound on the number of sub-matches: 10.
 * Index 0 holds the whole match, referenced with "\0".
 * Index 1 holds the first sub-match, referenced with "\1".
 * This continues up to index 9 (the tenth entry), referenced with "\9".
 */
#define NSUBEXP  10
| 23 | |
/*
 * NFA engine: the number of braces that are allowed.
 * TODO(RE): Use dynamic memory allocation instead of static, like here
 */
#define NFA_MAX_BRACES 20
| 29 | |
/*
 * NFA engine: the number of states that are allowed.
 */
#define NFA_MAX_STATES 100000
// NOTE(review): not documented here; the name suggests a sentinel meaning
// "too expensive" -- confirm against its users before relying on that.
#define NFA_TOO_EXPENSIVE (-1)
Bram Moolenaar | fda3729 | 2014-11-05 14:27:36 +0100 | [diff] [blame] | 35 | |
// Selects the regexp engine to use; needed for vim_regcomp().
// The numeric values must match with the 'regexpengine' option.
#define AUTOMATIC_ENGINE    0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE          2
| 41 | |
// Forward declaration; struct regengine is defined further down in this file.
typedef struct regengine regengine_T;

/*
 * Structure returned by vim_regcomp() to pass on to vim_regexec().
 * This is the general structure.  For the actual matcher, two specific
 * structures are used (bt_regprog_T and nfa_regprog_T, see below); both
 * start with these same five members.
 */
typedef struct regprog
{
    regengine_T         *engine;
    unsigned            regflags;
    unsigned            re_engine;  // automatic, backtracking or nfa engine
    unsigned            re_flags;   // second argument for vim_regcomp()
    int                 re_in_use;  // prog is being executed
} regprog_T;
| 57 | |
| 58 | /* |
| 59 | * Structure used by the back track matcher. |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 60 | * These fields are only to be used in regexp.c! |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 61 | * See regexp.c for an explanation. |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 62 | */ |
| 63 | typedef struct |
| 64 | { |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 65 | // These four members implement regprog_T |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 66 | regengine_T *engine; |
| 67 | unsigned regflags; |
Bram Moolenaar | fda3729 | 2014-11-05 14:27:36 +0100 | [diff] [blame] | 68 | unsigned re_engine; |
Bram Moolenaar | 0270f38 | 2018-07-17 05:43:58 +0200 | [diff] [blame] | 69 | unsigned re_flags; |
| 70 | int re_in_use; |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 71 | |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 72 | int regstart; |
| 73 | char_u reganch; |
| 74 | char_u *regmust; |
| 75 | int regmlen; |
Bram Moolenaar | efb23f2 | 2013-06-01 23:02:54 +0200 | [diff] [blame] | 76 | #ifdef FEAT_SYN_HL |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 77 | char_u reghasz; |
Bram Moolenaar | efb23f2 | 2013-06-01 23:02:54 +0200 | [diff] [blame] | 78 | #endif |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 79 | char_u program[1]; // actually longer.. |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 80 | } bt_regprog_T; |
| 81 | |
/*
 * Structure representing a NFA state.
 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
 * The two possible outgoing edges are "out" and "out1".
 */
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
    int                 c;
    nfa_state_T         *out;
    nfa_state_T         *out1;
    int                 id;
    int                 lastlist[2]; // 0: normal, 1: recursive
    int                 val;
};
| 96 | |
| 97 | /* |
| 98 | * Structure used by the NFA matcher. |
| 99 | */ |
| 100 | typedef struct |
| 101 | { |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 102 | // These three members implement regprog_T |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 103 | regengine_T *engine; |
| 104 | unsigned regflags; |
Bram Moolenaar | fda3729 | 2014-11-05 14:27:36 +0100 | [diff] [blame] | 105 | unsigned re_engine; |
Bram Moolenaar | 0270f38 | 2018-07-17 05:43:58 +0200 | [diff] [blame] | 106 | unsigned re_flags; |
| 107 | int re_in_use; |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 108 | |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 109 | nfa_state_T *start; // points into state[] |
Bram Moolenaar | d89616e | 2013-06-06 18:46:06 +0200 | [diff] [blame] | 110 | |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 111 | int reganch; // pattern starts with ^ |
| 112 | int regstart; // char at start of pattern |
| 113 | char_u *match_text; // plain text to match with |
Bram Moolenaar | d89616e | 2013-06-06 18:46:06 +0200 | [diff] [blame] | 114 | |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 115 | int has_zend; // pattern contains \ze |
| 116 | int has_backref; // pattern contains \1 .. \9 |
Bram Moolenaar | efb23f2 | 2013-06-01 23:02:54 +0200 | [diff] [blame] | 117 | #ifdef FEAT_SYN_HL |
| 118 | int reghasz; |
| 119 | #endif |
Bram Moolenaar | 69afb7b | 2013-06-02 15:55:55 +0200 | [diff] [blame] | 120 | char_u *pattern; |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 121 | int nsubexp; // number of () |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 122 | int nstate; |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 123 | nfa_state_T state[1]; // actually longer.. |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 124 | } nfa_regprog_T; |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 125 | |
| 126 | /* |
| 127 | * Structure to be used for single-line matching. |
| 128 | * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". |
| 129 | * When there is no match, the pointer is NULL. |
| 130 | */ |
| 131 | typedef struct |
| 132 | { |
| 133 | regprog_T *regprog; |
| 134 | char_u *startp[NSUBEXP]; |
| 135 | char_u *endp[NSUBEXP]; |
| 136 | int rm_ic; |
| 137 | } regmatch_T; |
| 138 | |
| 139 | /* |
| 140 | * Structure to be used for multi-line matching. |
| 141 | * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" |
| 142 | * and ends in line "endpos[no].lnum" just before column "endpos[no].col". |
| 143 | * The line numbers are relative to the first line, thus startpos[0].lnum is |
| 144 | * always 0. |
| 145 | * When there is no match, the line number is -1. |
| 146 | */ |
| 147 | typedef struct |
| 148 | { |
| 149 | regprog_T *regprog; |
| 150 | lpos_T startpos[NSUBEXP]; |
| 151 | lpos_T endpos[NSUBEXP]; |
| 152 | int rmm_ic; |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 153 | colnr_T rmm_maxcol; // when not zero: maximum column |
Bram Moolenaar | 071d427 | 2004-06-13 20:20:40 +0000 | [diff] [blame] | 154 | } regmmatch_T; |
| 155 | |
| 156 | /* |
| 157 | * Structure used to store external references: "\z\(\)" to "\z\1". |
| 158 | * Use a reference count to avoid the need to copy this around. When it goes |
| 159 | * from 1 to zero the matches need to be freed. |
| 160 | */ |
| 161 | typedef struct |
| 162 | { |
| 163 | short refcnt; |
| 164 | char_u *matches[NSUBEXP]; |
| 165 | } reg_extmatch_T; |
| 166 | |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 167 | struct regengine |
| 168 | { |
| 169 | regprog_T *(*regcomp)(char_u*, int); |
Bram Moolenaar | 473de61 | 2013-06-08 18:19:48 +0200 | [diff] [blame] | 170 | void (*regfree)(regprog_T *); |
Bram Moolenaar | fbd0b0a | 2017-06-17 18:44:21 +0200 | [diff] [blame] | 171 | int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int); |
| 172 | long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *); |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 173 | char_u *expr; |
Bram Moolenaar | fbc0d2e | 2013-05-19 19:40:29 +0200 | [diff] [blame] | 174 | }; |
| 175 | |
Bram Moolenaar | 9bf703d | 2019-11-30 19:44:38 +0100 | [diff] [blame] | 176 | #endif // _REGEXP_H |