blob: 0b0024453d2745649c190ccc98c6eaf5f826f7d9 [file] [log] [blame]
" Tests for regexp in latin1 encoding

" The patterns and texts in this file contain 8-bit characters, so both the
" internal encoding and the encoding of this script are set to latin1.
set encoding=latin1
scriptencoding latin1

" NOTE(review): presumably provides the CheckOption / CheckFunction commands
" used by Test_pattern_compile_speed() - confirm against check.vim.
source check.vim

" Verify equivalence classes ([[=x=]]) for the latin1 letters, using whatever
" regexp engine is currently selected with 'regexpengine'.
" Each whitespace separated word below is one group of characters that are
" expected to be equivalent to each other and to nothing else in the list.
func s:equivalence_test()
  let classes = split("AÀÁÂÃÄÅ B C D EÈÉÊË F G H IÌÍÎÏ J K L M NÑ OÒÓÔÕÖØ P Q R S T UÙÚÛÜ V W X YÝ Z aàáâãäå b c d eèéêë f g h iìíîï j k l m nñ oòóôõöø p q r s t uùúûü v w x yýÿ z")
  for members in classes
    for ch in split(members, '\zs')
      let equiv = '[[=' . ch . '=]]'
      " The equivalence class of any member must match every character in
      " its own group.
      call assert_match('^' . equiv . '*$', members)
      for other in classes
        if other == members
          continue
        endif
        " The equivalence class must not match a character of any other
        " group.
        call assert_equal(-1, match(other, equiv))
      endfor
    endfor
  endfor
endfunc
26
" Run the equivalence class checks with the backtracking engine
" ('regexpengine' set to 1).
func Test_equivalence_re1()
  set re=1
  call s:equivalence_test()
  " Go back to automatic engine selection so that following tests are not
  " silently restricted to one engine.
  set re=0
endfunc
31
" Run the equivalence class checks with the NFA engine
" ('regexpengine' set to 2).
func Test_equivalence_re2()
  set re=2
  call s:equivalence_test()
  " Go back to automatic engine selection so that following tests are not
  " silently restricted to one engine.
  set re=0
endfunc
Bram Moolenaarf5a39442016-08-16 21:04:41 +020036
" A ":s" command whose replacement expression executes another ":s" command
" must leave Vim in a normal state afterwards.
func Test_recursive_substitute()
  new
  " The "\=" expression runs a nested substitute through execute().
  s/^/\=execute("s#^##gn")
  " Setting a window variable would fail if the nested command had left the
  " sandbox active.
  call setwinvar(1, 'myvar', 1)
  bwipeout!
endfunc
Bram Moolenaard5638832016-09-09 17:59:50 +020044
" Nested groups used as back references in a replacement string (the example
" from change.txt), checked with every 'regexpengine' value for both the ":s"
" command and the substitute() function.
func Test_nested_backrefs()
  let pat = '\(\(a[a-d] \)*\)\(x\)'
  let sub = '-\1- -\2- -\3-'
  let want = '-aa ab - -ab - -x-'

  new
  for engine in [0, 1, 2]
    let &regexpengine = engine

    " Ex command variant
    call setline(1, 'aa ab x')
    1s/\(\(a[a-d] \)*\)\(x\)/-\1- -\2- -\3-/
    call assert_equal(want, getline(1))

    " Function variant
    call assert_equal(want, substitute('aa ab x', pat, sub, ''))
  endfor
  bwipeout!
  let &regexpengine = 0
endfunc
Bram Moolenaar16b35782016-09-09 20:29:50 +020059
" An optional group that ends in the end-of-word atom "\>" must still be
" reported as a submatch, with every 'regexpengine' value.
func Test_eow_with_optional()
  " matchlist() always returns the whole match plus nine submatches.
  let expected = ['abc def', 'abc', 'def', '', '', '', '', '', '', '']
  for re in range(0, 2)
    exe 'set re=' . re
    let actual = matchlist('abc def', '\(abc\>\)\?\s*\(def\)')
    call assert_equal(expected, actual)
  endfor
  " The loop ends with the NFA engine forced; go back to automatic selection
  " so that following tests are not affected.
  set re=0
endfunc
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +020068
" Back references: "\1" after its group matches, "\1" inside its own group is
" rejected with E65.  The engine is selected in the pattern with "\%#=".
func Test_backref()
  new
  call setline(1, ['one', 'two', 'three', 'four', 'five'])

  " 'three' is the only line with a doubled "e".
  for engine in [1, 2]
    call assert_equal(3, search('\%#=' . engine . '\(e\)\1'))
  endfor

  " Referring to group 1 before it is closed is an illegal back reference.
  for engine in [1, 2]
    call assert_fails('call search("\\%#=' . engine . '\\(e\\1\\)")', 'E65:')
  endfor
  bwipeout!
endfunc
Bram Moolenaar6057ed42019-01-14 23:19:29 +010078
" Invalid use of multi items must produce the expected error, which differs
" between the two regexp engines.
func Test_multi_failure()
  " Each entry: 'regexpengine' value, search command, expected error.
  let cases = [
        \ [1, '/a**', 'E61:'],
        \ [1, '/a*\+', 'E62:'],
        \ [1, '/a\{a}', 'E554:'],
        \ [2, '/a**', 'E871:'],
        \ [2, '/a*\+', 'E871:'],
        \ [2, '/a\{a}', 'E554:'],
        \ ]
  for [engine, cmd, errcode] in cases
    let &regexpengine = engine
    call assert_fails(cmd, errcode)
  endfor
  set re=0
endfunc
Bram Moolenaar5567ad42019-02-12 23:05:46 +010090
" A pattern with deeply nested, heavily repeated empty groups must simply
" fail to match instead of crashing.
func Test_recursive_addstate()
  " This will call addstate() recursively until it runs into the limit.
  call assert_equal(0, search('\v((){328}){389}'))
endfunc
Bram Moolenaar15bbd6e2019-02-13 20:31:50 +010096
" A pattern that needs more memory than allowed must fail with E363 instead
" of crashing.
func Test_out_of_memory()
  new
  " Put ",n" in the buffer as the text to search in.
  s/^/,n
  " This will be slow...
  call assert_fails('call search("\\v((n||<)+);")', 'E363:')
  " Do not leave the scratch window and its modified buffer behind.
  bwipe!
endfunc
Bram Moolenaar985079c2019-02-16 17:07:47 +0100103
" Unterminated "[[=" and "[[." items are searched for as literal text; the
" buffer line itself is used as the pattern, so it must be found on line 1.
func Test_get_equi_class()
  new
  " Incomplete equivalence class caused invalid memory access
  s/^/[[=
  call assert_equal(1, search(getline(1)))
  " Same check for an incomplete collating element.
  s/.*/[[.
  call assert_equal(1, search(getline(1)))
  " Do not leave the scratch window and its modified buffer behind.
  bwipe!
endfunc
Bram Moolenaar8bfd9462019-02-16 18:07:57 +0100112
" [[:keyword:]] must follow the 'iskeyword' value of the current buffer: with
" the same two lines, the first buffer (a-z) matches on line 1 and the second
" buffer (A-Z) matches on line 2.  Uses the backtracking engine.
func Test_rex_init()
  " Remember 'incsearch' so that it can be put back when done.
  let save_incsearch = &incsearch
  set noincsearch
  set re=1
  new
  setlocal iskeyword=a-z
  call setline(1, ['abc', 'ABC'])
  call assert_equal(1, search('[[:keyword:]]'))
  new
  setlocal iskeyword=A-Z
  call setline(1, ['abc', 'ABC'])
  call assert_equal(2, search('[[:keyword:]]'))
  " One wipe for each of the two windows opened above.
  bwipe!
  bwipe!
  set re=0
  let &incsearch = save_incsearch
endfunc
Bram Moolenaara5483442019-02-17 20:17:02 +0100128
" Collections that contain a range with "\n" as one of its ends must be
" handled; neither of them matches in a buffer that only contains "a".
func Test_range_with_newline()
  new
  call setline(1, "a")
  for pat in ["[ -*\\n- ]", "[ -*\\t-\\n]"]
    call assert_equal(0, search(pat))
  endfor
  bwipeout!
endfunc
Bram Moolenaar38f08e72019-02-20 22:04:32 +0100136
" Setting 'spellcapcheck' to a deeply nested pattern must be handled in a
" reasonable amount of time.  Skipped when the 'spellcapcheck' option or the
" reltimefloat() function is not available.
func Test_pattern_compile_speed()
  CheckOption spellcapcheck
  CheckFunction reltimefloat

  let start = reltime()
  " this used to be very slow, now it should be about a second
  set spc=\\v(((((Nxxxxxxx&&xxxx){179})+)+)+){179}
  call assert_inrange(0.01, 10.0, reltimefloat(reltime(start)))
  " Restore the default value rather than leaving the option empty for the
  " tests that follow.
  set spc&
endfunc
Bram Moolenaarf9cb05c2019-12-15 13:39:22 +0100147
148" Tests for regexp patterns without multi-byte support.
func Test_regexp_single_line_pat()
  " Table driven test: every entry of "tl" is matched with matchlist() using
  " the selected regexp engines and the whole match plus all nine submatches
  " are compared with the expected values.  Problems are reported with
  " assert_report().
  "
  " tl is a List of Lists with:
  "    regexp engines to test
  "       0 - test with 'regexpengine' values 0 and 1
  "       1 - test with 'regexpengine' values 0 and 2
  "       2 - test with 'regexpengine' values 0, 1 and 2
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first three items (engines, pattern
  "  and text).
  let tl = []

  call add(tl, [2, 'ab', 'aab', 'ab'])
  call add(tl, [2, 'b', 'abcdef', 'b'])
  call add(tl, [2, 'bc*', 'abccccdef', 'bcccc'])
  call add(tl, [2, 'bc\{-}', 'abccccdef', 'b'])
  call add(tl, [2, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
  call add(tl, [2, 'bc*', 'abbdef', 'b'])
  call add(tl, [2, 'c*', 'ccc', 'ccc'])
  call add(tl, [2, 'bc*', 'abdef', 'b'])
  call add(tl, [2, 'c*', 'abdef', ''])
  call add(tl, [2, 'bc\+', 'abccccdef', 'bcccc'])
  call add(tl, [2, 'bc\+', 'abdef']) " no match
  " match escape character in a string
  call add(tl, [2, '.\e.', "one\<Esc>two", "e\<Esc>t"])
  " match backspace character in a string
  call add(tl, [2, '.\b.', "one\<C-H>two", "e\<C-H>t"])
  " match newline character in a string
  call add(tl, [2, 'o\nb', "foo\nbar", "o\nb"])

  " operator \|
  call add(tl, [2, 'a\|ab', 'cabd', 'a']) " alternation is ordered

  call add(tl, [2, 'c\?', 'ccb', 'c'])
  call add(tl, [2, 'bc\?', 'abd', 'b'])
  call add(tl, [2, 'bc\?', 'abccd', 'bc'])

  call add(tl, [2, '\va{1}', 'ab', 'a'])

  call add(tl, [2, '\va{2}', 'aa', 'aa'])
  call add(tl, [2, '\va{2}', 'caad', 'aa'])
  call add(tl, [2, '\va{2}', 'aba'])
  call add(tl, [2, '\va{2}', 'ab'])
  call add(tl, [2, '\va{2}', 'abaa', 'aa'])
  call add(tl, [2, '\va{2}', 'aaa', 'aa'])

  call add(tl, [2, '\vb{1}', 'abca', 'b'])
  call add(tl, [2, '\vba{2}', 'abaa', 'baa'])
  call add(tl, [2, '\vba{3}', 'aabaac'])

  call add(tl, [2, '\v(ab){1}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1}', 'dabc', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1}', 'acb'])

  call add(tl, [2, '\v(ab){0,2}', 'acb', "", ""])
  call add(tl, [2, '\v(ab){0,2}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1,2}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1,2}', 'ababc', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2,4}', 'ababcab', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2,4}', 'abcababa', 'abab', 'ab'])

  call add(tl, [2, '\v(ab){2}', 'abab', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2}', 'cdababe', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2}', 'abac'])
  call add(tl, [2, '\v(ab){2}', 'abacabab', 'abab', 'ab'])
  call add(tl, [2, '\v((ab){2}){2}', 'abababab', 'abababab', 'abab', 'ab'])
  call add(tl, [2, '\v((ab){2}){2}', 'abacabababab', 'abababab', 'abab', 'ab'])

  call add(tl, [2, '\v(a{1}){1}', 'a', 'a', 'a'])
  call add(tl, [2, '\v(a{2}){1}', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'aaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'daaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{1}){2}', 'daaac', 'aa', 'a'])
  call add(tl, [2, '\v(a{1}){2}', 'aaa', 'aa', 'a'])
  call add(tl, [2, '\v(a{2})+', 'adaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2})+', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{1}){2}', 'aa', 'aa', 'a'])
  call add(tl, [2, '\v(a{1}){1}', 'a', 'a', 'a'])
  call add(tl, [2, '\v(a{2}){2}', 'aaaa', 'aaaa', 'aa'])
  call add(tl, [2, '\v(a{2}){2}', 'aaabaaaa', 'aaaa', 'aa'])

  call add(tl, [2, '\v(a+){2}', 'dadaac', 'aa', 'a'])
  call add(tl, [2, '\v(a{3}){2}', 'aaaaaaa', 'aaaaaa', 'aaa'])

  call add(tl, [2, '\v(a{1,2}){2}', 'daaac', 'aaa', 'a'])
  call add(tl, [2, '\v(a{1,3}){2}', 'daaaac', 'aaaa', 'a'])
  call add(tl, [2, '\v(a{1,3}){2}', 'daaaaac', 'aaaaa', 'aa'])
  call add(tl, [2, '\v(a{1,3}){3}', 'daac'])
  call add(tl, [2, '\v(a{1,2}){2}', 'dac'])
  call add(tl, [2, '\v(a+)+', 'daac', 'aa', 'aa'])
  call add(tl, [2, '\v(a+)+', 'aaa', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a+){1,2}', 'aaa', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a+)(a+)', 'aaa', 'aaa', 'aa', 'a'])
  call add(tl, [2, '\v(a{3})+', 'daaaac', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a|b|c)+', 'aacb', 'aacb', 'b'])
  call add(tl, [2, '\v(a|b|c){2}', 'abcb', 'ab', 'b'])
  call add(tl, [2, '\v(abc){2}', 'abcabd', ])
  call add(tl, [2, '\v(abc){2}', 'abdabcabc','abcabc', 'abc'])

  call add(tl, [2, 'a*', 'cc', ''])
  call add(tl, [2, '\v(a*)+', 'cc', ''])
  call add(tl, [2, '\v((ab)+)+', 'ab', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(((ab)+)+)+', 'ab', 'ab', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(((ab)+)+)+', 'dababc', 'abab', 'abab', 'abab', 'ab'])
  call add(tl, [2, '\v(a{0,2})+', 'cc', ''])
  call add(tl, [2, '\v(a*)+', '', ''])
  call add(tl, [2, '\v((a*)+)+', '', ''])
  call add(tl, [2, '\v((ab)*)+', '', ''])
  call add(tl, [2, '\va{1,3}', 'aab', 'aa'])
  call add(tl, [2, '\va{2,3}', 'abaa', 'aa'])

  call add(tl, [2, '\v((ab)+|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
  call add(tl, [2, '\v(a{2})|(b{3})', 'bbabbbb', 'bbb', '', 'bbb'])
  call add(tl, [2, '\va{2}|b{2}', 'abab'])
  call add(tl, [2, '\v(a)+|(c)+', 'bbacbaacbbb', 'a', 'a'])
  call add(tl, [2, '\vab{2,3}c', 'aabbccccccccccccc', 'abbc'])
  call add(tl, [2, '\vab{2,3}c', 'aabbbccccccccccccc', 'abbbc'])
  call add(tl, [2, '\vab{2,3}cd{2,3}e', 'aabbbcddee', 'abbbcdde'])
  call add(tl, [2, '\va(bc){2}d', 'aabcbfbc' ])
  call add(tl, [2, '\va*a{2}', 'a', ])
  call add(tl, [2, '\va*a{2}', 'aa', 'aa' ])
  call add(tl, [2, '\va*a{2}', 'aaa', 'aaa' ])
  call add(tl, [2, '\va*a{2}', 'bbbabcc', ])
  call add(tl, [2, '\va*b*|a*c*', 'a', 'a'])
  call add(tl, [2, '\va{1}b{1}|a{1}b{1}', ''])

  " submatches
  call add(tl, [2, '\v(a)', 'ab', 'a', 'a'])
  call add(tl, [2, '\v(a)(b)', 'ab', 'ab', 'a', 'b'])
  call add(tl, [2, '\v(ab)(b)(c)', 'abbc', 'abbc', 'ab', 'b', 'c'])
  call add(tl, [2, '\v((a)(b))', 'ab', 'ab', 'ab', 'a', 'b'])
  call add(tl, [2, '\v(a)|(b)', 'ab', 'a', 'a'])

  call add(tl, [2, '\v(a*)+', 'aaaa', 'aaaa', ''])
  call add(tl, [2, 'x', 'abcdef'])

  "
  " Simple tests
  "

  " Search single groups
  call add(tl, [2, 'ab', 'aab', 'ab'])
  call add(tl, [2, 'ab', 'baced'])
  call add(tl, [2, 'ab', ' ab ', 'ab'])

  " Search multi-modifiers
  call add(tl, [2, 'x*', 'xcd', 'x'])
  call add(tl, [2, 'x*', 'xxxxxxxxxxxxxxxxsofijiojgf', 'xxxxxxxxxxxxxxxx'])
  " empty match is good
  call add(tl, [2, 'x*', 'abcdoij', ''])
  " no match here
  call add(tl, [2, 'x\+', 'abcdoin'])
  call add(tl, [2, 'x\+', 'abcdeoijdfxxiuhfij', 'xx'])
  call add(tl, [2, 'x\+', 'xxxxx', 'xxxxx'])
  call add(tl, [2, 'x\+', 'abc x siufhiush xxxxxxxxx', 'x'])
  call add(tl, [2, 'x\=', 'x sdfoij', 'x'])
  call add(tl, [2, 'x\=', 'abc sfoij', '']) " empty match is good
  call add(tl, [2, 'x\=', 'xxxxxxxxx c', 'x'])
  call add(tl, [2, 'x\?', 'x sdfoij', 'x'])
  " empty match is good
  call add(tl, [2, 'x\?', 'abc sfoij', ''])
  call add(tl, [2, 'x\?', 'xxxxxxxxxx c', 'x'])

  call add(tl, [2, 'a\{0,0}', 'abcdfdoij', ''])
  " same thing as 'a?'
  call add(tl, [2, 'a\{0,1}', 'asiubid axxxaaa', 'a'])
  " same thing as 'a\{0,1}'
  call add(tl, [2, 'a\{1,0}', 'asiubid axxxaaa', 'a'])
  call add(tl, [2, 'a\{3,6}', 'aa siofuh'])
  call add(tl, [2, 'a\{3,6}', 'aaaaa asfoij afaa', 'aaaaa'])
  call add(tl, [2, 'a\{3,6}', 'aaaaaaaa', 'aaaaaa'])
  call add(tl, [2, 'a\{0}', 'asoiuj', ''])
  call add(tl, [2, 'a\{2}', 'aaaa', 'aa'])
  call add(tl, [2, 'a\{2}', 'iuash fiusahfliusah fiushfilushfi uhsaifuh askfj nasfvius afg aaaa sfiuhuhiushf', 'aa'])
  call add(tl, [2, 'a\{2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
  " same thing as 'a*'
  call add(tl, [2, 'a\{0,}', 'oij sdigfusnf', ''])
  call add(tl, [2, 'a\{0,}', 'aaaaa aa', 'aaaaa'])
  call add(tl, [2, 'a\{2,}', 'sdfiougjdsafg'])
  call add(tl, [2, 'a\{2,}', 'aaaaasfoij ', 'aaaaa'])
  call add(tl, [2, 'a\{5,}', 'xxaaaaxxx '])
  call add(tl, [2, 'a\{5,}', 'xxaaaaaxxx ', 'aaaaa'])
  call add(tl, [2, 'a\{,0}', 'oidfguih iuhi hiu aaaa', ''])
  call add(tl, [2, 'a\{,5}', 'abcd', 'a'])
  call add(tl, [2, 'a\{,5}', 'aaaaaaaaaa', 'aaaaa'])
  " leading star as normal char when \{} follows
  call add(tl, [2, '^*\{4,}$', '***'])
  call add(tl, [2, '^*\{4,}$', '****', '****'])
  call add(tl, [2, '^*\{4,}$', '*****', '*****'])
  " same thing as 'a*'
  call add(tl, [2, 'a\{}', 'bbbcddiuhfcd', ''])
  call add(tl, [2, 'a\{}', 'aaaaioudfh coisf jda', 'aaaa'])

  call add(tl, [2, 'a\{-0,0}', 'abcdfdoij', ''])
  " anti-greedy version of 'a?'
  call add(tl, [2, 'a\{-0,1}', 'asiubid axxxaaa', ''])
  call add(tl, [2, 'a\{-3,6}', 'aa siofuh'])
  call add(tl, [2, 'a\{-3,6}', 'aaaaa asfoij afaa', 'aaa'])
  call add(tl, [2, 'a\{-3,6}', 'aaaaaaaa', 'aaa'])
  call add(tl, [2, 'a\{-0}', 'asoiuj', ''])
  call add(tl, [2, 'a\{-2}', 'aaaa', 'aa'])
  call add(tl, [2, 'a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
  call add(tl, [2, 'a\{-0,}', 'oij sdigfusnf', ''])
  call add(tl, [2, 'a\{-0,}', 'aaaaa aa', ''])
  call add(tl, [2, 'a\{-2,}', 'sdfiougjdsafg'])
  call add(tl, [2, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
  call add(tl, [2, 'a\{-,0}', 'oidfguih iuhi hiu aaaa', ''])
  call add(tl, [2, 'a\{-,5}', 'abcd', ''])
  call add(tl, [2, 'a\{-,5}', 'aaaaaaaaaa', ''])
  " anti-greedy version of 'a*'
  call add(tl, [2, 'a\{-}', 'bbbcddiuhfcd', ''])
  call add(tl, [2, 'a\{-}', 'aaaaioudfh coisf jda', ''])

  " Test groups of characters and submatches
  call add(tl, [2, '\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc'])
  call add(tl, [2, '\(ab\)\+', 'abababaaaaa', 'ababab', 'ab'])
  call add(tl, [2, '\(abaaaaa\)*cd', 'cd', 'cd', ''])
  call add(tl, [2, '\(test1\)\? \(test2\)\?', 'test1 test3', 'test1 ', 'test1', ''])
  call add(tl, [2, '\(test1\)\= \(test2\) \(test4443\)\=', ' test2 test4443 yupiiiiiiiiiii', ' test2 test4443', '', 'test2', 'test4443'])
  call add(tl, [2, '\(\(sub1\) hello \(sub 2\)\)', 'asterix sub1 hello sub 2 obelix', 'sub1 hello sub 2', 'sub1 hello sub 2', 'sub1', 'sub 2'])
  call add(tl, [2, '\(\(\(yyxxzz\)\)\)', 'abcdddsfiusfyyzzxxyyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz'])
  call add(tl, [2, '\v((ab)+|c+)+', 'abcccaba', 'abcccab', 'ab', 'ab'])
  call add(tl, [2, '\v((ab)|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
  call add(tl, [2, '\v(a(c*)+b)+', 'acbababaaa', 'acbabab', 'ab', ''])
  call add(tl, [2, '\v(a|b*)+', 'aaaa', 'aaaa', ''])
  call add(tl, [2, '\p*', 'aá ', 'aá '])

  " Test greedy-ness and lazy-ness
  call add(tl, [2, 'a\{-2,7}','aaaaaaaaaaaaa', 'aa'])
  call add(tl, [2, 'a\{-2,7}x','aaaaaaaaax', 'aaaaaaax'])
  call add(tl, [2, 'a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa'])
  call add(tl, [2, 'a\{2,7}x','aaaaaaaaax', 'aaaaaaax'])
  call add(tl, [2, '\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz'])
  call add(tl, [2, '\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa',''])
  call add(tl, [2, '\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa'])
  call add(tl, [2, '\v(a{-1,3})+', 'aa', 'aa', 'a'])
  call add(tl, [2, '^\s\{-}\zs\( x\|x$\)', ' x', ' x', ' x'])
  call add(tl, [2, '^\s\{-}\zs\(x\| x$\)', ' x', ' x', ' x'])
  call add(tl, [2, '^\s\{-}\ze\(x\| x$\)', ' x', '', ' x'])
  call add(tl, [2, '^\(\s\{-}\)\(x\| x$\)', ' x', ' x', '', ' x'])

  " Test Character classes
  call add(tl, [2, '\d\+e\d\d','test 10e23 fd','10e23'])

  " Test collections and character range []
  call add(tl, [2, '\v[a]', 'abcd', 'a'])
  call add(tl, [2, 'a[bcd]', 'abcd', 'ab'])
  call add(tl, [2, 'a[b-d]', 'acbd', 'ac'])
  call add(tl, [2, '[a-d][e-f][x-x]d', 'cexdxx', 'cexd'])
  call add(tl, [2, '\v[[:alpha:]]+', 'abcdefghijklmnopqrstuvwxyz6','abcdefghijklmnopqrstuvwxyz'])
  call add(tl, [2, '[[:alpha:]\+]', '6x8','x'])
  call add(tl, [2, '[^abc]\+','abcabcabc'])
  call add(tl, [2, '[^abc]','defghiasijvoinasoiunbvb','d'])
  call add(tl, [2, '[^abc]\+','ddddddda','ddddddd'])
  call add(tl, [2, '[^a-d]\+','aaaAAAZIHFNCddd','AAAZIHFNC'])
  call add(tl, [2, '[a-f]*','iiiiiiii',''])
  call add(tl, [2, '[a-f]*','abcdefgh','abcdef'])
  call add(tl, [2, '[^a-f]\+','abcdefgh','gh'])
  call add(tl, [2, '[a-c]\{-3,6}','abcabc','abc'])
  call add(tl, [2, '[^[:alpha:]]\+','abcccadfoij7787ysf287yrnccdu','7787'])
  call add(tl, [2, '[-a]', '-', '-'])
  call add(tl, [2, '[a-]', '-', '-'])
  call add(tl, [2, '[a-f]*\c','ABCDEFGH','ABCDEF'])
  call add(tl, [2, '[abc][xyz]\c','-af-AF-BY--','BY'])
  " filename regexp
  call add(tl, [2, '[-./[:alnum:]_~]\+', 'log13.file', 'log13.file'])
  " special chars
  call add(tl, [2, '[\]\^\-\\]\+', '\^\\\-\---^', '\^\\\-\---^'])
  " collation elem
  call add(tl, [2, '[[.a.]]\+', 'aa', 'aa'])
  " middle of regexp
  call add(tl, [2, 'abc[0-9]*ddd', 'siuhabc ii'])
  call add(tl, [2, 'abc[0-9]*ddd', 'adf abc44482ddd oijs', 'abc44482ddd'])
  call add(tl, [2, '\_[0-9]\+', 'asfi9888u', '9888'])
  call add(tl, [2, '[0-9\n]\+', 'asfi9888u', '9888'])
  call add(tl, [2, '\_[0-9]\+', "asfi\n9888u", "\n9888"])
  call add(tl, [2, '\_f', " \na ", "\n"])
  call add(tl, [2, '\_f\+', " \na ", "\na"])
  call add(tl, [2, '[0-9A-Za-z-_.]\+', " @0_a.A-{ ", "0_a.A-"])

  " Test start/end of line, start/end of file
  call add(tl, [2, '^a.', "a_\nb ", "a_"])
  call add(tl, [2, '^a.', "b a \na_"])
  call add(tl, [2, '.a$', " a\n "])
  call add(tl, [2, '.a$', " a b\n_a", "_a"])
  call add(tl, [2, '\%^a.', "a a\na", "a "])
  call add(tl, [2, '\%^a', " a \na "])
  call add(tl, [2, '.a\%$', " a\n "])
  call add(tl, [2, '.a\%$', " a\n_a", "_a"])

  " Test recognition of character classes
  call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567'])
  call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89'])
  call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789'])
  call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% '])
  call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef'])
  call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% '])
  call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij'])
  call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% '])
  call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ'])
  call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% '])
  call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz'])
  call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz'])
  call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% '])
  call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%'])
  call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ'])
  call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ'])
  call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%'])
  call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ'])
  call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ'])
  call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])

  " Tests for \z features
  " match ends at \ze
  call add(tl, [2, 'xx \ze test', 'xx '])
  call add(tl, [2, 'abc\zeend', 'oij abcend', 'abc'])
  call add(tl, [2, 'aa\zebb\|aaxx', ' aabb ', 'aa'])
  call add(tl, [2, 'aa\zebb\|aaxx', ' aaxx ', 'aaxx'])
  call add(tl, [2, 'aabb\|aa\zebb', ' aabb ', 'aabb'])
  call add(tl, [2, 'aa\zebb\|aaebb', ' aabb ', 'aa'])
  " match starts at \zs
  call add(tl, [2, 'abc\zsdd', 'ddabcddxyzt', 'dd'])
  call add(tl, [2, 'aa \zsax', ' ax'])
  call add(tl, [2, 'abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match'])
  call add(tl, [2, '\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last'])
  call add(tl, [2, '\>\zs.', 'aword. ', '.'])
  " The second branch needs at least two consecutive blanks: one before \zs
  " and one or more after it.  The first branch cannot match here, because
  " "[" is not followed by "/".
  call add(tl, [2, '\s\+\ze\[/\|\s\zs\s\+', 'is   [a t', '  '])

  " Tests for \@= and \& features
  call add(tl, [2, 'abc\@=', 'abc', 'ab'])
  call add(tl, [2, 'abc\@=cd', 'abcd', 'abcd'])
  call add(tl, [2, 'abc\@=', 'ababc', 'ab'])
  " will never match, no matter the input text
  call add(tl, [2, 'abcd\@=e', 'abcd'])
  " will never match
  call add(tl, [2, 'abcd\@=e', 'any text in here ... '])
  call add(tl, [2, '\v(abc)@=..', 'xabcd', 'ab', 'abc'])
  call add(tl, [2, '\(.*John\)\@=.*Bob', 'here is John, and here is B'])
  call add(tl, [2, '\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend'])
  call add(tl, [2, '\<\S\+\())\)\@=', '$((i=i+1))', 'i=i+1', '))'])
  call add(tl, [2, '.*John\&.*Bob', 'here is John, and here is B'])
  call add(tl, [2, '.*John\&.*Bob', 'John is Bobs friend', 'John is Bob'])
  call add(tl, [2, '\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1'])
  call add(tl, [2, 'foo\(bar\)\@!', 'foobar'])
  call add(tl, [2, 'foo\(bar\)\@!', 'foo bar', 'foo'])
  call add(tl, [2, 'if \(\(then\)\@!.\)*$', ' if then else'])
  call add(tl, [2, 'if \(\(then\)\@!.\)*$', ' if else ', 'if else ', ' '])
  call add(tl, [2, '\(foo\)\@!bar', 'foobar', 'bar'])
  call add(tl, [2, '\(foo\)\@!...bar', 'foobar'])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' bar foo '])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' foo bar '])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' foo xxx ', 'foo'])
  call add(tl, [2, '[ ]\@!\p\%([ ]\@!\p\)*:', 'implicit mappings:', 'mappings:'])
  call add(tl, [2, '[ ]\@!\p\([ ]\@!\p\)*:', 'implicit mappings:', 'mappings:', 's'])
  call add(tl, [2, 'm\k\+_\@=\%(_\@!\k\)\@<=\k\+e', 'mx__xe', 'mx__xe'])
  call add(tl, [2, '\%(\U\@<=S\k*\|S\l\)R', 'SuR', 'SuR'])

  " Combining different tests and features
  call add(tl, [2, '[[:alpha:]]\{-2,6}', '787abcdiuhsasiuhb4', 'ab'])
  call add(tl, [2, '', 'abcd', ''])
  call add(tl, [2, '\v(())', 'any possible text', ''])
  call add(tl, [2, '\v%(ab(xyz)c)', ' abxyzc ', 'abxyzc', 'xyz'])
  call add(tl, [2, '\v(test|)empty', 'tesempty', 'empty', ''])
  call add(tl, [2, '\v(a|aa)(a|aa)', 'aaa', 'aa', 'a', 'a'])

  " \%u and friends
  call add(tl, [2, '\%d32', 'yes no', ' '])
  call add(tl, [2, '\%o40', 'yes no', ' '])
  call add(tl, [2, '\%x20', 'yes no', ' '])
  call add(tl, [2, '\%u0020', 'yes no', ' '])
  call add(tl, [2, '\%U00000020', 'yes no', ' '])
  call add(tl, [2, '\%d0', "yes\x0ano", "\x0a"])

  "" \%[abc]
  call add(tl, [2, 'foo\%[bar]', 'fobar'])
  call add(tl, [2, 'foo\%[bar]', 'foobar', 'foobar'])
  call add(tl, [2, 'foo\%[bar]', 'fooxx', 'foo'])
  call add(tl, [2, 'foo\%[bar]', 'foobxx', 'foob'])
  call add(tl, [2, 'foo\%[bar]', 'foobaxx', 'fooba'])
  call add(tl, [2, 'foo\%[bar]', 'foobarxx', 'foobar'])
  call add(tl, [2, 'foo\%[bar]x', 'foobxx', 'foobx'])
  call add(tl, [2, 'foo\%[bar]x', 'foobarxx', 'foobarx'])
  call add(tl, [2, '\%[bar]x', 'barxx', 'barx'])
  call add(tl, [2, '\%[bar]x', 'bxx', 'bx'])
  call add(tl, [2, '\%[bar]x', 'xxx', 'x'])
  call add(tl, [2, 'b\%[[ao]r]', 'bar bor', 'bar'])
  call add(tl, [2, 'b\%[[]]r]', 'b]r bor', 'b]r'])
  call add(tl, [2, '@\%[\w\-]*', '<http://john.net/pandoc/>[@pandoc]', '@pandoc'])

  " Alternatives, must use first longest match
  call add(tl, [2, 'goo\|go', 'google', 'goo'])
  call add(tl, [2, '\<goo\|\<go', 'google', 'goo'])
  call add(tl, [2, '\<goo\|go', 'google', 'goo'])

  " Back references
  call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc'])
  call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
  call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
  call add(tl, [2, '\(\d*\)a \1b', ' a b ', 'a b', ''])
  call add(tl, [2, '^.\(.\).\_..\1.', "aaa\naaa\nb", "aaa\naaa", 'a'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<!$', 'foo.bat/foo.com', 'foo.bat/foo.com', 'bat'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<!$', 'foo.bat/foo.bat'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<=$', 'foo.bat/foo.bat', 'foo.bat/foo.bat', 'bat', 'bat'])
  call add(tl, [2, '\\\@<!\${\(\d\+\%(:.\{-}\)\?\\\@<!\)}', '2013-06-27${0}', '${0}', '0'])
  call add(tl, [2, '^\(a*\)\1$', 'aaaaaaaa', 'aaaaaaaa', 'aaaa'])
  call add(tl, [2, '^\(a\{-2,}\)\1\+$', 'aaaaaaaaa', 'aaaaaaaaa', 'aaa'])

  " Look-behind with limit
  call add(tl, [2, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
  call add(tl, [2, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
  call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
  call add(tl, [2, '\(foo\)\@<!bar.', 'xx foobar1 xbar2 xx', 'bar2'])

  " look-behind match in front of a zero-width item
  call add(tl, [2, '\v\C%(<Last Changed:\s+)@<=.*$', '" test header'])
  call add(tl, [2, '\v\C%(<Last Changed:\s+)@<=.*$', '" Last Changed: 1970', '1970'])
  call add(tl, [2, '\(foo\)\@<=\>', 'foobar'])
  call add(tl, [2, '\(foo\)\@<=\>', 'barfoo', '', 'foo'])
  call add(tl, [2, '\(foo\)\@<=.*', 'foobar', 'bar', 'foo'])

  " complicated look-behind match
  call add(tl, [2, '\(r\@<=\|\w\@<!\)\/', 'x = /word/;', '/'])
  call add(tl, [2, '^[a-z]\+\ze \&\(asdf\)\@<!', 'foo bar', 'foo'])

  "" \@>
  call add(tl, [2, '\(a*\)\@>a', 'aaaa'])
  call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa'])
  call add(tl, [2, '^\(.\{-}b\)\@>.', ' abcbd', ' abc', ' ab'])
  call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', ''])
  " TODO: BT engine does not restore submatch after failure
  call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa'])

  " "\_" prepended negated collection matches EOL
  call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
  call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])

  " Requiring lots of states.
  call add(tl, [2, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])

  " Skip adding state twice
  call add(tl, [2, '^\%(\%(^\s*#\s*if\>\|#\s*if\)\)\(\%>1c.*$\)\@=', "#if FOO", "#if", ' FOO'])

  " Test \%V atom
  call add(tl, [2, '\%>70vGesamt', 'Jean-Michel Charlier & Victor Hubinon\Gesamtausgabe [Salleck] Buck Danny {Jean-Michel Charlier & Victor Hubinon}\Gesamtausgabe', 'Gesamt'])

  " Test for ignoring case and matching repeated characters
  call add(tl, [2, '\cb\+', 'aAbBbBcC', 'bBbB'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    " index in "t" of the expected whole match
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine that this entry is not meant for.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", caused an exception: \"'
              \ . v:exception . '\"')
        " "l" is either not set at all or still holds the result of the
        " previous run; checking it would only add bogus errors.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", did not match, expected: \"'
              \ . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", match: \"' . l[0]
              \ . '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", match: \"' . l[0]
              \ . '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif

      " check all the nine submatches
      if len(l) > 0
        for i in range(1, 9)
          " Submatches that are not listed are expected to be empty.
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat
                  \ . '\", text: \"' . text . '\", submatch ' . i . ': \"'
                  \ . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor

  " The loops end with the NFA engine forced; go back to automatic selection
  " so that following tests are not affected.
  let &regexpengine = 0

  unlet t tl e l
endfunc
663
" Tests for multi-line regexp patterns without multi-byte support.
"
" Every table entry is run through ":%s/{pattern}/XX/" in a scratch buffer,
" once for each selected 'regexpengine' value, and the resulting buffer lines
" are compared with the expected lines.
func Test_regexp_multiline_pat()
  " tl is a List of Lists with:
  "    regexp engines to test
  "       0 - test with 'regexpengine' values 0 and 1
  "       1 - test with 'regexpengine' values 0 and 2
  "       2 - test with 'regexpengine' values 0, 1 and 2
  "    regexp pattern
  "    List with text to test the pattern on
  "    List with the expected match
  let tl = []

  " back references
  call add(tl, [2, '^.\(.\).\_..\1.', ['aaa', 'aaa', 'b'], ['XX', 'b']])
  call add(tl, [2, '\v.*\/(.*)\n.*\/\1$', ['./Dir1/Dir2/zyxwvuts.txt', './Dir1/Dir2/abcdefgh.bat', '', './Dir1/Dir2/file1.txt', './OtherDir1/OtherDir2/file1.txt'], ['./Dir1/Dir2/zyxwvuts.txt', './Dir1/Dir2/abcdefgh.bat', '', 'XX']])

  " line breaks
  call add(tl, [2, '\S.*\nx', ['abc', 'def', 'ghi', 'xjk', 'lmn'], ['abc', 'def', 'XXjk', 'lmn']])

  " Any single character or end-of-line
  call add(tl, [2, '\_.\+', ['a', 'b', 'c'], ['XX']])
  " Any identifier or end-of-line
  call add(tl, [2, '\_i\+', ['a', 'b', ';', '2'], ['XX;XX']])
  " Any identifier but excluding digits or end-of-line
  call add(tl, [2, '\_I\+', ['a', 'b', ';', '2'], ['XX;XX2XX']])
  " Any keyword or end-of-line
  call add(tl, [2, '\_k\+', ['a', 'b', '=', '2'], ['XX=XX']])
  " Any keyword but excluding digits or end-of-line
  call add(tl, [2, '\_K\+', ['a', 'b', '=', '2'], ['XX=XX2XX']])
  " Any filename character or end-of-line
  call add(tl, [2, '\_f\+', ['a', 'b', '.', '5'], ['XX']])
  " Any filename character but excluding digits or end-of-line
  call add(tl, [2, '\_F\+', ['a', 'b', '.', '5'], ['XX5XX']])
  " Any printable character or end-of-line
  call add(tl, [2, '\_p\+', ['a', 'b', '=', '4'], ['XX']])
  " Any printable character excluding digits or end-of-line
  call add(tl, [2, '\_P\+', ['a', 'b', '=', '4'], ['XX4XX']])
  " Any whitespace character or end-of-line
  call add(tl, [2, '\_s\+', [' ', ' ', 'a', 'b'], ['XXaXXbXX']])
  " Any non-whitespace character or end-of-line
  call add(tl, [2, '\_S\+', [' ', ' ', 'a', 'b'], [' XX XX']])
  " Any decimal digit or end-of-line
  call add(tl, [2, '\_d\+', ['1', 'a', '2', 'b', '3'], ['XXaXXbXX']])
  " Any non-decimal digit or end-of-line
  call add(tl, [2, '\_D\+', ['1', 'a', '2', 'b', '3'], ['1XX2XX3XX']])
  " Any hexadecimal digit or end-of-line
  call add(tl, [2, '\_x\+', ['1', 'a', 'g', '9', '8'], ['XXgXX']])
  " Any non-hexadecimal digit or end-of-line
  call add(tl, [2, '\_X\+', ['1', 'a', 'g', '9', '8'], ['1XXaXX9XX8XX']])
  " Any octal digit or end-of-line
  call add(tl, [2, '\_o\+', ['0', '7', '8', '9', '0'], ['XX8XX9XX']])
  " Any non-octal digit or end-of-line
  call add(tl, [2, '\_O\+', ['0', '7', '8', '9', '0'], ['0XX7XX0XX']])
  " Any word character or end-of-line
  call add(tl, [2, '\_w\+', ['A', 'B', '=', 'C', 'D'], ['XX=XX']])
  " Any non-word character or end-of-line
  call add(tl, [2, '\_W\+', ['A', 'B', '=', 'C', 'D'], ['AXXBXXCXXDXX']])
  " Any head-of-word character or end-of-line
  call add(tl, [2, '\_h\+', ['a', '1', 'b', '2', 'c'], ['XX1XX2XX']])
  " Any non-head-of-word character or end-of-line
  call add(tl, [2, '\_H\+', ['a', '1', 'b', '2', 'c'], ['aXXbXXcXX']])
  " Any alphabetic character or end-of-line
  call add(tl, [2, '\_a\+', ['a', '1', 'b', '2', 'c'], ['XX1XX2XX']])
  " Any non-alphabetic character or end-of-line
  call add(tl, [2, '\_A\+', ['a', '1', 'b', '2', 'c'], ['aXXbXXcXX']])
  " Any lowercase character or end-of-line
  call add(tl, [2, '\_l\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " Any non-lowercase character or end-of-line
  call add(tl, [2, '\_L\+', ['a', 'A', 'b', 'B'], ['aXXbXX']])
  " Any uppercase character or end-of-line
  call add(tl, [2, '\_u\+', ['a', 'A', 'b', 'B'], ['aXXbXX']])
  " Any non-uppercase character or end-of-line
  call add(tl, [2, '\_U\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " Collection or end-of-line
  call add(tl, [2, '\_[a-z]\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " start of line anywhere in the text
  call add(tl, [2, 'one\zs\_s*\_^\zetwo',
        \ ['', 'one', ' two', 'one', '', 'two'],
        \ ['', 'one', ' two', 'oneXXtwo']])
  " end of line anywhere in the text
  call add(tl, [2, 'one\zs\_$\_s*two',
        \ ['', 'one', ' two', 'one', '', 'two'], ['', 'oneXX', 'oneXX']])

  " Check that \_[0-9] matching EOL does not break a following \>
  call add(tl, [2, '\<\(\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\.\)\{3\}\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\>', ['', 'localnet/192.168.0.1', ''], ['', 'localnet/XX', '']])

  " Check a pattern with a line break and ^ and $
  call add(tl, [2, 'a\n^b$\n^c', ['a', 'b', 'c'], ['XX']])

  call add(tl, [2, '\(^.\+\n\)\1', [' dog', ' dog', 'asdf'], ['XXasdf']])

  " Run the multi-line tests
  for [re, pat, before, after] in tl
    for engine in [0, 1, 2]
      " Skip the engine that this entry excludes (see the table description).
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      new
      call setline(1, before)
      exe '%s/' . pat . '/XX/'
      let result = getline(1, '$')
      " Wipe the scratch buffer so that no buffer is left behind per case.
      bwipe!
      call assert_equal(after, result, 'pat: ' . string(pat)
            \ . ', text: ' . string(before) . ', engine: ' . engine)
    endfor
  endfor

  " Do not leak the last tested engine into the tests that follow.
  set regexpengine&
endfunc
782
" Verify that a substitute over two lines is not disturbed when the
" replacement expression itself calls matchstr() with a pattern using \ze.
func Test_matchstr_with_ze()
  let text = ['Substitute here:', '<T="">Ta 5</Title>', '<T="">Ac 7</Title>']
  let expected = ['Substitute here:', '<T="5">Ta 5</Title>',
        \ '<T="7">Ac 7</Title>', '']

  set re=0
  new
  call append(0, text)
  call cursor(1, 1)

  " Fill the empty attribute of each of the two following lines with the
  " number that precedes '<' on that same line.
  .+1,.+2s/""/\='"' . matchstr(getline("."), '\d\+\ze<') . '"'
  call assert_equal(expected, getline(1, '$'))

  bwipe!
endfunc
798
" Check a pattern with a lookbehind crossing a line boundary
func Test_lookbehind_across_line()
  let text = ['Behind:', 'asdfasd<yyy', 'xxstart1', 'asdfasd<yy',
        \ 'xxxstart2', 'asdfasd<yy', 'xxstart3']
  new
  call append(0, text)
  call cursor(1, 1)
  " The search is expected to land on "start" in line 7 ('xxstart3').
  call search('\(<\_[xy]\+\)\@3<=start')
  let where = getpos('.')
  call assert_equal([0, 7, 3, 0], where)
  bwipe!
endfunc
808endfunc
809
" Test for the \%V atom (match inside the visual area)
"
" Works on the current buffer: the text is inserted above line 1, changed with
" characterwise, linewise and blockwise selections, compared with the
" expected text and finally the whole buffer is deleted again.
func Regex_Match_Visual_Area()
  let text = ['Visual:', 'thexe the thexethe', 'andaxand andaxand',
        \ 'oooxofor foroxooo', 'oooxofor foroxooo']
  let expected = ['Visual:', 'thexE thE thExethe', 'AndAxAnd AndAxAnd',
        \ 'oooxOfOr fOrOxooo', 'oooxOfOr fOrOxooo', '']

  call append(0, text)
  call cursor(1, 1)
  " characterwise selection from the first 'x' to the next 'x'
  execute "normal jfxvfx:s/\\%Ve/E/g\<CR>"
  " linewise selection of the next line
  execute "normal jV:s/\\%Va/A/g\<CR>"
  " blockwise selection between the two 'x' columns, spanning two lines
  execute "normal jfx\<C-V>fxj:s/\\%Vo/O/g\<CR>"
  call assert_equal(expected, getline(1, '$'))
  %delete
endfunc
822
" Check matching Visual area with regexp engines 1 and 2
func Test_matching_visual_area()
  new
  for engine in [1, 2]
    let &regexpengine = engine
    call Regex_Match_Visual_Area()
  endfor
  set regexpengine&
  bwipe!
endfunc
833
" Check matching marks
"
" Works on the current buffer: sets the 's and 'e marks with Normal mode
" commands, substitutes the text between them (first within one line, then
" across two lines), compares the buffer and deletes all lines again.
func Regex_Mark()
  let text = ['', '', '', 'Marks:', 'asdfSasdfsadfEasdf', 'asdfSas',
        \ 'dfsadfEasdf', '', '', '', '', '']
  let expected = ['', '', '', 'Marks:', 'asdfhereasdf', 'asdfagainasdf',
        \ '', '', '', '', '', '']

  call append(0, text)
  call cursor(4, 1)
  " both marks on the same line
  execute "normal jfSmsfEme:.-4,.+6s/.\\%>'s.*\\%<'e../here/\<CR>"
  " 's on one line, 'e on the next one
  execute "normal jfSmsj0fEme:.-4,.+6s/.\\%>'s\\_.*\\%<'e../again/\<CR>"
  call assert_equal(expected, getline(1, '$'))
  %delete
endfunc
845
" Run the mark matching checks of Regex_Mark() with regexp engines 1 and 2.
func Test_matching_marks()
  new
  set regexpengine=1
  call Regex_Mark()
  set regexpengine=2
  call Regex_Mark()
  " Restore the default engine so that later tests are not forced to use
  " engine 2.
  set regexpengine&
  bwipe!
endfunc
854
" Check patterns matching cursor position.
"
" Fills a scratch buffer and runs substitutions that are restricted by the
" cursor position (\%#), line number (\%l), byte column (\%c) and virtual
" column (\%v), then compares the whole buffer with the expected text.
func s:curpos_test()
  " Remainder of the long last line.  None of the substitutions below touch
  " that line beyond byte column 12, thus the input and the expected text
  " share this part.
  let tail = ' xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx'

  new
  " The last line starts with eight blanks: byte column 3 is a blank and byte
  " column 12 is the fourth 'x' of the first word.
  call setline(1, ['ffooooo', 'boboooo', 'zoooooo', 'koooooo', 'moooooo',
        \ "\t\t\tfoo", 'abababababababfoo', 'bababababababafoo', '********_',
        \ '        xxxxxxxxxxxx' . tail])
  call setpos('.', [0, 1, 0, 0])
  s/\%>3c.//g
  call setpos('.', [0, 2, 4, 0])
  s/\%#.*$//g
  call setpos('.', [0, 3, 0, 0])
  s/\%<3c./_/g
  %s/\%4l\%>5c./_/g
  %s/\%6l\%>25v./_/g
  %s/\%>6l\%3c./!/g
  %s/\%>7l\%12c./?/g
  %s/\%>7l\%<9l\%>5v\%<8v./#/g
  $s/\%(|\u.*\)\@<=[^|\t]\+$//ge

  " Line 6: only the characters after virtual column 25 (the two 'o's) are
  " replaced, the three leading tabs stay in place.
  " Last line: byte column 3 becomes '!' and byte column 12 becomes '?'.
  call assert_equal(['ffo', 'bob', '__ooooo', 'koooo__', 'moooooo',
        \ "\t\t\tf__", 'ab!babababababfoo',
        \ 'ba!ab##abab?bafoo', '**!*****_',
        \ '  !     xxx?xxxxxxxx' . tail],
        \ getline(1, '$'))
  bwipe!
endfunc
880
" Run the cursor position checks with every 'regexpengine' value.
func Test_matching_curpos()
  for engine in [0, 1, 2]
    let &re = engine
    call s:curpos_test()
  endfor
  set re&
endfunc
890
" Test for matching the start and end of a buffer
"
" Works on the current buffer: fills it with 20 identical lines, searches
" with \%^ and \%$ and deletes all lines again.
func Regex_start_end_buffer()
  let buf_start = [0, 1, 1, 0]

  call setline(1, repeat(['vim edit'], 20))
  /\%^
  call assert_equal(buf_start, getpos('.'))
  " search from the middle of the buffer for text at the buffer start
  execute "normal 50%/\\%^..\<CR>"
  call assert_equal(buf_start, getpos('.'))
  " search from the middle of the buffer for the buffer end
  execute "normal 50%/\\%$\<CR>"
  call assert_equal([0, 20, 8, 0], getpos('.'))
  " two characters followed by the buffer end
  execute "normal 6gg/..\\%$\<CR>"
  call assert_equal([0, 20, 7, 0], getpos('.'))
  %delete
endfunc
904
" Run the buffer start/end checks of Regex_start_end_buffer() with regexp
" engines 1 and 2.
func Test_start_end_of_buffer_match()
  new
  set regexpengine=1
  call Regex_start_end_buffer()
  set regexpengine=2
  call Regex_start_end_buffer()
  " Restore the default engine so that later tests are not forced to use
  " engine 2.
  set regexpengine&
  bwipe!
endfunc
913
" A pattern where \ze comes before \zs is expected to give an empty match and
" only empty submatches, with both regexp engines.
func Test_ze_before_zs()
  let all_empty = repeat([''], 10)
  for engine in [1, 2]
    let pat = '\%#=' . engine . '\ze \zs'
    call assert_equal('', matchstr(' ', pat))
    call assert_equal(all_empty, matchlist(' ', pat))
  endfor
endfunc
920
" Check for detecting error
func Test_regexp_error()
  " A multi following \zs or \ze is rejected with E888 by both engines.
  for engine in [1, 2]
    for atom in ['zs', 'ze']
      let cmd = "call matchlist('x x', '\\%#=" . engine . " \\" . atom . "*')"
      call assert_fails(cmd, 'E888:')
    endfor
  endfor

  " Nested \%[] is rejected with E369.
  call assert_fails('exe "normal /\\%#=1\\%[x\\%[x]]\<CR>"', 'E369:')

  " \%o with the digits 841 fails with E678, with the digits 181 the pattern
  " is accepted and just does not match.
  call assert_fails("call matchstr('abcd', '\\%o841\\%o142')", 'E678:')
  call assert_equal('', matchstr('abcd', '\%o181\%o142'))
endfunc
931
" Test for using the last substitute string pattern (~)
func Test_regexp_last_subst_string()
  new
  " Only executed to set the last substitute string to "baz"; the 'e' flag
  " silences the "pattern not found" error in the empty buffer.
  s/bar/baz/e
  " assert_equal() takes the expected value first, which keeps the
  " "Expected ... but got ..." failure message truthful.
  call assert_equal("baz", matchstr("foo\nbaz\nbar", "\\%#=1\~"))
  call assert_equal("baz", matchstr("foo\nbaz\nbar", "\\%#=2\~"))
  close!
endfunc
940
941" vim: shiftwidth=2 sts=2 expandtab