blob: 166488b5384aa3fd8638b91bcdf3617e52933c9e [file] [log] [blame]
" Tests for regexp in latin1 encoding

" Switch the internal encoding to latin1 before declaring the script encoding;
" the test strings below contain non-ASCII characters (presumably stored as
" latin1 bytes in the real file - confirm against the repository copy).
let &encoding = 'latin1'
scriptencoding latin1

" Sourced for the Check* commands used by Test_pattern_compile_speed()
" (presumably defined in check.vim).
source check.vim
" Check the [[=x=]] equivalence classes with the currently selected regexp
" engine.  The test string is split on white space into groups; every
" character of a group must match all characters of its own group and no
" character of any other group.
func s:equivalence_test()
  let str = 'AÀÁÂÃÄÅ B C D EÈÉÊË F G H IÌÍÎÏ J K L M NÑ OÒÓÔÕÖØ P Q R S T UÙÚÛÜ V W X YÝ Z '
        \ .. 'aàáâãäå b c d eèéêë f g h iìíîï j k l m nñ oòóôõöø p q r s t uùúûü v w x yýÿ z '
        \ .. "0 1 2 3 4 5 6 7 8 9 "
        \ .. "` ~ ! ? ; : . , / \\ ' \" | < > [ ] { } ( ) @ # $ % ^ & * _ - + \b \e \f \n \r \t"
  let groups = split(str)
  for own in groups
    for ch in split(own, '\zs')
      let class = '[[=' .. ch .. '=]]'

      " The equivalence class of "ch" must match every character of its own
      " group.
      call assert_match('^' .. class .. '*$', own)

      " The equivalence class of "ch" must not match any character of a
      " different group.
      for other in groups
        if other == own
          continue
        endif
        call assert_equal(-1, match(other, class))
      endfor
    endfor
  endfor
endfunc
29
" Run the equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  set re=1
  try
    call s:equivalence_test()
  finally
    " Do not leak the engine selection into the tests that run afterwards;
    " the other tests in this file also go back to 0 when done.
    set re=0
  endtry
endfunc
34
" Run the equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  set re=2
  try
    call s:equivalence_test()
  finally
    " Do not leak the engine selection into the tests that run afterwards;
    " the other tests in this file also go back to 0 when done.
    set re=0
  endtry
endfunc
Bram Moolenaarf5a39442016-08-16 21:04:41 +020039
" A :substitute whose replacement expression runs another :substitute through
" execute() must leave Vim in a usable state afterwards.
func Test_recursive_substitute()
  new
  substitute/^/\=execute("s#^##gn")
  " Setting a window variable would not be allowed in the sandbox, so this
  " verifies that we are not in the sandbox now.
  call setwinvar(1, 'myvar', 1)
  bwipe!
endfunc
Bram Moolenaard5638832016-09-09 17:59:50 +020047
" Nested \( \) groups with back references in the replacement, using the
" example from change.txt.  Checked for :substitute and for substitute() with
" every 'regexpengine' value.
func Test_nested_backrefs()
  let want = '-aa ab - -ab - -x-'
  new
  for engine in [0, 1, 2]
    let &regexpengine = engine

    " the Ex command on a buffer line
    call setline(1, 'aa ab x')
    1substitute/\(\(a[a-d] \)*\)\(x\)/-\1- -\2- -\3-/
    call assert_equal(want, getline(1))

    " the function on a string
    let replaced = substitute('aa ab x', '\(\(a[a-d] \)*\)\(x\)', '-\1- -\2- -\3-', '')
    call assert_equal(want, replaced)
  endfor
  bwipe!
  set regexpengine=0
endfunc
Bram Moolenaar16b35782016-09-09 20:29:50 +020062
" An optional group that ends in \> (end of word) must still be captured.
" Checked with every 'regexpengine' value.
func Test_eow_with_optional()
  let expected = ['abc def', 'abc', 'def', '', '', '', '', '', '', '']
  for re in range(0, 2)
    exe 'set re=' . re
    let actual = matchlist('abc def', '\(abc\>\)\?\s*\(def\)')
    call assert_equal(expected, actual)
  endfor
  " The loop leaves 'regexpengine' at 2; go back to automatic selection like
  " the other tests in this file do.
  set re=0
endfunc
Bram Moolenaar1ef9bbe2017-06-17 20:08:20 +020071
" Back reference \1 after the group is closed matches the doubled "e" in
" "three" (line 3); using \1 inside its own group must fail with E65.
" Both are checked with the engine forced through \%#=1 and \%#=2.
func Test_backref()
  new
  call setline(1, ['one', 'two', 'three', 'four', 'five'])
  for pat in ['\%#=1\(e\)\1', '\%#=2\(e\)\1']
    call assert_equal(3, search(pat))
  endfor
  for cmd in ['call search("\\%#=1\\(e\\1\\)")', 'call search("\\%#=2\\(e\\1\\)")']
    call assert_fails(cmd, 'E65:')
  endfor
  bwipe!
endfunc
Bram Moolenaar6057ed42019-01-14 23:19:29 +010081
" Invalid use of multis must be rejected; the error number depends on the
" regexp engine in use.
func Test_multi_failure()
  " Each entry: 'regexpengine' value, then [search command, expected error].
  let cases = [
        \ [1, [['/a**', 'E61:'], ['/a*\+', 'E62:'], ['/a\{a}', 'E554:']]],
        \ [2, [['/a**', 'E871:'], ['/a*\+', 'E871:'], ['/a\{a}', 'E554:']]],
        \ ]
  for [engine, checks] in cases
    let &regexpengine = engine
    for [cmd, errcode] in checks
      call assert_fails(cmd, errcode)
    endfor
  endfor
  set regexpengine=0
endfunc
Bram Moolenaar5567ad42019-02-12 23:05:46 +010093
" The pattern makes addstate() recurse until it hits its limit; the search
" must then simply report "not found" (zero).
func Test_recursive_addstate()
  call assert_equal(0, search('\v((){328}){389}'))
endfunc
Bram Moolenaar15bbd6e2019-02-13 20:31:50 +010099
" A pattern that needs too much memory must fail with E363 instead of
" exhausting memory.
func Test_out_of_memory()
  new
  s/^/,n
  " This will be slow...
  call assert_fails('call search("\\v((n||<)+);")', 'E363:')
  " Wipe the modified scratch buffer so that the window opened above does not
  " stay around for the following tests.
  bwipe!
endfunc
Bram Moolenaar985079c2019-02-16 17:07:47 +0100106
" Searching for an unterminated equivalence class "[[=" or collating element
" "[[." used to access invalid memory; both must simply match the buffer line
" that holds the same text.
func Test_get_equi_class()
  new
  " Incomplete equivalence class caused invalid memory access
  s/^/[[=
  call assert_equal(1, search(getline(1)))
  s/.*/[[.
  call assert_equal(1, search(getline(1)))
  " Wipe the modified scratch buffer so that the window opened above does not
  " stay around for the following tests.
  bwipe!
endfunc
Bram Moolenaar8bfd9462019-02-16 18:07:57 +0100115
" [[:keyword:]] must follow the 'iskeyword' value of the current buffer, also
" right after switching to a new window with a different value.  Uses regexp
" engine 1 with 'incsearch' off.
func Test_rex_init()
  " Remember the user's 'incsearch' so that it can be put back at the end.
  let save_incsearch = &incsearch
  set noincsearch
  set re=1
  new
  setlocal iskeyword=a-z
  call setline(1, ['abc', 'ABC'])
  " only lower case letters are keyword characters: match in line 1
  call assert_equal(1, search('[[:keyword:]]'))
  new
  setlocal iskeyword=A-Z
  call setline(1, ['abc', 'ABC'])
  " only upper case letters are keyword characters: match in line 2
  call assert_equal(2, search('[[:keyword:]]'))
  bwipe!
  bwipe!
  set re=0
  let &incsearch = save_incsearch
endfunc
Bram Moolenaara5483442019-02-17 20:17:02 +0100131
" Collections that use "\n" as a range end point must be handled without
" problems; neither pattern matches the single "a" in the buffer.
func Test_range_with_newline()
  new
  call setline(1, "a")
  for pat in ["[ -*\\n- ]", "[ -*\\t-\\n]"]
    call assert_equal(0, search(pat))
  endfor
  bwipe!
endfunc
Bram Moolenaar38f08e72019-02-20 22:04:32 +0100139
" Setting 'spellcapcheck' to a deeply nested pattern must complete within the
" time window checked below.
func Test_pattern_compile_speed()
  CheckOption spellcapcheck
  CheckFunction reltimefloat

  let start = reltime()
  " this used to be very slow, now it should be about a second
  set spellcapcheck=\\v(((((Nxxxxxxx&&xxxx){179})+)+)+){179}
  let elapsed = reltimefloat(reltime(start))
  call assert_inrange(0.01, 10.0, elapsed)
  set spellcapcheck=
endfunc
Bram Moolenaarf9cb05c2019-12-15 13:39:22 +0100150
151" Tests for regexp patterns without multi-byte support.
func Test_regexp_single_line_pat()
  " Table driven test: every entry of "tl" is matched with matchlist() under
  " the selected 'regexpengine' values and the whole match plus the nine
  " submatches are compared with the expected values.
  "
  " tl is a List of Lists with:
  "    regexp engines to test
  "       0 - test with 'regexpengine' values 0 and 1
  "       1 - test with 'regexpengine' values 0 and 2
  "       2 - test with 'regexpengine' values 0, 1 and 2
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  call add(tl, [2, 'ab', 'aab', 'ab'])
  call add(tl, [2, 'b', 'abcdef', 'b'])
  call add(tl, [2, 'bc*', 'abccccdef', 'bcccc'])
  call add(tl, [2, 'bc\{-}', 'abccccdef', 'b'])
  call add(tl, [2, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
  call add(tl, [2, 'bc*', 'abbdef', 'b'])
  call add(tl, [2, 'c*', 'ccc', 'ccc'])
  call add(tl, [2, 'bc*', 'abdef', 'b'])
  call add(tl, [2, 'c*', 'abdef', ''])
  call add(tl, [2, 'bc\+', 'abccccdef', 'bcccc'])
  call add(tl, [2, 'bc\+', 'abdef']) " no match
  " match escape character in a string
  call add(tl, [2, '.\e.', "one\<Esc>two", "e\<Esc>t"])
  " match backspace character in a string
  call add(tl, [2, '.\b.', "one\<C-H>two", "e\<C-H>t"])
  " match newline character in a string
  call add(tl, [2, 'o\nb', "foo\nbar", "o\nb"])

  " operator \|
  call add(tl, [2, 'a\|ab', 'cabd', 'a']) " alternation is ordered

  call add(tl, [2, 'c\?', 'ccb', 'c'])
  call add(tl, [2, 'bc\?', 'abd', 'b'])
  call add(tl, [2, 'bc\?', 'abccd', 'bc'])

  call add(tl, [2, '\va{1}', 'ab', 'a'])

  call add(tl, [2, '\va{2}', 'aa', 'aa'])
  call add(tl, [2, '\va{2}', 'caad', 'aa'])
  call add(tl, [2, '\va{2}', 'aba'])
  call add(tl, [2, '\va{2}', 'ab'])
  call add(tl, [2, '\va{2}', 'abaa', 'aa'])
  call add(tl, [2, '\va{2}', 'aaa', 'aa'])

  call add(tl, [2, '\vb{1}', 'abca', 'b'])
  call add(tl, [2, '\vba{2}', 'abaa', 'baa'])
  call add(tl, [2, '\vba{3}', 'aabaac'])

  call add(tl, [2, '\v(ab){1}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1}', 'dabc', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1}', 'acb'])

  call add(tl, [2, '\v(ab){0,2}', 'acb', "", ""])
  call add(tl, [2, '\v(ab){0,2}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1,2}', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(ab){1,2}', 'ababc', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2,4}', 'ababcab', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2,4}', 'abcababa', 'abab', 'ab'])

  call add(tl, [2, '\v(ab){2}', 'abab', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2}', 'cdababe', 'abab', 'ab'])
  call add(tl, [2, '\v(ab){2}', 'abac'])
  call add(tl, [2, '\v(ab){2}', 'abacabab', 'abab', 'ab'])
  call add(tl, [2, '\v((ab){2}){2}', 'abababab', 'abababab', 'abab', 'ab'])
  call add(tl, [2, '\v((ab){2}){2}', 'abacabababab', 'abababab', 'abab', 'ab'])

  call add(tl, [2, '\v(a{1}){1}', 'a', 'a', 'a'])
  call add(tl, [2, '\v(a{2}){1}', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'aaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'daaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{1}){2}', 'daaac', 'aa', 'a'])
  call add(tl, [2, '\v(a{1}){2}', 'aaa', 'aa', 'a'])
  call add(tl, [2, '\v(a{2})+', 'adaac', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2})+', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{2}){1}', 'aa', 'aa', 'aa'])
  call add(tl, [2, '\v(a{1}){2}', 'aa', 'aa', 'a'])
  call add(tl, [2, '\v(a{1}){1}', 'a', 'a', 'a'])
  call add(tl, [2, '\v(a{2}){2}', 'aaaa', 'aaaa', 'aa'])
  call add(tl, [2, '\v(a{2}){2}', 'aaabaaaa', 'aaaa', 'aa'])

  call add(tl, [2, '\v(a+){2}', 'dadaac', 'aa', 'a'])
  call add(tl, [2, '\v(a{3}){2}', 'aaaaaaa', 'aaaaaa', 'aaa'])

  call add(tl, [2, '\v(a{1,2}){2}', 'daaac', 'aaa', 'a'])
  call add(tl, [2, '\v(a{1,3}){2}', 'daaaac', 'aaaa', 'a'])
  call add(tl, [2, '\v(a{1,3}){2}', 'daaaaac', 'aaaaa', 'aa'])
  call add(tl, [2, '\v(a{1,3}){3}', 'daac'])
  call add(tl, [2, '\v(a{1,2}){2}', 'dac'])
  call add(tl, [2, '\v(a+)+', 'daac', 'aa', 'aa'])
  call add(tl, [2, '\v(a+)+', 'aaa', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a+){1,2}', 'aaa', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a+)(a+)', 'aaa', 'aaa', 'aa', 'a'])
  call add(tl, [2, '\v(a{3})+', 'daaaac', 'aaa', 'aaa'])
  call add(tl, [2, '\v(a|b|c)+', 'aacb', 'aacb', 'b'])
  call add(tl, [2, '\v(a|b|c){2}', 'abcb', 'ab', 'b'])
  call add(tl, [2, '\v(abc){2}', 'abcabd', ])
  call add(tl, [2, '\v(abc){2}', 'abdabcabc','abcabc', 'abc'])

  call add(tl, [2, 'a*', 'cc', ''])
  call add(tl, [2, '\v(a*)+', 'cc', ''])
  call add(tl, [2, '\v((ab)+)+', 'ab', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(((ab)+)+)+', 'ab', 'ab', 'ab', 'ab', 'ab'])
  call add(tl, [2, '\v(((ab)+)+)+', 'dababc', 'abab', 'abab', 'abab', 'ab'])
  call add(tl, [2, '\v(a{0,2})+', 'cc', ''])
  call add(tl, [2, '\v(a*)+', '', ''])
  call add(tl, [2, '\v((a*)+)+', '', ''])
  call add(tl, [2, '\v((ab)*)+', '', ''])
  call add(tl, [2, '\va{1,3}', 'aab', 'aa'])
  call add(tl, [2, '\va{2,3}', 'abaa', 'aa'])

  call add(tl, [2, '\v((ab)+|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
  call add(tl, [2, '\v(a{2})|(b{3})', 'bbabbbb', 'bbb', '', 'bbb'])
  call add(tl, [2, '\va{2}|b{2}', 'abab'])
  call add(tl, [2, '\v(a)+|(c)+', 'bbacbaacbbb', 'a', 'a'])
  call add(tl, [2, '\vab{2,3}c', 'aabbccccccccccccc', 'abbc'])
  call add(tl, [2, '\vab{2,3}c', 'aabbbccccccccccccc', 'abbbc'])
  call add(tl, [2, '\vab{2,3}cd{2,3}e', 'aabbbcddee', 'abbbcdde'])
  call add(tl, [2, '\va(bc){2}d', 'aabcbfbc' ])
  call add(tl, [2, '\va*a{2}', 'a', ])
  call add(tl, [2, '\va*a{2}', 'aa', 'aa' ])
  call add(tl, [2, '\va*a{2}', 'aaa', 'aaa' ])
  call add(tl, [2, '\va*a{2}', 'bbbabcc', ])
  call add(tl, [2, '\va*b*|a*c*', 'a', 'a'])
  call add(tl, [2, '\va{1}b{1}|a{1}b{1}', ''])

  " submatches
  call add(tl, [2, '\v(a)', 'ab', 'a', 'a'])
  call add(tl, [2, '\v(a)(b)', 'ab', 'ab', 'a', 'b'])
  call add(tl, [2, '\v(ab)(b)(c)', 'abbc', 'abbc', 'ab', 'b', 'c'])
  call add(tl, [2, '\v((a)(b))', 'ab', 'ab', 'ab', 'a', 'b'])
  call add(tl, [2, '\v(a)|(b)', 'ab', 'a', 'a'])

  call add(tl, [2, '\v(a*)+', 'aaaa', 'aaaa', ''])
  call add(tl, [2, 'x', 'abcdef'])

  "
  " Simple tests
  "

  " Search single groups
  call add(tl, [2, 'ab', 'aab', 'ab'])
  call add(tl, [2, 'ab', 'baced'])
  call add(tl, [2, 'ab', ' ab ', 'ab'])

  " Search multi-modifiers
  call add(tl, [2, 'x*', 'xcd', 'x'])
  call add(tl, [2, 'x*', 'xxxxxxxxxxxxxxxxsofijiojgf', 'xxxxxxxxxxxxxxxx'])
  " empty match is good
  call add(tl, [2, 'x*', 'abcdoij', ''])
  " no match here
  call add(tl, [2, 'x\+', 'abcdoin'])
  call add(tl, [2, 'x\+', 'abcdeoijdfxxiuhfij', 'xx'])
  call add(tl, [2, 'x\+', 'xxxxx', 'xxxxx'])
  call add(tl, [2, 'x\+', 'abc x siufhiush xxxxxxxxx', 'x'])
  call add(tl, [2, 'x\=', 'x sdfoij', 'x'])
  call add(tl, [2, 'x\=', 'abc sfoij', '']) " empty match is good
  call add(tl, [2, 'x\=', 'xxxxxxxxx c', 'x'])
  call add(tl, [2, 'x\?', 'x sdfoij', 'x'])
  " empty match is good
  call add(tl, [2, 'x\?', 'abc sfoij', ''])
  call add(tl, [2, 'x\?', 'xxxxxxxxxx c', 'x'])

  call add(tl, [2, 'a\{0,0}', 'abcdfdoij', ''])
  " same thing as 'a?'
  call add(tl, [2, 'a\{0,1}', 'asiubid axxxaaa', 'a'])
  " same thing as 'a\{0,1}'
  call add(tl, [2, 'a\{1,0}', 'asiubid axxxaaa', 'a'])
  call add(tl, [2, 'a\{3,6}', 'aa siofuh'])
  call add(tl, [2, 'a\{3,6}', 'aaaaa asfoij afaa', 'aaaaa'])
  call add(tl, [2, 'a\{3,6}', 'aaaaaaaa', 'aaaaaa'])
  call add(tl, [2, 'a\{0}', 'asoiuj', ''])
  call add(tl, [2, 'a\{2}', 'aaaa', 'aa'])
  call add(tl, [2, 'a\{2}', 'iuash fiusahfliusah fiushfilushfi uhsaifuh askfj nasfvius afg aaaa sfiuhuhiushf', 'aa'])
  call add(tl, [2, 'a\{2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
  " same thing as 'a*'
  call add(tl, [2, 'a\{0,}', 'oij sdigfusnf', ''])
  call add(tl, [2, 'a\{0,}', 'aaaaa aa', 'aaaaa'])
  call add(tl, [2, 'a\{2,}', 'sdfiougjdsafg'])
  call add(tl, [2, 'a\{2,}', 'aaaaasfoij ', 'aaaaa'])
  call add(tl, [2, 'a\{5,}', 'xxaaaaxxx '])
  call add(tl, [2, 'a\{5,}', 'xxaaaaaxxx ', 'aaaaa'])
  call add(tl, [2, 'a\{,0}', 'oidfguih iuhi hiu aaaa', ''])
  call add(tl, [2, 'a\{,5}', 'abcd', 'a'])
  call add(tl, [2, 'a\{,5}', 'aaaaaaaaaa', 'aaaaa'])
  " leading star as normal char when \{} follows
  call add(tl, [2, '^*\{4,}$', '***'])
  call add(tl, [2, '^*\{4,}$', '****', '****'])
  call add(tl, [2, '^*\{4,}$', '*****', '*****'])
  " same thing as 'a*'
  call add(tl, [2, 'a\{}', 'bbbcddiuhfcd', ''])
  call add(tl, [2, 'a\{}', 'aaaaioudfh coisf jda', 'aaaa'])

  call add(tl, [2, 'a\{-0,0}', 'abcdfdoij', ''])
  " anti-greedy version of 'a?'
  call add(tl, [2, 'a\{-0,1}', 'asiubid axxxaaa', ''])
  call add(tl, [2, 'a\{-3,6}', 'aa siofuh'])
  call add(tl, [2, 'a\{-3,6}', 'aaaaa asfoij afaa', 'aaa'])
  call add(tl, [2, 'a\{-3,6}', 'aaaaaaaa', 'aaa'])
  call add(tl, [2, 'a\{-0}', 'asoiuj', ''])
  call add(tl, [2, 'a\{-2}', 'aaaa', 'aa'])
  call add(tl, [2, 'a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
  call add(tl, [2, 'a\{-0,}', 'oij sdigfusnf', ''])
  call add(tl, [2, 'a\{-0,}', 'aaaaa aa', ''])
  call add(tl, [2, 'a\{-2,}', 'sdfiougjdsafg'])
  call add(tl, [2, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
  call add(tl, [2, 'a\{-,0}', 'oidfguih iuhi hiu aaaa', ''])
  call add(tl, [2, 'a\{-,5}', 'abcd', ''])
  call add(tl, [2, 'a\{-,5}', 'aaaaaaaaaa', ''])
  " anti-greedy version of 'a*'
  call add(tl, [2, 'a\{-}', 'bbbcddiuhfcd', ''])
  call add(tl, [2, 'a\{-}', 'aaaaioudfh coisf jda', ''])

  " Test groups of characters and submatches
  call add(tl, [2, '\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc'])
  call add(tl, [2, '\(ab\)\+', 'abababaaaaa', 'ababab', 'ab'])
  call add(tl, [2, '\(abaaaaa\)*cd', 'cd', 'cd', ''])
  call add(tl, [2, '\(test1\)\? \(test2\)\?', 'test1 test3', 'test1 ', 'test1', ''])
  call add(tl, [2, '\(test1\)\= \(test2\) \(test4443\)\=', ' test2 test4443 yupiiiiiiiiiii', ' test2 test4443', '', 'test2', 'test4443'])
  call add(tl, [2, '\(\(sub1\) hello \(sub 2\)\)', 'asterix sub1 hello sub 2 obelix', 'sub1 hello sub 2', 'sub1 hello sub 2', 'sub1', 'sub 2'])
  call add(tl, [2, '\(\(\(yyxxzz\)\)\)', 'abcdddsfiusfyyzzxxyyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz'])
  call add(tl, [2, '\v((ab)+|c+)+', 'abcccaba', 'abcccab', 'ab', 'ab'])
  call add(tl, [2, '\v((ab)|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
  call add(tl, [2, '\v(a(c*)+b)+', 'acbababaaa', 'acbabab', 'ab', ''])
  call add(tl, [2, '\v(a|b*)+', 'aaaa', 'aaaa', ''])
  call add(tl, [2, '\p*', 'aá ', 'aá '])

  " Test greedy-ness and lazy-ness
  call add(tl, [2, 'a\{-2,7}','aaaaaaaaaaaaa', 'aa'])
  call add(tl, [2, 'a\{-2,7}x','aaaaaaaaax', 'aaaaaaax'])
  call add(tl, [2, 'a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa'])
  call add(tl, [2, 'a\{2,7}x','aaaaaaaaax', 'aaaaaaax'])
  call add(tl, [2, '\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz'])
  call add(tl, [2, '\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa',''])
  call add(tl, [2, '\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa'])
  call add(tl, [2, '\v(a{-1,3})+', 'aa', 'aa', 'a'])
  call add(tl, [2, '^\s\{-}\zs\( x\|x$\)', ' x', ' x', ' x'])
  call add(tl, [2, '^\s\{-}\zs\(x\| x$\)', ' x', ' x', ' x'])
  call add(tl, [2, '^\s\{-}\ze\(x\| x$\)', ' x', '', ' x'])
  call add(tl, [2, '^\(\s\{-}\)\(x\| x$\)', ' x', ' x', '', ' x'])

  " Test Character classes
  call add(tl, [2, '\d\+e\d\d','test 10e23 fd','10e23'])

  " Test collections and character range []
  call add(tl, [2, '\v[a]', 'abcd', 'a'])
  call add(tl, [2, 'a[bcd]', 'abcd', 'ab'])
  call add(tl, [2, 'a[b-d]', 'acbd', 'ac'])
  call add(tl, [2, '[a-d][e-f][x-x]d', 'cexdxx', 'cexd'])
  call add(tl, [2, '\v[[:alpha:]]+', 'abcdefghijklmnopqrstuvwxyz6','abcdefghijklmnopqrstuvwxyz'])
  call add(tl, [2, '[[:alpha:]\+]', '6x8','x'])
  call add(tl, [2, '[^abc]\+','abcabcabc'])
  call add(tl, [2, '[^abc]','defghiasijvoinasoiunbvb','d'])
  call add(tl, [2, '[^abc]\+','ddddddda','ddddddd'])
  call add(tl, [2, '[^a-d]\+','aaaAAAZIHFNCddd','AAAZIHFNC'])
  call add(tl, [2, '[a-f]*','iiiiiiii',''])
  call add(tl, [2, '[a-f]*','abcdefgh','abcdef'])
  call add(tl, [2, '[^a-f]\+','abcdefgh','gh'])
  call add(tl, [2, '[a-c]\{-3,6}','abcabc','abc'])
  call add(tl, [2, '[^[:alpha:]]\+','abcccadfoij7787ysf287yrnccdu','7787'])
  call add(tl, [2, '[-a]', '-', '-'])
  call add(tl, [2, '[a-]', '-', '-'])
  call add(tl, [2, '[a-f]*\c','ABCDEFGH','ABCDEF'])
  call add(tl, [2, '[abc][xyz]\c','-af-AF-BY--','BY'])
  " filename regexp
  call add(tl, [2, '[-./[:alnum:]_~]\+', 'log13.file', 'log13.file'])
  " special chars
  call add(tl, [2, '[\]\^\-\\]\+', '\^\\\-\---^', '\^\\\-\---^'])
  " collation elem
  call add(tl, [2, '[[.a.]]\+', 'aa', 'aa'])
  " middle of regexp
  call add(tl, [2, 'abc[0-9]*ddd', 'siuhabc ii'])
  call add(tl, [2, 'abc[0-9]*ddd', 'adf abc44482ddd oijs', 'abc44482ddd'])
  call add(tl, [2, '\_[0-9]\+', 'asfi9888u', '9888'])
  call add(tl, [2, '[0-9\n]\+', 'asfi9888u', '9888'])
  call add(tl, [2, '\_[0-9]\+', "asfi\n9888u", "\n9888"])
  call add(tl, [2, '\_f', " \na ", "\n"])
  call add(tl, [2, '\_f\+', " \na ", "\na"])
  call add(tl, [2, '[0-9A-Za-z-_.]\+', " @0_a.A-{ ", "0_a.A-"])

  " Test start/end of line, start/end of file
  call add(tl, [2, '^a.', "a_\nb ", "a_"])
  call add(tl, [2, '^a.', "b a \na_"])
  call add(tl, [2, '.a$', " a\n "])
  call add(tl, [2, '.a$', " a b\n_a", "_a"])
  call add(tl, [2, '\%^a.', "a a\na", "a "])
  call add(tl, [2, '\%^a', " a \na "])
  call add(tl, [2, '.a\%$', " a\n "])
  call add(tl, [2, '.a\%$', " a\n_a", "_a"])

  " Test recognition of character classes
  call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567'])
  call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89'])
  call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789'])
  call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% '])
  call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef'])
  call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% '])
  call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij'])
  call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% '])
  call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ'])
  call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% '])
  call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz'])
  call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz'])
  call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% '])
  call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%'])
  call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ'])
  call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ'])
  call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%'])
  call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ'])
  call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ'])
  call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ '])
  call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])

  " Tests for \z features
  " match ends at \ze
  call add(tl, [2, 'xx \ze test', 'xx '])
  call add(tl, [2, 'abc\zeend', 'oij abcend', 'abc'])
  call add(tl, [2, 'aa\zebb\|aaxx', ' aabb ', 'aa'])
  call add(tl, [2, 'aa\zebb\|aaxx', ' aaxx ', 'aaxx'])
  call add(tl, [2, 'aabb\|aa\zebb', ' aabb ', 'aabb'])
  call add(tl, [2, 'aa\zebb\|aaebb', ' aabb ', 'aa'])
  " match starts at \zs
  call add(tl, [2, 'abc\zsdd', 'ddabcddxyzt', 'dd'])
  call add(tl, [2, 'aa \zsax', ' ax'])
  call add(tl, [2, 'abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match'])
  call add(tl, [2, '\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last'])
  call add(tl, [2, '\>\zs.', 'aword. ', '.'])
  call add(tl, [2, '\s\+\ze\[/\|\s\zs\s\+', 'is [a t', ' '])

  " Tests for \@= and \& features
  call add(tl, [2, 'abc\@=', 'abc', 'ab'])
  call add(tl, [2, 'abc\@=cd', 'abcd', 'abcd'])
  call add(tl, [2, 'abc\@=', 'ababc', 'ab'])
  " will never match, no matter the input text
  call add(tl, [2, 'abcd\@=e', 'abcd'])
  " will never match
  call add(tl, [2, 'abcd\@=e', 'any text in here ... '])
  call add(tl, [2, '\v(abc)@=..', 'xabcd', 'ab', 'abc'])
  call add(tl, [2, '\(.*John\)\@=.*Bob', 'here is John, and here is B'])
  call add(tl, [2, '\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend'])
  call add(tl, [2, '\<\S\+\())\)\@=', '$((i=i+1))', 'i=i+1', '))'])
  call add(tl, [2, '.*John\&.*Bob', 'here is John, and here is B'])
  call add(tl, [2, '.*John\&.*Bob', 'John is Bobs friend', 'John is Bob'])
  call add(tl, [2, '\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1'])
  call add(tl, [2, 'foo\(bar\)\@!', 'foobar'])
  call add(tl, [2, 'foo\(bar\)\@!', 'foo bar', 'foo'])
  call add(tl, [2, 'if \(\(then\)\@!.\)*$', ' if then else'])
  call add(tl, [2, 'if \(\(then\)\@!.\)*$', ' if else ', 'if else ', ' '])
  call add(tl, [2, '\(foo\)\@!bar', 'foobar', 'bar'])
  call add(tl, [2, '\(foo\)\@!...bar', 'foobar'])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' bar foo '])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' foo bar '])
  call add(tl, [2, '^\%(.*bar\)\@!.*\zsfoo', ' foo xxx ', 'foo'])
  call add(tl, [2, '[ ]\@!\p\%([ ]\@!\p\)*:', 'implicit mappings:', 'mappings:'])
  call add(tl, [2, '[ ]\@!\p\([ ]\@!\p\)*:', 'implicit mappings:', 'mappings:', 's'])
  call add(tl, [2, 'm\k\+_\@=\%(_\@!\k\)\@<=\k\+e', 'mx__xe', 'mx__xe'])
  call add(tl, [2, '\%(\U\@<=S\k*\|S\l\)R', 'SuR', 'SuR'])

  " Combining different tests and features
  call add(tl, [2, '[[:alpha:]]\{-2,6}', '787abcdiuhsasiuhb4', 'ab'])
  call add(tl, [2, '', 'abcd', ''])
  call add(tl, [2, '\v(())', 'any possible text', ''])
  call add(tl, [2, '\v%(ab(xyz)c)', ' abxyzc ', 'abxyzc', 'xyz'])
  call add(tl, [2, '\v(test|)empty', 'tesempty', 'empty', ''])
  call add(tl, [2, '\v(a|aa)(a|aa)', 'aaa', 'aa', 'a', 'a'])

  " \%u and friends
  call add(tl, [2, '\%d32', 'yes no', ' '])
  call add(tl, [2, '\%o40', 'yes no', ' '])
  call add(tl, [2, '\%x20', 'yes no', ' '])
  call add(tl, [2, '\%u0020', 'yes no', ' '])
  call add(tl, [2, '\%U00000020', 'yes no', ' '])
  call add(tl, [2, '\%d0', "yes\x0ano", "\x0a"])

  "" \%[abc]
  call add(tl, [2, 'foo\%[bar]', 'fobar'])
  call add(tl, [2, 'foo\%[bar]', 'foobar', 'foobar'])
  call add(tl, [2, 'foo\%[bar]', 'fooxx', 'foo'])
  call add(tl, [2, 'foo\%[bar]', 'foobxx', 'foob'])
  call add(tl, [2, 'foo\%[bar]', 'foobaxx', 'fooba'])
  call add(tl, [2, 'foo\%[bar]', 'foobarxx', 'foobar'])
  call add(tl, [2, 'foo\%[bar]x', 'foobxx', 'foobx'])
  call add(tl, [2, 'foo\%[bar]x', 'foobarxx', 'foobarx'])
  call add(tl, [2, '\%[bar]x', 'barxx', 'barx'])
  call add(tl, [2, '\%[bar]x', 'bxx', 'bx'])
  call add(tl, [2, '\%[bar]x', 'xxx', 'x'])
  call add(tl, [2, 'b\%[[ao]r]', 'bar bor', 'bar'])
  call add(tl, [2, 'b\%[[]]r]', 'b]r bor', 'b]r'])
  call add(tl, [2, '@\%[\w\-]*', '<http://john.net/pandoc/>[@pandoc]', '@pandoc'])

  " Alternatives, must use first longest match
  call add(tl, [2, 'goo\|go', 'google', 'goo'])
  call add(tl, [2, '\<goo\|\<go', 'google', 'goo'])
  call add(tl, [2, '\<goo\|go', 'google', 'goo'])

  " Back references
  call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc'])
  call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
  call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
  call add(tl, [2, '\(\d*\)a \1b', ' a b ', 'a b', ''])
  call add(tl, [2, '^.\(.\).\_..\1.', "aaa\naaa\nb", "aaa\naaa", 'a'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<!$', 'foo.bat/foo.com', 'foo.bat/foo.com', 'bat'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<!$', 'foo.bat/foo.bat'])
  call add(tl, [2, '^.*\.\(.*\)/.\+\(\1\)\@<=$', 'foo.bat/foo.bat', 'foo.bat/foo.bat', 'bat', 'bat'])
  call add(tl, [2, '\\\@<!\${\(\d\+\%(:.\{-}\)\?\\\@<!\)}', '2013-06-27${0}', '${0}', '0'])
  call add(tl, [2, '^\(a*\)\1$', 'aaaaaaaa', 'aaaaaaaa', 'aaaa'])
  call add(tl, [2, '^\(a\{-2,}\)\1\+$', 'aaaaaaaaa', 'aaaaaaaaa', 'aaa'])

  " Look-behind with limit
  call add(tl, [2, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
  call add(tl, [2, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
  call add(tl, [2, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
  call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
  call add(tl, [2, '\(foo\)\@<!bar.', 'xx foobar1 xbar2 xx', 'bar2'])

  " look-behind match in front of a zero-width item
  call add(tl, [2, '\v\C%(<Last Changed:\s+)@<=.*$', '" test header'])
  call add(tl, [2, '\v\C%(<Last Changed:\s+)@<=.*$', '" Last Changed: 1970', '1970'])
  call add(tl, [2, '\(foo\)\@<=\>', 'foobar'])
  call add(tl, [2, '\(foo\)\@<=\>', 'barfoo', '', 'foo'])
  call add(tl, [2, '\(foo\)\@<=.*', 'foobar', 'bar', 'foo'])

  " complicated look-behind match
  call add(tl, [2, '\(r\@<=\|\w\@<!\)\/', 'x = /word/;', '/'])
  call add(tl, [2, '^[a-z]\+\ze \&\(asdf\)\@<!', 'foo bar', 'foo'])

  "" \@>
  call add(tl, [2, '\(a*\)\@>a', 'aaaa'])
  call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa'])
  call add(tl, [2, '^\(.\{-}b\)\@>.', ' abcbd', ' abc', ' ab'])
  call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', ''])
  " TODO: BT engine does not restore submatch after failure
  call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa'])

  " "\_" prepended negated collection matches EOL
  call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
  call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])

  " Requiring lots of states.
  call add(tl, [2, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])

  " Skip adding state twice
  call add(tl, [2, '^\%(\%(^\s*#\s*if\>\|#\s*if\)\)\(\%>1c.*$\)\@=', "#if FOO", "#if", ' FOO'])

  " Test \%V atom
  call add(tl, [2, '\%>70vGesamt', 'Jean-Michel Charlier & Victor Hubinon\Gesamtausgabe [Salleck] Buck Danny {Jean-Michel Charlier & Victor Hubinon}\Gesamtausgabe', 'Gesamt'])

  " Test for ignoring case and matching repeated characters
  call add(tl, [2, '\cb\+', 'aAbBbBcC', 'bBbB'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine that the first item of the entry excludes.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", caused an exception: \"'
              \ . v:exception . '\"')
        " There is no result to inspect for this engine.  Going on would use
        " "l" while it is undefined or still holds the result of the previous
        " run, which only produces misleading follow-up errors.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", did not match, expected: \"'
              \ . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", match: \"' . l[0]
              \ . '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat
              \ . '\", text: \"' . text . '\", match: \"' . l[0]
              \ . '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif

      " check all the nine submatches
      if len(l) > 0
        for i in range(1, 9)
          " A submatch that is not listed in the entry is expected to be empty.
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat
                  \ . '\", text: \"' . text . '\", submatch ' . i . ': \"'
                  \ . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor

  " "e" and "l" only exist when at least one pattern was matched without an
  " exception, thus use "!" to avoid an error when they are missing.
  unlet! t tl e l
  " The loop leaves 'regexpengine' at 2; go back to automatic selection like
  " the other tests in this file do.
  set re=0
endfunc
666
" Tests for multi-line regexp patterns without multi-byte support.
" Every pattern is applied with ":%s/{pat}/XX/" to a scratch window holding
" the given text; the resulting buffer lines are compared with the expected
" lines.  Mismatches are reported through assert_report().
func Test_regexp_multiline_pat()
  " tl is a List of Lists with:
  "    regexp engines to test
  "       0 - test with 'regexpengine' values 0 and 1
  "       1 - test with 'regexpengine' values 0 and 2
  "       2 - test with 'regexpengine' values 0, 1 and 2
  "    regexp pattern
  "    List with text to test the pattern on
  "    List with the expected buffer lines after the substitute
  let tl = []

  " back references
  call add(tl, [2, '^.\(.\).\_..\1.', ['aaa', 'aaa', 'b'], ['XX', 'b']])
  call add(tl, [2, '\v.*\/(.*)\n.*\/\1$', ['./Dir1/Dir2/zyxwvuts.txt', './Dir1/Dir2/abcdefgh.bat', '', './Dir1/Dir2/file1.txt', './OtherDir1/OtherDir2/file1.txt'], ['./Dir1/Dir2/zyxwvuts.txt', './Dir1/Dir2/abcdefgh.bat', '', 'XX']])

  " line breaks
  call add(tl, [2, '\S.*\nx', ['abc', 'def', 'ghi', 'xjk', 'lmn'], ['abc', 'def', 'XXjk', 'lmn']])

  " Any single character or end-of-line
  call add(tl, [2, '\_.\+', ['a', 'b', 'c'], ['XX']])
  " Any identifier or end-of-line
  call add(tl, [2, '\_i\+', ['a', 'b', ';', '2'], ['XX;XX']])
  " Any identifier but excluding digits or end-of-line
  call add(tl, [2, '\_I\+', ['a', 'b', ';', '2'], ['XX;XX2XX']])
  " Any keyword or end-of-line
  call add(tl, [2, '\_k\+', ['a', 'b', '=', '2'], ['XX=XX']])
  " Any keyword but excluding digits or end-of-line
  call add(tl, [2, '\_K\+', ['a', 'b', '=', '2'], ['XX=XX2XX']])
  " Any filename character or end-of-line
  call add(tl, [2, '\_f\+', ['a', 'b', '.', '5'], ['XX']])
  " Any filename character but excluding digits or end-of-line
  call add(tl, [2, '\_F\+', ['a', 'b', '.', '5'], ['XX5XX']])
  " Any printable character or end-of-line
  call add(tl, [2, '\_p\+', ['a', 'b', '=', '4'], ['XX']])
  " Any printable character excluding digits or end-of-line
  call add(tl, [2, '\_P\+', ['a', 'b', '=', '4'], ['XX4XX']])
  " Any whitespace character or end-of-line
  call add(tl, [2, '\_s\+', [' ', ' ', 'a', 'b'], ['XXaXXbXX']])
  " Any non-whitespace character or end-of-line
  call add(tl, [2, '\_S\+', [' ', ' ', 'a', 'b'], [' XX XX']])
  " Any decimal digit or end-of-line
  call add(tl, [2, '\_d\+', ['1', 'a', '2', 'b', '3'], ['XXaXXbXX']])
  " Any non-decimal digit or end-of-line
  call add(tl, [2, '\_D\+', ['1', 'a', '2', 'b', '3'], ['1XX2XX3XX']])
  " Any hexadecimal digit or end-of-line
  call add(tl, [2, '\_x\+', ['1', 'a', 'g', '9', '8'], ['XXgXX']])
  " Any non-hexadecimal digit or end-of-line
  call add(tl, [2, '\_X\+', ['1', 'a', 'g', '9', '8'], ['1XXaXX9XX8XX']])
  " Any octal digit or end-of-line
  call add(tl, [2, '\_o\+', ['0', '7', '8', '9', '0'], ['XX8XX9XX']])
  " Any non-octal digit or end-of-line
  call add(tl, [2, '\_O\+', ['0', '7', '8', '9', '0'], ['0XX7XX0XX']])
  " Any word character or end-of-line
  call add(tl, [2, '\_w\+', ['A', 'B', '=', 'C', 'D'], ['XX=XX']])
  " Any non-word character or end-of-line
  call add(tl, [2, '\_W\+', ['A', 'B', '=', 'C', 'D'], ['AXXBXXCXXDXX']])
  " Any head-of-word character or end-of-line
  call add(tl, [2, '\_h\+', ['a', '1', 'b', '2', 'c'], ['XX1XX2XX']])
  " Any non-head-of-word character or end-of-line
  call add(tl, [2, '\_H\+', ['a', '1', 'b', '2', 'c'], ['aXXbXXcXX']])
  " Any alphabetic character or end-of-line
  call add(tl, [2, '\_a\+', ['a', '1', 'b', '2', 'c'], ['XX1XX2XX']])
  " Any non-alphabetic character or end-of-line
  call add(tl, [2, '\_A\+', ['a', '1', 'b', '2', 'c'], ['aXXbXXcXX']])
  " Any lowercase character or end-of-line
  call add(tl, [2, '\_l\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " Any non-lowercase character or end-of-line
  call add(tl, [2, '\_L\+', ['a', 'A', 'b', 'B'], ['aXXbXX']])
  " Any uppercase character or end-of-line
  call add(tl, [2, '\_u\+', ['a', 'A', 'b', 'B'], ['aXXbXX']])
  " Any non-uppercase character or end-of-line
  call add(tl, [2, '\_U\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " Collection or end-of-line
  call add(tl, [2, '\_[a-z]\+', ['a', 'A', 'b', 'B'], ['XXAXXBXX']])
  " start of line anywhere in the text
  call add(tl, [2, 'one\zs\_s*\_^\zetwo',
        \ ['', 'one', ' two', 'one', '', 'two'],
        \ ['', 'one', ' two', 'oneXXtwo']])
  " end of line anywhere in the text
  call add(tl, [2, 'one\zs\_$\_s*two',
        \ ['', 'one', ' two', 'one', '', 'two'], ['', 'oneXX', 'oneXX']])

  " Check that \_[0-9] matching EOL does not break a following \>
  call add(tl, [2, '\<\(\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\.\)\{3\}\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\>', ['', 'localnet/192.168.0.1', ''], ['', 'localnet/XX', '']])

  " Check a pattern with a line break and ^ and $
  call add(tl, [2, 'a\n^b$\n^c', ['a', 'b', 'c'], ['XX']])

  call add(tl, [2, '\(^.\+\n\)\1', [' dog', ' dog', 'asdf'], ['XXasdf']])

  " Run the multi-line tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let before = t[2]
    let after = t[3]
    for engine in [0, 1, 2]
      " Skip the engine that the entry's first item excludes.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      " Use a fresh window for every run so earlier substitutions cannot
      " influence the result; it is discarded with :q! afterwards.
      new
      call setline(1, before)
      exe '%s/' . pat . '/XX/'
      let result = getline(1, '$')
      q!
      if result != after
        call assert_report('Error: pat: \"' . pat . '\", text: \"'
              \ . string(before) . '\", expected: \"' . string(after)
              \ . '\", got: \"' . string(result) . '\"')
      endif
    endfor
  endfor

  " Restore the default so the last tested engine does not leak into
  " whatever test runs next.
  set regexpengine&
  unlet t tl
endfunc
785
" Check that using a pattern on two lines doesn't get messed up by using
" matchstr() with \ze in between (here: inside the replacement expression).
func Test_matchstr_with_ze()
  let text = ['Substitute here:', '<T="">Ta 5</Title>',
        \ '<T="">Ac 7</Title>']
  " append(0, ...) into a new buffer leaves the original empty line last.
  let want = ['Substitute here:', '<T="5">Ta 5</Title>',
        \ '<T="7">Ac 7</Title>', '']

  new
  call append(0, text)
  call cursor(1, 1)
  set regexpengine=0

  " Fill the empty attribute of the next two lines with the number found
  " just before the closing tag on that same line.
  .+1,.+2s/""/\='"' . matchstr(getline("."), '\d\+\ze<') . '"'
  call assert_equal(want, getline(1, '$'))

  bwipe!
endfunc
801
" Check a pattern with a look-behind crossing a line boundary
func Test_lookbehind_across_line()
  let text = ['Behind:', 'asdfasd<yyy', 'xxstart1', 'asdfasd<yy',
        \ 'xxxstart2', 'asdfasd<yy', 'xxstart3']

  new
  call append(0, text)
  call cursor(1, 1)
  " Searching forward from the top, the cursor is expected to land on the
  " "start" in line 7.
  call search('\(<\_[xy]\+\)\@3<=start')
  call assert_equal([0, 7, 3, 0], getpos('.'))
  bwipe!
endfunc
812
" Test for the \%V atom (match inside the visual area)
" Helper: works on the current buffer and deletes all its lines when done,
" so it can be called again on the same buffer.
func Regex_Match_Visual_Area()
  let text = ['Visual:', 'thexe the thexethe', 'andaxand andaxand',
        \ 'oooxofor foroxooo', 'oooxofor foroxooo']
  let want = ['Visual:', 'thexE thE thExethe', 'AndAxAnd AndAxAnd',
        \ 'oooxOfOr fOrOxooo', 'oooxOfOr fOrOxooo', '']

  call append(0, text)
  call cursor(1, 1)
  " Substitute inside a selection started with v, then V, then CTRL-V.
  execute "normal jfxvfx:s/\\%Ve/E/g\<CR>"
  execute "normal jV:s/\\%Va/A/g\<CR>"
  execute "normal jfx\<C-V>fxj:s/\\%Vo/O/g\<CR>"
  call assert_equal(want, getline(1, '$'))
  %d
endfunc
825
" Check matching Visual area
" Runs the shared helper once with 'regexpengine' 1 and once with 2.
func Test_matching_visual_area()
  new
  for engine in [1, 2]
    let &regexpengine = engine
    call Regex_Match_Visual_Area()
  endfor
  set regexpengine&
  bwipe!
endfunc
836
" Check matching marks
" Helper: works on the current buffer and deletes all its lines when done,
" so it can be called again on the same buffer.
func Regex_Mark()
  let text = ['', '', '', 'Marks:', 'asdfSasdfsadfEasdf', 'asdfSas',
        \ 'dfsadfEasdf', '', '', '', '', '']
  let want = ['', '', '', 'Marks:', 'asdfhereasdf', 'asdfagainasdf',
        \ '', '', '', '', '', '']

  call append(0, text)
  call cursor(4, 1)
  " First with marks 's and 'e set on one line, then with 'e set on the
  " line below 's.
  execute "normal jfSmsfEme:.-4,.+6s/.\\%>'s.*\\%<'e../here/\<CR>"
  execute "normal jfSmsj0fEme:.-4,.+6s/.\\%>'s\\_.*\\%<'e../again/\<CR>"
  call assert_equal(want, getline(1, '$'))
  %d
endfunc
848
" Run the mark-matching helper with 'regexpengine' 1 and 2.
func Test_matching_marks()
  new
  set regexpengine=1
  call Regex_Mark()
  set regexpengine=2
  call Regex_Mark()
  " Restore the default so the engine choice does not leak into later tests.
  set regexpengine&
  bwipe!
endfunc
857
" Check patterns matching cursor position.
" Applies a fixed series of substitutes using column, virtual column, line
" and cursor atoms to a scratch buffer and compares the final buffer content.
" NOTE(review): whitespace inside the text literals is significant for the
" column atoms used here - verify the literals against the upstream file.
func s:curpos_test()
  let text = ['ffooooo', 'boboooo', 'zoooooo', 'koooooo', 'moooooo',
        \ "\t\t\tfoo", 'abababababababfoo', 'bababababababafoo', '********_',
        \ ' xxxxxxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx']
  let want = ['ffo', 'bob', '__ooooo', 'koooo__', 'moooooo',
        \ ' f__', 'ab!babababababfoo',
        \ 'ba!ab##abab?bafoo', '**!*****_',
        \ ' ! xxx?xxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx']

  new
  call setline(1, text)

  " Substitutes on the cursor line only; the cursor is placed explicitly
  " before each one.
  call setpos('.', [0, 1, 0, 0])
  s/\%>3c.//g
  call setpos('.', [0, 2, 4, 0])
  s/\%#.*$//g
  call setpos('.', [0, 3, 0, 0])
  s/\%<3c./_/g

  " Substitutes over the whole buffer, restricted by line/column atoms.
  %s/\%4l\%>5c./_/g
  %s/\%6l\%>25v./_/g
  %s/\%>6l\%3c./!/g
  %s/\%>7l\%12c./?/g
  %s/\%>7l\%<9l\%>5v\%<8v./#/g
  $s/\%(|\u.*\)\@<=[^|\t]\+$//ge

  call assert_equal(want, getline(1, '$'))
  bwipe!
endfunc
883
" Run the cursor position pattern checks with 'regexpengine' 0, 1 and 2.
func Test_matching_curpos()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call s:curpos_test()
  endfor
  set regexpengine&
endfunc
893
" Test for matching the start and end of a buffer
" Helper: works on the current buffer and deletes all its lines when done,
" so it can be called again on the same buffer.
func Regex_start_end_buffer()
  let first_pos = [0, 1, 1, 0]

  call setline(1, repeat(['vim edit'], 20))

  " \%^ matches at the start of the buffer
  /\%^
  call assert_equal(first_pos, getpos('.'))
  execute "normal 50%/\\%^..\<CR>"
  call assert_equal(first_pos, getpos('.'))

  " \%$ matches at the end of the buffer
  execute "normal 50%/\\%$\<CR>"
  call assert_equal([0, 20, 8, 0], getpos('.'))
  execute "normal 6gg/..\\%$\<CR>"
  call assert_equal([0, 20, 7, 0], getpos('.'))
  %d
endfunc
907
" Run the start/end of buffer helper with 'regexpengine' 1 and 2.
func Test_start_end_of_buffer_match()
  new
  set regexpengine=1
  call Regex_start_end_buffer()
  set regexpengine=2
  call Regex_start_end_buffer()
  " Restore the default so the engine choice does not leak into later tests.
  set regexpengine&
  bwipe!
endfunc
916
" A pattern with \ze before \zs is expected to give an empty match and only
" empty submatches, with either regexp engine (selected through \%#=).
func Test_ze_before_zs()
  let pats = ['\%#=1\ze \zs', '\%#=2\ze \zs']
  let all_empty = repeat([''], 10)

  for pat in pats
    call assert_equal('', matchstr(' ', pat))
  endfor
  for pat in pats
    call assert_equal(all_empty, matchlist(' ', pat))
  endfor
endfunc
923
" Check for detecting error
func Test_regexp_error()
  " Each item: [command that must fail, error it must fail with]
  let failing = [
        \ ["call matchlist('x x', '\\%#=1 \\zs*')", 'E888:'],
        \ ["call matchlist('x x', '\\%#=1 \\ze*')", 'E888:'],
        \ ["call matchlist('x x', '\\%#=2 \\zs*')", 'E888:'],
        \ ["call matchlist('x x', '\\%#=2 \\ze*')", 'E888:'],
        \ ["call matchstr('abcd', '\\%o841\\%o142')", 'E678:'],
        \ ["call matchstr('abcd', '\\%#=2\\%2147483647c')", 'E951:'],
        \ ["call matchstr('abcd', '\\%#=2\\%2147483647l')", 'E951:'],
        \ ["call matchstr('abcd', '\\%#=2\\%2147483647v')", 'E951:'],
        \ ['exe "normal /\\%#=1\\%[x\\%[x]]\<CR>"', 'E369:'],
        \ ['exe "normal /\\%#=2\\%2147483647l\<CR>"', 'E951:'],
        \ ['exe "normal /\\%#=2\\%2147483647c\<CR>"', 'E951:'],
        \ ['exe "normal /\\%#=2\\%102261126v\<CR>"', 'E951:'],
        \ ['exe "normal /\\%#=2\\%2147483646l\<CR>"', 'E486:'],
        \ ['exe "normal /\\%#=2\\%2147483646c\<CR>"', 'E486:'],
        \ ['exe "normal /\\%#=2\\%102261125v\<CR>"', 'E486:'],
        \ ]
  for [cmd, errmsg] in failing
    call assert_fails(cmd, errmsg)
  endfor

  " This one must not fail, it just does not match.
  call assert_equal('', matchstr('abcd', '\%o181\%o142'))
endfunc
943
" Test for using the last substitute string pattern (~)
" The :s command below sets the last substitute string to "baz" (the "e"
" flag suppresses the error for the missing match in the empty buffer);
" "~" in a pattern must then match that string with both regexp engines.
func Test_regexp_last_subst_string()
  new
  s/bar/baz/e
  " assert_equal() takes the expected value first, then the actual value.
  call assert_equal("baz", matchstr("foo\nbaz\nbar", "\\%#=1\~"))
  call assert_equal("baz", matchstr("foo\nbaz\nbar", "\\%#=2\~"))
  close!
endfunc
952
" Check patterns matching cursor position.
" Covers the \%.c, \%.l and \%.v atoms and the error for a digit following
" the dot.
func s:curpos_test2()
  let text = ['1', '2 foobar eins zwei drei vier fünf sechse',
        \ '3 foobar eins zwei drei vier fünf sechse',
        \ '4 foobar eins zwei drei vier fünf sechse',
        \ '5 foobar eins zwei drei vier fünf sechse',
        \ '6 foobar eins zwei drei vier fünf sechse',
        \ '7 foobar eins zwei drei vier fünf sechse']
  let want = ['1',
        \ '2 foobar ',
        \ '',
        \ '4 foobar eins zwei drei vier fünf sechse',
        \ '5 _',
        \ '6 foobar eins zwei drei vier fünf sechse',
        \ '7 foobar eins zwei drei vier fünf sechse']

  new
  call setline(1, text)

  " cursor column
  call setpos('.', [0, 2, 10, 0])
  s/\%.c.*//g
  " cursor line
  call setpos('.', [0, 3, 15, 0])
  s/\%.l.*//g
  " cursor virtual column
  call setpos('.', [0, 5, 3, 0])
  s/\%.v.*/_/g
  call assert_equal(want, getline(1, '$'))

  for cmd in ['call search("\\%.1l")',
        \ 'call search("\\%.1c")',
        \ 'call search("\\%.1v")']
    call assert_fails(cmd, 'E1204:')
  endfor
  bwipe!
endfunc
981
" Check patterns matching before or after cursor position.
" First part: column and virtual column atoms (\%<.c \%>.c \%<.v \%>.v),
" second part: line atoms (\%<.l \%>.l) on a freshly filled buffer.
func s:curpos_test3()
  " The same text is loaded for both parts.
  let text = ['1', '2 foobar eins zwei drei vier fünf sechse',
        \ '3 foobar eins zwei drei vier fünf sechse',
        \ '4 foobar eins zwei drei vier fünf sechse',
        \ '5 foobar eins zwei drei vier fünf sechse',
        \ '6 foobar eins zwei drei vier fünf sechse',
        \ '7 foobar eins zwei drei vier fünf sechse']
  let want_columns = ['1',
        \ ' eins zwei drei vier fünf sechse',
        \ '3 foobar e',
        \ '4 foobar eins zwei drei vier fünf sechse',
        \ '_foobar eins zwei drei vier fünf sechse',
        \ '6 fo_',
        \ '7 foobar eins zwei drei vier fünf sechse']
  let want_lines = ['', '', '',
        \ '4 foobar eins zwei drei vier fünf sechse',
        \ '5 foobar eins zwei drei vier fünf sechse',
        \ '', '']

  new
  call setline(1, text)
  call setpos('.', [0, 2, 10, 0])
  " Note: This removes all columns, except for the column directly in front of
  " the cursor. Bug????
  :s/^.*\%<.c//
  call setpos('.', [0, 3, 10, 0])
  :s/\%>.c.*$//
  call setpos('.', [0, 5, 4, 0])
  " Note: This removes all columns, except for the column directly in front of
  " the cursor. Bug????
  :s/^.*\%<.v/_/
  call setpos('.', [0, 6, 4, 0])
  :s/\%>.v.*$/_/
  call assert_equal(want_columns, getline(1, '$'))

  " Start over with the original text for the line atoms.
  silent %d
  call setline(1, text)
  call setpos('.', [0, 4, 4, 0])
  %s/\%<.l.*//
  call setpos('.', [0, 5, 4, 0])
  %s/\%>.l.*//
  call assert_equal(want_lines, getline(1, '$'))
  bwipe!
endfunc
1029
" Test that matching below, at or after the
" cursor position work
" Both helpers are run for 'regexpengine' 0, 1 and 2.
func Test_matching_pos()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    " Match at cursor position
    call s:curpos_test2()
    " Match before or after cursor position
    call s:curpos_test3()
  endfor
  set regexpengine&
endfunc
1042
" Regression test: a pattern containing \%') used freed memory.
func Test_using_mark_position()
  " this was using freed memory
  new
  normal O0
  " The substitute itself is expected to fail with E486.
  call assert_fails("s/\\%')", 'E486:')
  bwipe!
endfunc
1050
" Regression test: searching for \%V after this key sequence used freed
" memory.
func Test_using_visual_position()
  " this was using freed memory
  new
  execute "normal 0o\<Esc>\<C-V>k\<C-X>o0"
  /\%V
  bwipe!
endfunc
1058
" Regression test: searching for \%V after this key sequence went beyond
" the end of the line.
func Test_using_invalid_visual_position()
  " this was going beyond the end of the line
  new
  execute "normal 0o000\<Esc>0\<C-V>$s0"
  /\%V
  bwipe!
endfunc
1066
Bram Moolenaarf9cb05c2019-12-15 13:39:22 +01001067" vim: shiftwidth=2 sts=2 expandtab