blob: bc705441e7a1a49eb034e7f14262ddacee144048 [file] [log] [blame]
Bram Moolenaar22e42152016-04-03 14:02:02 +02001" Tests for regexp in utf8 encoding
Bram Moolenaar22e42152016-04-03 14:02:02 +02002
Bram Moolenaarf5094052022-07-29 16:22:25 +01003source shared.vim
4
" Verify the [[=x=]] equivalence classes against a table of character groups.
" Every whitespace-separated word of the table is one group: a base letter
" followed by its accented variants.  For each character of each group, the
" equivalence class built from that character must match the whole group it
" belongs to and must not match anywhere in any other group.
" The regexp engine is whatever 'regexpengine' the caller has selected.
func s:equivalence_test()
  let table = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let all_groups = split(table)
  for own in all_groups
    for ch in split(own, '\zs')
      let eqclass = '[[=' .. ch .. '=]]'
      " confirm that the equivalence class matches every character of the
      " group the character was taken from
      call assert_match('^' .. eqclass .. '*$', own)
      " confirm that the equivalence class does not match any character of
      " any other group
      for other in all_groups
        if other == own
          continue
        endif
        call assert_equal(-1, match(other, eqclass), ch)
      endfor
    endfor
  endfor
endfunc
23
" Run the equivalence class checks with 'regexpengine' set to 1, then put
" the option back to 0 (automatic selection).
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
29
" Run the equivalence class checks with 'regexpengine' set to 2, then put
" the option back to 0 (automatic selection).
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020035
" Check which characters in the range 1..255 are matched by every bracket
" character class ([[:alpha:]], [[:print:]], ...) and by the \i, \k and \f
" atoms, and compare the collected characters with the expected sets.
" Side effects: sets 'isprint' (and 'iskeyword' on MS-Windows); neither is
" restored here.  The regexp engine is the one selected by the caller.
func s:classes_test()
  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  " All patterns that get probed, keyed by a short name.
  let probes = {'\i': '\i', '\k': '\k', '\f': '\f'}
  for name in ['alpha', 'alnum', 'backspace', 'blank', 'cntrl', 'digit',
        \ 'escape', 'graph', 'lower', 'print', 'punct', 'return', 'space',
        \ 'tab', 'upper', 'xdigit', 'ident', 'keyword', 'fname']
    let probes[name] = '[[:' .. name .. ':]]'
  endfor

  " seen[key] collects, in ascending code point order, every character that
  " probes[key] matches.
  let seen = map(copy(probes), "''")
  for nr in range(1, 255)
    let c = nr2char(nr)
    for [key, pat] in items(probes)
      if c =~ pat
        let seen[key] .= c
      endif
    endfor
  endfor

  " Shared pieces of the expected strings.  Only code points 1..255 can ever
  " be collected above, so after "Ö" (214) come "×" (215) and "Ø" (216).
  " The no-break space (160) and soft hyphen (173) are invisible and easily
  " lost when the file is copied, therefore they are written as escapes.
  let ascii_graph = '!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
  let word_ascii = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
  let latin_high = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  let latin1 = "\u00a0" .. '¡¢£¤¥¦§¨©ª«¬' .. "\u00ad" .. '®¯°±²³´µ¶·¸¹º»¼½¾¿' .. latin_high

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', seen.alpha)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', seen.alnum)
  call assert_equal("\b", seen.backspace)
  call assert_equal("\t ", seen.blank)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", seen.cntrl)
  call assert_equal("0123456789", seen.digit)
  call assert_equal("\<Esc>", seen.escape)
  call assert_equal(ascii_graph, seen.graph)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', seen.lower)
  call assert_equal(' ' .. ascii_graph .. latin1, seen.print)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', seen.punct)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', seen.upper)
  call assert_equal("\r", seen.return)
  call assert_equal("\t\n\x0b\f\r ", seen.space)
  call assert_equal("\t", seen.tab)
  call assert_equal('0123456789ABCDEFabcdef', seen.xdigit)

  " Keyword characters: the same on all systems, 'iskeyword' is set above for
  " MS-Windows.
  let kwordchars_ok = word_ascii .. 'µ' .. latin_high

  if has('win32')
    " The run after "z" consists of the C1 control characters U+0080..U+009F
    " followed by U+00A0..U+00A7.  Symbols such as "€" can never appear here
    " because only code points up to 255 are collected.
    " NOTE(review): the range is derived from the MS-Windows default of
    " 'isident' (128-167) - confirm on a Windows build.
    let c1_controls = join(map(range(0x80, 0x9f), 'nr2char(v:val)'), '')
    let identchars_ok = word_ascii .. c1_controls .. "\u00a0" .. '¡¢£¤¥¦§'
          \ .. 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
  else
    let identchars_ok = word_ascii .. 'µ' .. latin_high
  endif

  " File name characters differ per system in the ASCII part only.
  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' .. latin1
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' .. latin1
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1
  endif

  call assert_equal(identchars_ok, seen.ident)
  call assert_equal(kwordchars_ok, seen.keyword)
  call assert_equal(fnamechars_ok, seen.fname)

  " The bracket classes must agree with the corresponding atoms.
  call assert_equal(seen['\i'], seen.ident)
  call assert_equal(seen['\k'], seen.keyword)
  call assert_equal(seen['\f'], seen.fname)
endfunc
180
" Run the character class checks with 'regexpengine' set to 1, then put the
" option back to 0 (automatic selection).
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
186
" Run the character class checks with 'regexpengine' set to 2, then put the
" option back to 0 (automatic selection).
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200192
" A collection with a reversed range such as [c-a] must fail with E944 for
" every value of 'regexpengine' (0, 1 and 2).
func Test_reversed_range()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    " the engine number is passed as the message to tell failures apart
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc
200
" A very large range in a collection: with 'regexpengine' 1 the pattern is
" expected to fail with E945, with 'regexpengine' 2 it is expected to work.
func Test_large_class()
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  let &regexpengine = 2
  let big_range = '[\u3000-\u4000]'
  call assert_equal(0, 'abc def' =~# big_range)
  call assert_equal(1, "\u3042" =~# big_range)

  let &regexpengine = 0
endfunc
Bram Moolenaar2a5b5272019-07-20 18:56:06 +0200209
" An optional sequence \%[...] with 9000 items must be rejected with E339
" when 'regexpengine' is 1.
func Test_optmatch_toolong()
  let &regexpengine = 1
  " Can only handle about 8000 characters.
  " The backslash is doubled because the pattern ends up inside a
  " double-quoted string in the command that is executed.
  let too_long = '\\%[' .. repeat('x', 9000) .. ']'
  let cmd = 'call match("abc def", "' .. too_long .. '")'
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc
217
" Test for regexp patterns with multi-byte support, using utf-8.
" Runs a table of [engines, pattern, text, expected match, submatches...]
" entries through matchlist() with the selected regexp engines and reports
" every difference with assert_report().  'regexpengine' is reset to its
" default at the end.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])    " equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  " The character before "fname" has to be U+009F, which is not a file name
  " character.  It is invisible, therefore it is written as an escape.
  call add(tl, [2, '\f\+', '&*' .. "\u009f" .. 'fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  " NOTE(review): "×§" in the next four entries looks like a mis-decoded
  " Hebrew letter Qof (U+05E7); pattern and text agree, so it is kept as
  " found - confirm against the upstream test.
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    " index in "t" of the expected match, submatches follow it
    let matchidx = 3
    for engine in [0, 1, 2]
      " skip the engine that this entry is not meant for
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " There is no result to check; "l" would be undefined or still hold
        " the result of the previous run.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches, a missing expectation means empty
        for i in range(1, 9)
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc
343
" Check that 'ambiwidth' does not change the meaning of \p: U+00EC must match
" \p with 'regexpengine' 1 and 2, for both "single" and "double".
func Test_regexp_ambiwidth()
  for engine in [1, 2]
    for width in ['single', 'double']
      exe 'set regexpengine=' .. engine .. ' ambiwidth=' .. width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc
356
" Check ignoring case for "İ" (U+0130) in patterns: every substitute() is
" applied to 'iIİ' with the "g" flag and must give the listed result.
" The regexp engine is the one selected by the caller.
func Run_regexp_ignore_case()
  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))

  " [expected result, pattern]; the replacement is always "x"
  let cases = [
        \ ['iIx', '\c\([İ]\)'],
        \ ['xxİ', '\(i\c\)'],
        \ ['iIx', '\(İ\c\)'],
        \ ['iIx', '\c\(\%u0130\)'],
        \ ['iIx', '\c\([\u0130]\)'],
        \ ['iIx', '\c\([\u012f-\u0131]\)'],
        \ ]
  for [expected, pat] in cases
    call assert_equal(expected, substitute('iIİ', pat, 'x', 'g'))
  endfor
endfunc
367
" Run the ignore-case checks with 'regexpengine' 1 and 2, then reset the
" option to its default.
func Test_regexp_ignore_case()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc
375
" Tests for regexp with multi-byte encoding and various magic settings.
" Fills a new window with test lines, searches with Normal mode commands and
" deletes the character under the cursor with "x"; the resulting line shows
" where the search ended.  The regexp engine is the one selected by the
" caller.  The window is closed at the end.
func Run_regexp_multibyte_magic()
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
  END
  " Lines "e" and "f" each hold one character far above U+10FFFF, which is
  " searched for with \%U12345678 and [\U1234abcd...] below.  Such a
  " character is easily damaged when it is typed literally, therefore these
  " lines use escapes.
  call add(text, "e \U12345678y")
  call add(text, "f \U1234abcdz")
  let rest =<< trim END
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END
  call extend(text, rest)

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k  substitute with composing characters, run from a register
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc
460
" Run the multi-byte magic checks with 'regexpengine' 1 and 2, then reset
" the option to its default.
func Test_regexp_multibyte_magic()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc
468
" Test for 7.3.192
" The command ":s/ \?/ /g" used to split multi-byte characters into bytes.
" Here the characters, including those with composing characters, must stay
" intact.
func Test_split_multibyte_to_bytes()
  new
  call setline(1, 'l äö üᾱ̆́')
  substitute/ \?/ /g
  let changed = getline(1)
  call assert_equal(' l ä ö ü ᾱ̆́', changed)
  close!
endfunc
478
" Test for matchstr() with multibyte characters, using its start and count
" arguments on a string of Hebrew letters.
func Test_matchstr_multibyte()
  new
  let hebrew = "אבגד"
  " [expected, pattern, start, count]
  for [expected, pat, start, count] in [
        \ ['ב', ".", 0, 2],
        \ ['בג', "..", 0, 2],
        \ ['א', ".", 0, 0],
        \ ['ג', ".", 4, -1],
        \ ]
    call assert_equal(expected, matchstr(hebrew, pat, start, count))
  endfor
  close!
endfunc
488
" Test for 7.4.636
" A search with end offset gets stuck at end of file.
" Searches for "(" with the offset "e+", repeats the search and yanks up to
" the next match into register a.
func Test_search_with_end_offset()
  new
  let lines = ['', 'dog(a', 'cat(']
  call setline(1, lines)
  execute "normal /(/e+\<CR>"
  execute 'normal n"ayn'
  call assert_equal("a\ncat(", @a)
  close!
endfunc
499
" Check that "^" matches even when the line starts with a combining char.
" Searching for "^" from the first line must move the cursor to line 2,
" which consists of only U+05AE.
func Test_match_start_of_line_combining()
  new
  call setline(1, ['', "\u05ae", ''])
  execute "normal gg/^\<CR>"
  let lnum = getcurpos()[1]
  call assert_equal(2, lnum)
  bwipe!
endfunc
508
" Check that [[:upper:]] and [[:lower:]] match for the automatic engine.
" Each pattern is searched for with 'regexpengine' 0, 1 and 2 in a buffer
" holding one Cyrillic and one Latin line; searchcount().total must give the
" listed number of matches for each engine.
func Test_match_char_class_upper()
  new
  call setline(1, [
        \ '05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...',
        \ '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)

  " [pattern, message for a failure, expected totals for engine 0, 1 and 2]
  let cases = [
        \ ['\<[[:upper:]]\{2,\}\>', 'TEST 1', [4, 2, 4]],
        \ ['\<[[:upper:]].\+\>', 'TEST 2', [2, 1, 2]],
        \ ['\<[[:lower:]]\+\>', 'TEST 3 lower', [4, 2, 4]],
        \ ]
  for [pat, label, totals] in cases
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in [0, 1, 2]
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, label)
    endfor
  endfor

  " clean up
  set regexpengine=0
  bwipe!
endfunc
Bram Moolenaaref2dff52020-12-21 14:54:32 +0100555
" Source a script that contains a byte which is not valid utf-8 (0xaa).
" Nothing is asserted; the script only has to be sourced in a new window.
" NOTE(review): presumably this guards against a crash - confirm.
func Test_match_invalid_byte()
  " the 'D' flag makes writefile() delete the file when the function ends
  let script_bytes = 0z630a.765d30aa0a.2e0a.790a.4030
  call writefile(script_bytes, 'Xinvalid', 'D')
  new
  source Xinvalid
  bwipe!
endfunc
562
" Run a separate Vim on a script with illegal bytes in its commands.
" Nothing is asserted on the result of the system() call.
" NOTE(review): presumably this guards against a crash - confirm.
func Test_match_illegal_byte()
  " Text has illegal bytes which need to be set explicitly
  let lines = [
        \ "norm :set no\x01\<CR>",
        \ "silent n\xff",
        \ "silent norm :b\xff\<CR>",
        \ ]
  " the 'D' flag makes writefile() delete the file when the function ends
  call writefile(lines, 'Xregexp', 'D')
  let vim_cmd = GetVimCommand() .. ' -X -Z -e -s -S Xregexp -c qa!'
  call system(vim_cmd)
endfunc
569
" With 'regexpengine' 1, edit a buffer whose name contains the bytes
" 0xeb 0xdb 0x99 and use the pattern "\&\zs*\zs*0" as a ":buf" argument.
" Errors are silenced and nothing is asserted.
" NOTE(review): presumably this guards against a crash - confirm.
func Test_match_too_complicated()
  let &regexpengine = 1
  execute "noswapfile vsplit \xeb\xdb\x99"
  silent! buf \&\zs*\zs*0
  bwipe!
  let &regexpengine = 0
endfunc
577
" A collection that holds a base character with a combining character must
" only match that combination, for 'regexpengine' 0, 1 and 2: the base
" character alone and the combining character alone are left untouched.
func Test_combining_chars_in_collection()
  new
  for engine in [0, 1, 2]
    let &regexpengine = engine
    put =['ɔ̃', 'ɔ', '̃ ã', 'abcd']
    :%s/[ɔ̃]//
    " line 1 is the empty line that the new buffer started with
    call assert_equal(['', '', 'ɔ', '̃ ã', 'abcd'], getline(1,'$'))
    %d
  endfor
  bw!
endfunc
Bram Moolenaar6456fae2022-02-22 13:37:31 +0000589
Bram Moolenaarafc13bd2019-12-16 22:43:31 +0100590" vim: shiftwidth=2 sts=2 expandtab